/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Diff of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 3485 by gross, Thu Mar 24 22:44:40 2011 UTC revision 3887 by gross, Thu Apr 5 03:14:59 2012 UTC
# Line 18  Line 18 
18    
19  /**************************************************************/  /**************************************************************/
20    
21  /* Author: artak@uq.edu.au, l.gross@uq.edu.au                                */  /* Author: artak@uq.edu.au, l.gross@uq.edu.au                 */
22    
23  /**************************************************************/  /**************************************************************/
24    
25  #define SHOW_TIMING FALSE  #define SHOW_TIMING FALSE
26    #define MY_DEBUG 0
27    #define MY_DEBUG1 1
28    
29  #include "Paso.h"  #include "Paso.h"
30  #include "Preconditioner.h"  #include "Preconditioner.h"
# Line 47  void Paso_Preconditioner_AMG_free(Paso_P Line 49  void Paso_Preconditioner_AMG_free(Paso_P
49      MEMFREE(in->r);      MEMFREE(in->r);
50      MEMFREE(in->x_C);      MEMFREE(in->x_C);
51      MEMFREE(in->b_C);      MEMFREE(in->b_C);
52            Paso_MergedSolver_free(in->merged_solver);
53    
54      MEMFREE(in);      MEMFREE(in);
55       }       }
# Line 89  dim_t Paso_Preconditioner_AMG_getNumCoar Line 92  dim_t Paso_Preconditioner_AMG_getNumCoar
92  Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {  Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {
93    
94    Paso_Preconditioner_AMG* out=NULL;    Paso_Preconditioner_AMG* out=NULL;
95      Paso_SystemMatrix *A_C=NULL;
96    bool_t verbose=options->verbose;    bool_t verbose=options->verbose;
97    
98    const dim_t my_n=A_p->mainBlock->numRows;    const dim_t my_n=A_p->mainBlock->numRows;
# Line 97  Paso_Preconditioner_AMG* Paso_Preconditi Line 101  Paso_Preconditioner_AMG* Paso_Preconditi
101    const dim_t n = my_n + overlap_n;    const dim_t n = my_n + overlap_n;
102    
103    const dim_t n_block=A_p->row_block_size;    const dim_t n_block=A_p->row_block_size;
104    index_t* F_marker=NULL, *counter=NULL;    index_t* F_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F;
105    dim_t i;    dim_t i, my_n_F, my_n_C, n_C, F_flag, *F_set=NULL, global_n_C=0, global_n_F=0, n_F;
106    double time0=0;    double time0=0;
107    const double theta = options->coarsening_threshold;    const double theta = options->coarsening_threshold;
108    const double tau = options->diagonal_dominance_threshold;    const double tau = options->diagonal_dominance_threshold;
109    const double sparsity=Paso_SystemMatrix_getSparsity(A_p);    const double sparsity=Paso_SystemMatrix_getSparsity(A_p);
110    const dim_t total_n=Paso_SystemMatrix_getGlobalTotalNumRows(A_p);    const dim_t global_n=Paso_SystemMatrix_getGlobalNumRows(A_p);
111    
112    
     
113    /*    /*
114        is the input matrix A suitable for coarsening        is the input matrix A suitable for coarsening?
115                
116    */    */
117    if ( (sparsity >= options->min_coarse_sparsity) ||    if ( (sparsity >= options->min_coarse_sparsity) ||
118         (total_n <= options->min_coarse_matrix_size) ||         (global_n <= options->min_coarse_matrix_size) ||
119         (level > options->level_max) ) {         (level > options->level_max) ) {
120    
121          if (verbose) {          if (verbose) {
# Line 124  Paso_Preconditioner_AMG* Paso_Preconditi Line 128  Paso_Preconditioner_AMG* Paso_Preconditi
128            printf("Paso_Preconditioner: AMG: termination of coarsening by ");            printf("Paso_Preconditioner: AMG: termination of coarsening by ");
129    
130            if (sparsity >= options->min_coarse_sparsity)            if (sparsity >= options->min_coarse_sparsity)
131                printf("SPAR ");                printf("SPAR");
132    
133            if (total_n <= options->min_coarse_matrix_size)            if (global_n <= options->min_coarse_matrix_size)
134                printf("SIZE ");                printf("SIZE");
135    
136            if (level > options->level_max)            if (level > options->level_max)
137                printf("LEVEL ");                printf("LEVEL");
138    
139            printf("\n");            printf("\n");
140    
141          printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",          printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
142             level,  options->level_max, sparsity, options->min_coarse_sparsity, total_n, options->min_coarse_matrix_size);               level,  options->level_max, sparsity, options->min_coarse_sparsity, global_n, options->min_coarse_matrix_size);  
143    
144         }         }
145    
# Line 144  Paso_Preconditioner_AMG* Paso_Preconditi Line 148  Paso_Preconditioner_AMG* Paso_Preconditi
148       /* Start Coarsening : */       /* Start Coarsening : */
149            
150       /* this is the table for strong connections combining mainBlock, col_coupleBlock and row_coupleBlock */       /* this is the table for strong connections combining mainBlock, col_coupleBlock and row_coupleBlock */
151       const dim_t len_S=A_p->mainBlock->pattern->len + A_p->col_coupleBlock->pattern->len + A_p->row_coupleBlock->pattern->len;       const dim_t len_S=A_p->mainBlock->pattern->len + A_p->col_coupleBlock->pattern->len + A_p->row_coupleBlock->pattern->len  + A_p->row_coupleBlock->numRows * A_p->col_coupleBlock->numCols;
152    
153       dim_t* degree_S=TMPMEMALLOC(n, dim_t);       dim_t* degree_S=TMPMEMALLOC(n, dim_t);
154       index_t *offset_S=TMPMEMALLOC(n, index_t);       index_t *offset_S=TMPMEMALLOC(n, index_t);
# Line 163  Paso_Preconditioner_AMG* Paso_Preconditi Line 167  Paso_Preconditioner_AMG* Paso_Preconditi
167             make sure that corresponding values in the row_coupleBlock and col_coupleBlock are identical             make sure that corresponding values in the row_coupleBlock and col_coupleBlock are identical
168      */      */
169          Paso_SystemMatrix_copyColCoupleBlock(A_p);          Paso_SystemMatrix_copyColCoupleBlock(A_p);
170          /* Paso_SystemMatrix_copyRemoteCoupleBlock(A_p, FALSE); */          Paso_SystemMatrix_copyRemoteCoupleBlock(A_p, FALSE);
171    
172      /*      /*
173            set splitting of unknows:            set splitting of unknows:
# Line 176  Paso_Preconditioner_AMG* Paso_Preconditi Line 180  Paso_Preconditioner_AMG* Paso_Preconditi
180             Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, offset_S, S, theta,tau);             Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, offset_S, S, theta,tau);
181       }       }
182       Paso_Preconditioner_AMG_transposeStrongConnections(n, degree_S, offset_S, S, n, degree_ST, offset_ST, ST);       Paso_Preconditioner_AMG_transposeStrongConnections(n, degree_S, offset_S, S, n, degree_ST, offset_ST, ST);
183        /*   Paso_SystemMatrix_extendedRowsForST(A_p, degree_ST, offset_ST, ST);
184  {   */
185     dim_t p;  
    for (i=0; i<n; ++i) {  
          printf("%d: ",i);  
         for (p=0; p<degree_S[i];++p) printf("%d ",S[offset_S[i]+p]);  
         printf("\n");  
    }  
 }  
186       Paso_Preconditioner_AMG_CIJPCoarsening(n,my_n,F_marker,       Paso_Preconditioner_AMG_CIJPCoarsening(n,my_n,F_marker,
187                              degree_S, offset_S, S, degree_ST, offset_ST, ST,                              degree_S, offset_S, S, degree_ST, offset_ST, ST,
188                          A_p->col_coupler->connector,A_p->col_distribution);                          A_p->col_coupler->connector,A_p->col_distribution);
 Esys_setError(SYSTEM_ERROR, "AMG:DONE.");  
 return NULL;  
189                
190    
191  /*MPI:           /* in BoomerAMG if interpolation is used FF connectivity is required */
      Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree_S, S, F_marker, options->usePanel);  
 */  
       
          /* in BoomerAMG interpolation is used FF connectiovity is required :*/  
192  /*MPI:  /*MPI:
193           if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)           if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
194                               Paso_Preconditioner_AMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);                                 Paso_Preconditioner_AMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);  
195  */  */
196    
197       options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);       options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
   
 #ifdef AAAAA  
198       if (Esys_noError() ) {       if (Esys_noError() ) {
199          #pragma omp parallel for private(i) schedule(static)          #pragma omp parallel for private(i) schedule(static)
200          for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] ==  PASO_AMG_IN_F);          for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] ==  PASO_AMG_IN_F);
# Line 212  return NULL; Line 202  return NULL;
202          /*          /*
203             count number of unkowns to be eliminated:             count number of unkowns to be eliminated:
204          */          */
205          n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker);          my_n_F=Paso_Util_cumsum_maskedTrue(my_n,counter, F_marker);
206          n_C=n-n_F;          n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker);
207          if (verbose) printf("Paso_Preconditioner: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);              /* collect my_n_F values on all processes, a direct solver should
208                        be used if any my_n_F value is 0 */
209          if ( n_F == 0 ) {  /*  is a nasty case. a direct solver should be used, return NULL */              F_set = TMPMEMALLOC(A_p->mpi_info->size, dim_t);
210            #ifdef ESYS_MPI
211                MPI_Allgather(&my_n_F, 1, MPI_INT, F_set, 1, MPI_INT, A_p->mpi_info->comm);
212            #endif
213                global_n_F=0;
214                F_flag = 1;
215                for (i=0; i<A_p->mpi_info->size; i++) {
216                    global_n_F+=F_set[i];
217                    if (F_set[i] == 0) F_flag = 0;
218                }
219                TMPMEMFREE(F_set);
220    
221            my_n_C=my_n-my_n_F;
222                global_n_C=global_n-global_n_F;
223            if (verbose) printf("Paso_Preconditioner: AMG (non-local) level %d: %d unknowns are flagged for elimination. %d left.\n",level,global_n_F,global_n_C);
224    
225            
226    /*          if ( n_F == 0 ) {  is a nasty case. a direct solver should be used, return NULL */
227                if (F_flag == 0) {
228             out = NULL;             out = NULL;
229          } else {          } else {
230             out=MEMALLOC(1,Paso_Preconditioner_AMG);             out=MEMALLOC(1,Paso_Preconditioner_AMG);
231             if (! Esys_checkPtr(out)) {             if (! Esys_checkPtr(out)) {
232            out->level = level;            out->level = level;
           out->n = n;  
           out->n_F = n_F;  
           out->n_block = n_block;  
233            out->A_C = NULL;            out->A_C = NULL;
234            out->P = NULL;              out->P = NULL;  
235            out->R = NULL;                      out->R = NULL;          
# Line 235  return NULL; Line 240  return NULL;
240            out->b_C = NULL;            out->b_C = NULL;
241            out->AMG_C = NULL;            out->AMG_C = NULL;
242            out->Smoother=NULL;            out->Smoother=NULL;
243                      out->merged_solver=NULL;
244             }             }
245             mask_C=TMPMEMALLOC(n,index_t);             mask_C=TMPMEMALLOC(n,index_t);
246             rows_in_F=TMPMEMALLOC(n_F,index_t);             rows_in_F=TMPMEMALLOC(n_F,index_t);
# Line 242  return NULL; Line 248  return NULL;
248             Esys_checkPtr(rows_in_F);             Esys_checkPtr(rows_in_F);
249             if ( Esys_noError() ) {             if ( Esys_noError() ) {
250    
251            out->Smoother = Paso_Preconditioner_Smoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);            out->Smoother = Paso_Preconditioner_Smoother_alloc(A_p, (options->smoother == PASO_JACOBI), 0, verbose);
252                
253            if (n_C != 0) {            if (global_n_C != 0) {
254                 /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */              /*  create mask of C nodes with value >-1, gives new id */
255                n_C=Paso_Util_cumsum_maskedFalse(n, mask_C, F_marker);
256                /* if nothing has been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
257        
258              /* allocate helpers :*/              /* allocate helpers :*/
259              out->x_C=MEMALLOC(n_block*n_C,double);              out->x_C=MEMALLOC(n_block*my_n_C,double);
260              out->b_C=MEMALLOC(n_block*n_C,double);              out->b_C=MEMALLOC(n_block*my_n_C,double);
261              out->r=MEMALLOC(n_block*n,double);              out->r=MEMALLOC(n_block*my_n,double);
262                            
263              Esys_checkPtr(out->r);              Esys_checkPtr(out->r);
264              Esys_checkPtr(out->x_C);              Esys_checkPtr(out->x_C);
# Line 265  return NULL; Line 273  return NULL;
273                   if  (F_marker[i]) rows_in_F[counter[i]]=i;                   if  (F_marker[i]) rows_in_F[counter[i]]=i;
274                    }                    }
275                 }                 }
                /*  create mask of C nodes with value >-1 gives new id */  
                i=Paso_Util_cumsum_maskedFalse(n,counter, F_marker);  
   
                #pragma omp parallel for private(i) schedule(static)  
                for (i = 0; i < n; ++i) {  
                   if  (F_marker[i]) {  
                  mask_C[i]=-1;  
                   } else {  
                  mask_C[i]=counter[i];;  
                   }  
                }  
276                 /*                 /*
277                    get Prolongation :                        get Prolongation :    
278                 */                                   */                  
279    
280                 time0=Esys_timer();                 time0=Esys_timer();
281  /*MPI:  
282                 out->P=Paso_Preconditioner_AMG_getProlongation(A_p,A_p->pattern->ptr, degree_S,S,n_C,mask_C, options->interpolation_method);                 out->P=Paso_Preconditioner_AMG_getProlongation(A_p,offset_S, degree_S,S,n_C,mask_C, options->interpolation_method);
283  */  
                if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);  
284              }              }
285    
286              /*                    /*      
287                 construct Restriction operator as transposed of Prolongation operator:                 construct Restriction operator as transposed of Prolongation operator:
288              */              */
289    
290              if ( Esys_noError()) {              if ( Esys_noError()) {
291                 time0=Esys_timer();                 time0=Esys_timer();
292  /*MPI:  
293                 out->R=Paso_SystemMatrix_getTranspose(out->P);                 out->R=Paso_Preconditioner_AMG_getRestriction(out->P);
294  */  
295                 if (SHOW_TIMING) printf("timing: level %d: Paso_SystemMatrix_getTranspose: %e\n",level,Esys_timer()-time0);                 if (SHOW_TIMING) printf("timing: level %d: Paso_SystemMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
296              }                    }      
297              /*              /*
298              construct coarse level matrix:                          construct coarse level matrix:
299              */                          */
300              if ( Esys_noError()) {                          if ( Esys_noError()) {
301                 time0=Esys_timer();                             time0=Esys_timer();
 /*MPI:  
                Atemp=Paso_SystemMatrix_MatrixMatrix(A_p,out->P);  
                A_C=Paso_SystemMatrix_MatrixMatrix(out->R,Atemp);  
                Paso_Preconditioner_AMG_setStrongConnections  
                Paso_SystemMatrix_free(Atemp);  
 */  
302    
303                 if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);                                         A_C = Paso_Preconditioner_AMG_buildInterpolationOperator(A_p, out->P, out->R);
304              }  
305                               if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);
306                            }
307    
               
308              /*              /*
309                 constructe courser level:                 constructe courser level:
310                                
# Line 318  return NULL; Line 312  return NULL;
312              if ( Esys_noError()) {              if ( Esys_noError()) {
313                 out->AMG_C=Paso_Preconditioner_AMG_alloc(A_C,level+1,options);                 out->AMG_C=Paso_Preconditioner_AMG_alloc(A_C,level+1,options);
314              }              }
315    
316              if ( Esys_noError()) {              if ( Esys_noError()) {
317                 if ( out->AMG_C == NULL ) {                out->A_C=A_C;
318                    out->reordering = options->reordering;                if ( out->AMG_C == NULL ) {
319                    out->refinements = options->coarse_matrix_refinements;                    /* merge the system matrix into 1 rank when
320                    /* no coarse level matrix has been constructed. use direct solver */                   it's not suitable coarsening due to the
321                    #ifdef MKL                   total number of unknowns are too small */
322                      out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);                                out->merged_solver= Paso_MergedSolver_alloc(A_C, options);
323                      Paso_SystemMatrix_free(A_C);                }
                     out->A_C->solver_package = PASO_MKL;  
                     if (verbose) printf("Paso_Preconditioner: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);  
                   #else  
                     #ifdef UMFPACK  
                        out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);  
                        Paso_SystemMatrix_free(A_C);  
                        out->A_C->solver_package = PASO_UMFPACK;  
                        if (verbose) printf("Paso_Preconditioner: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);  
                     #else  
                        out->A_C=A_C;  
                        out->A_C->solver_p=Paso_Preconditioner_Smoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);  
                        out->A_C->solver_package = PASO_SMOOTHER;  
                        if (verbose) printf("Paso_Preconditioner: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C*n_block);  
                     #endif  
                   #endif  
                } else {  
                   /* finally we set some helpers for the solver step */  
                   out->A_C=A_C;  
                }  
324              }                      }        
325            }            }
326             }             }
# Line 352  return NULL; Line 328  return NULL;
328             TMPMEMFREE(rows_in_F);             TMPMEMFREE(rows_in_F);
329          }          }
330       }       }
 #endif  
331    
332    }    }
333    TMPMEMFREE(counter);    TMPMEMFREE(counter);
# Line 376  return NULL; Line 351  return NULL;
351    
352    
353  void Paso_Preconditioner_AMG_solve(Paso_SystemMatrix* A, Paso_Preconditioner_AMG * amg, double * x, double * b) {  void Paso_Preconditioner_AMG_solve(Paso_SystemMatrix* A, Paso_Preconditioner_AMG * amg, double * x, double * b) {
354       const dim_t n = amg->n * amg->n_block;       const dim_t n = A->mainBlock->numRows * A->mainBlock->row_block_size;
355       double time0=0;       double time0=0;
356       const dim_t post_sweeps=amg->post_sweeps;       const dim_t post_sweeps=amg->post_sweeps;
357       const dim_t pre_sweeps=amg->pre_sweeps;       const dim_t pre_sweeps=amg->pre_sweeps;
# Line 384  void Paso_Preconditioner_AMG_solve(Paso_ Line 359  void Paso_Preconditioner_AMG_solve(Paso_
359       /* presmoothing */       /* presmoothing */
360       time0=Esys_timer();       time0=Esys_timer();
361       Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);       Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
362    
363       time0=Esys_timer()-time0;       time0=Esys_timer()-time0;
364       if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);       if (SHOW_TIMING) printf("timing: level %d: Presmoothing: %e\n",amg->level, time0);
365       /* end of presmoothing */       /* end of presmoothing */
366            
367       if (amg->n_F < amg->n) { /* is there work on the coarse level? */       time0=Esys_timer();
368           time0=Esys_timer();  
369       Paso_Copy(n, amg->r, b);                            /*  r <- b */       Paso_Copy(n, amg->r, b);                            /*  r <- b */
370       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
371       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,amg->r,0.,amg->b_C);  /* b_c = R*r  */       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,amg->r,0.,amg->b_C);  /* b_c = R*r  */
372           time0=Esys_timer()-time0;  
373       /* coarse level solve */       time0=Esys_timer()-time0;
374       if ( amg->AMG_C == NULL) {       /* coarse level solve */
375         if ( amg->AMG_C == NULL) {
376          time0=Esys_timer();          time0=Esys_timer();
377          /*  A_C is the coarsest level */          /*  A_C is the coarsest level */
378  #ifdef FIXME              Paso_MergedSolver_solve(amg->merged_solver,amg->x_C, amg->b_C);
         switch (amg->A_C->solver_package) {  
            case (PASO_MKL):  
           Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);  
           break;  
            case (PASO_UMFPACK):  
           Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);  
           break;  
            case (PASO_SMOOTHER):  
           Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);  
           break;  
         }  
 #endif  
         Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);  
379          if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);          if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
380       } else {       } else {
381          Paso_Preconditioner_AMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C)     */          Paso_Preconditioner_AMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C)     */
      }    
      time0=time0+Esys_timer();  
      Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */      
       
          /*postsmoothing*/  
         
         /*solve Ax=b with initial guess x */  
         time0=Esys_timer();  
         Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);  
         time0=Esys_timer()-time0;  
         if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);  
         /*end of postsmoothing*/  
       
382       }       }
383       return;  
384        time0=time0+Esys_timer();
385        Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */    
386    
387        /*postsmoothing*/
388          
389        /*solve Ax=b with initial guess x */
390        time0=Esys_timer();
391        Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
392        time0=Esys_timer()-time0;
393        if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
394        return;
395  }  }
396    
397  /* theta = threshold for strong connections */  /* theta = threshold for strong connections */
# Line 451  void Paso_Preconditioner_AMG_setStrongCo Line 413  void Paso_Preconditioner_AMG_setStrongCo
413     index_t iptr, i;     index_t iptr, i;
414     double *threshold_p=NULL;     double *threshold_p=NULL;
415    
   
416     threshold_p = TMPMEMALLOC(2*my_n, double);     threshold_p = TMPMEMALLOC(2*my_n, double);
417        
418     #pragma omp parallel for private(i,iptr) schedule(static)     #pragma omp parallel for private(i,iptr) schedule(static)
# Line 461  void Paso_Preconditioner_AMG_setStrongCo Line 422  void Paso_Preconditioner_AMG_setStrongCo
422       register double sum_row=0;       register double sum_row=0;
423       register double main_row=0;       register double main_row=0;
424       register dim_t kdeg=0;       register dim_t kdeg=0;
425           const register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];           register const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
426    
427                    
428       /* collect information for row i: */       /* collect information for row i: */
429       #pragma ivdep       #pragma ivdep
430       for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {       for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
431          register index_t j=A->mainBlock->pattern->index[iptr];          register index_t j=A->mainBlock->pattern->index[iptr];
432          register double fnorm=ABS(A->mainBlock->val[iptr]);          register double fnorm=ABS(A->mainBlock->val[iptr]);
           
433          if( j != i) {          if( j != i) {
434             max_offdiagonal = MAX(max_offdiagonal,fnorm);             max_offdiagonal = MAX(max_offdiagonal,fnorm);
435             sum_row+=fnorm;             sum_row+=fnorm;
# Line 477  void Paso_Preconditioner_AMG_setStrongCo Line 438  void Paso_Preconditioner_AMG_setStrongCo
438          }          }
439    
440       }       }
441        
442       #pragma ivdep       #pragma ivdep
443       for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {       for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
444          register double fnorm=ABS(A->col_coupleBlock->val[iptr]);          register double fnorm=ABS(A->col_coupleBlock->val[iptr]);
445    
446          max_offdiagonal = MAX(max_offdiagonal,fnorm);          max_offdiagonal = MAX(max_offdiagonal,fnorm);
447          sum_row+=fnorm;          sum_row+=fnorm;
448       }       }
# Line 489  void Paso_Preconditioner_AMG_setStrongCo Line 451  void Paso_Preconditioner_AMG_setStrongCo
451           {           {
452          const double threshold = theta*max_offdiagonal;          const double threshold = theta*max_offdiagonal;
453              threshold_p[2*i+1]=threshold;              threshold_p[2*i+1]=threshold;
454          if (tau*main_row < sum_row) { /* no diagonal domainance */          if (tau*main_row < sum_row) { /* no diagonal dominance */
455                 threshold_p[2*i]=1;                 threshold_p[2*i]=1;
456             #pragma ivdep             #pragma ivdep
457             for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {             for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
# Line 514  void Paso_Preconditioner_AMG_setStrongCo Line 476  void Paso_Preconditioner_AMG_setStrongCo
476           offset_S[i]=koffset;           offset_S[i]=koffset;
477       degree_S[i]=kdeg;       degree_S[i]=kdeg;
478        }        }
479    
480        /* now we need to distribute the threshold and the diagonal dominance indicator */        /* now we need to distribute the threshold and the diagonal dominance indicator */
481        if (A->mpi_info->size > 1) {        if (A->mpi_info->size > 1) {
482    
# Line 529  void Paso_Preconditioner_AMG_setStrongCo Line 492  void Paso_Preconditioner_AMG_setStrongCo
492    
493            #pragma omp parallel for private(i,iptr) schedule(static)            #pragma omp parallel for private(i,iptr) schedule(static)
494            for (i=0; i<overlap_n; i++) {            for (i=0; i<overlap_n; i++) {
           
495            const double threshold = remote_threshold[2*i+1];            const double threshold = remote_threshold[2*i+1];
496            register dim_t kdeg=0;            register dim_t kdeg=0;
497                const register index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i];                register const index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i]+A->remote_coupleBlock->pattern->ptr[i];
498                if (remote_threshold[2*i]>0) {                if (remote_threshold[2*i]>0) {
499               #pragma ivdep          #pragma ivdep
500               for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {          for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
501                register index_t j=A->row_coupleBlock->pattern->index[iptr];                register index_t j=A->row_coupleBlock->pattern->index[iptr];
502                if(ABS(A->row_coupleBlock->val[iptr])>threshold) {                if(ABS(A->row_coupleBlock->val[iptr])>threshold) {
503               S[koffset+kdeg] = j ;               S[koffset+kdeg] = j ;
504               kdeg++;               kdeg++;
505                }                }
506             }          }
507    
508            #pragma ivdep
509            for (iptr=A->remote_coupleBlock->pattern->ptr[i];iptr<A->remote_coupleBlock->pattern->ptr[i+1]; iptr++) {
510              register index_t j=A->remote_coupleBlock->pattern->index[iptr];
511              if(ABS(A->remote_coupleBlock->val[iptr])>threshold && i!=j) {
512                  S[koffset+kdeg] = j + my_n;
513                  kdeg++;
514              }
515            }
516                }                }
517                offset_S[i+my_n]=koffset;                offset_S[i+my_n]=koffset;
518            degree_S[i+my_n]=kdeg;            degree_S[i+my_n]=kdeg;
# Line 564  void Paso_Preconditioner_AMG_setStrongCo Line 534  void Paso_Preconditioner_AMG_setStrongCo
534                              const double theta, const double tau)                              const double theta, const double tau)
535    
536  {  {
537     const dim_t n_block=A->block_size;     const dim_t block_size=A->block_size;
538     const dim_t my_n=A->mainBlock->numRows;     const dim_t my_n=A->mainBlock->numRows;
539     const dim_t overlap_n=A->row_coupleBlock->numRows;     const dim_t overlap_n=A->row_coupleBlock->numRows;
540        
# Line 573  void Paso_Preconditioner_AMG_setStrongCo Line 543  void Paso_Preconditioner_AMG_setStrongCo
543        
544        
545     threshold_p = TMPMEMALLOC(2*my_n, double);     threshold_p = TMPMEMALLOC(2*my_n, double);
546      
547     #pragma omp parallel private(i,iptr, bi)     #pragma omp parallel private(i,iptr,bi)
548     {     {
549        
550        dim_t max_deg=0;        dim_t max_deg=0;
# Line 593  void Paso_Preconditioner_AMG_setStrongCo Line 563  void Paso_Preconditioner_AMG_setStrongCo
563       register double main_row=0;       register double main_row=0;
564       register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];       register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];
565       register dim_t kdeg=0;       register dim_t kdeg=0;
566       const register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];       register const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
567            
568       /* collect information for row i: */       /* collect information for row i: */
569       for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {       for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
570          register index_t j=A->mainBlock->pattern->index[iptr];          register index_t j=A->mainBlock->pattern->index[iptr];
571          register double fnorm=0;          register double fnorm=0;
572          #pragma ivdep          #pragma ivdep
573          for(bi=0;bi<n_block;++bi) {          for(bi=0;bi<block_size;++bi) {
574              register double  rtmp2= A->mainBlock->val[iptr*n_block+bi];              register double  rtmp2= A->mainBlock->val[iptr*block_size+bi];
575             fnorm+=rtmp2*rtmp2;             fnorm+=rtmp2*rtmp2;
576          }          }
577          fnorm=sqrt(fnorm);          fnorm=sqrt(fnorm);
# Line 620  void Paso_Preconditioner_AMG_setStrongCo Line 590  void Paso_Preconditioner_AMG_setStrongCo
590       for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {       for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
591          register double fnorm=0;          register double fnorm=0;
592          #pragma ivdep          #pragma ivdep
593          for(bi=0;bi<n_block;++bi) {          for(bi=0;bi<block_size;++bi) {
594             register double rtmp2 = A->col_coupleBlock->val[iptr*n_block+bi];             register double rtmp2 = A->col_coupleBlock->val[iptr*block_size+bi];
595             fnorm+=rtmp2*rtmp2;             fnorm+=rtmp2*rtmp2;
596          }          }
597          fnorm=sqrt(fnorm);          fnorm=sqrt(fnorm);
# Line 638  void Paso_Preconditioner_AMG_setStrongCo Line 608  void Paso_Preconditioner_AMG_setStrongCo
608          rtmp_offset=-A->mainBlock->pattern->ptr[i];          rtmp_offset=-A->mainBlock->pattern->ptr[i];
609                    
610          threshold_p[2*i+1]=threshold;          threshold_p[2*i+1]=threshold;
611          if (tau*main_row < sum_row) { /* no diagonal domainance */          if (tau*main_row < sum_row) { /* no diagonal dominance */
612             threshold_p[2*i]=1;             threshold_p[2*i]=1;
            rtmp_offset=-A->mainBlock->pattern->ptr[i];  
613             #pragma ivdep             #pragma ivdep
614             for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {             for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
615            register index_t j=A->mainBlock->pattern->index[iptr];            register index_t j=A->mainBlock->pattern->index[iptr];
# Line 685  void Paso_Preconditioner_AMG_setStrongCo Line 654  void Paso_Preconditioner_AMG_setStrongCo
654            
655       const double threshold2 = remote_threshold[2*i+1]*remote_threshold[2*i+1];       const double threshold2 = remote_threshold[2*i+1]*remote_threshold[2*i+1];
656       register dim_t kdeg=0;       register dim_t kdeg=0;
657       const register index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i];       register const index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i]+A->remote_coupleBlock->pattern->ptr[i];
658       if (remote_threshold[2*i]>0) {       if (remote_threshold[2*i]>0) {
659          #pragma ivdep          #pragma ivdep
660          for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {          for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
661             register index_t j=A->row_coupleBlock->pattern->index[iptr];             register index_t j=A->row_coupleBlock->pattern->index[iptr];
662             register double fnorm2=0;             register double fnorm2=0;
663             #pragma ivdepremote_threshold[2*i]             #pragma ivdepremote_threshold[2*i]
664             for(bi=0;bi<n_block;++bi) {             for(bi=0;bi<block_size;++bi) {
665            register double rtmp2 = A->row_coupleBlock->val[iptr*n_block+bi];            register double rtmp2 = A->row_coupleBlock->val[iptr*block_size+bi];
666            fnorm2+=rtmp2*rtmp2;            fnorm2+=rtmp2*rtmp2;
667             }             }
668                        
# Line 702  void Paso_Preconditioner_AMG_setStrongCo Line 671  void Paso_Preconditioner_AMG_setStrongCo
671            kdeg++;            kdeg++;
672             }             }
673          }          }
674    
675            #pragma ivdep
676                for (iptr=A->remote_coupleBlock->pattern->ptr[i];iptr<A->remote_coupleBlock->pattern->ptr[i+1]; ++iptr) {
677                   register index_t j=A->remote_coupleBlock->pattern->index[iptr];
678                   register double fnorm2=0;
679                   #pragma ivdepremote_threshold[2*i]
680                   for(bi=0;bi<block_size;++bi) {
681                      register double rtmp2 = A->remote_coupleBlock->val[iptr*block_size+bi];
682                      fnorm2+=rtmp2*rtmp2;
683                   }
684                   if(fnorm2 > threshold2 && i != j) {
685                      S[koffset+kdeg] = j + my_n;
686                      kdeg++;
687                   }
688                }
689                    
690       }       }
691       offset_S[i+my_n]=koffset;       offset_S[i+my_n]=koffset;
# Line 711  void Paso_Preconditioner_AMG_setStrongCo Line 695  void Paso_Preconditioner_AMG_setStrongCo
695     }     }
696     TMPMEMFREE(threshold_p);     TMPMEMFREE(threshold_p);
697  }  }
698    
699  void Paso_Preconditioner_AMG_transposeStrongConnections(const dim_t n, const dim_t* degree_S, const index_t* offset_S, const index_t* S,  void Paso_Preconditioner_AMG_transposeStrongConnections(const dim_t n, const dim_t* degree_S, const index_t* offset_S, const index_t* S,
700                              const dim_t nT, dim_t* degree_ST, index_t* offset_ST,index_t* ST)                              const dim_t nT, dim_t* degree_ST, index_t* offset_ST,index_t* ST)
701  {  {
# Line 738  void Paso_Preconditioner_AMG_transposeSt Line 723  void Paso_Preconditioner_AMG_transposeSt
723     }     }
724  }  }
725    
726    int compareindex(const void *a, const void *b)
727    {
728      return (*(int *)a - *(int *)b);
729    }
730    
731  void Paso_Preconditioner_AMG_CIJPCoarsening(const dim_t n, const dim_t my_n, index_t*split_marker,  void Paso_Preconditioner_AMG_CIJPCoarsening(const dim_t n, const dim_t my_n, index_t*split_marker,
732                          const dim_t* degree_S, const index_t* offset_S, const index_t* S,                          const dim_t* degree_S, const index_t* offset_S, const index_t* S,
733                          const dim_t* degree_ST, const index_t* offset_ST, const index_t* ST,                          const dim_t* degree_ST, const index_t* offset_ST, const index_t* ST,
# Line 754  void Paso_Preconditioner_AMG_CIJPCoarsen Line 744  void Paso_Preconditioner_AMG_CIJPCoarsen
744    Status=TMPMEMALLOC(n, double);    Status=TMPMEMALLOC(n, double);
745    random = Paso_Distribution_createRandomVector(col_dist,1);    random = Paso_Distribution_createRandomVector(col_dist,1);
746    ST_flag=TMPMEMALLOC(offset_ST[n-1]+ degree_ST[n-1], index_t);    ST_flag=TMPMEMALLOC(offset_ST[n-1]+ degree_ST[n-1], index_t);
747      
748    #pragma omp parallel for private(i)    #pragma omp parallel for private(i)
749    for (i=0; i< my_n; ++i) {    for (i=0; i< my_n; ++i) {
750        w[i]=degree_ST[i]+random[i];        w[i]=degree_ST[i]+random[i];
# Line 764  void Paso_Preconditioner_AMG_CIJPCoarsen Line 754  void Paso_Preconditioner_AMG_CIJPCoarsen
754         Status[i]=1; /* status undefined */         Status[i]=1; /* status undefined */
755        }        }
756    }    }
757    
758    #pragma omp parallel for private(i, iptr)    #pragma omp parallel for private(i, iptr)
759    for (i=0; i< n; ++i) {    for (i=0; i< n; ++i) {
760        for( iptr =0 ; iptr < degree_ST[i]; ++iptr)  {        for( iptr =0 ; iptr < degree_ST[i]; ++iptr)  {
# Line 773  void Paso_Preconditioner_AMG_CIJPCoarsen Line 764  void Paso_Preconditioner_AMG_CIJPCoarsen
764    
765        
766    numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );    numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );
767    printf(" coarsening loop start: num of undefined rows = %d \n",numUndefined);    /* printf(" coarsening loop start: num of undefined rows = %d \n",numUndefined);  */
   
     
768    iter=0;    iter=0;
769    while (numUndefined > 0) {    while (numUndefined > 0) {
770       Paso_Coupler_fillOverlap(n, w, w_coupler);       Paso_Coupler_fillOverlap(n, w, w_coupler);
771       {  
772      int p;        /* calculate the maximum value of neighbours following active strong connections:
773      for (p=0; p<my_n; ++p) {          w2[i]=MAX(w[k]) with k in ST[i] or S[i] and (i,k) connection is still active  */      
        printf(" %d : %f %f \n",p, w[p], Status[p]);  
     }  
     for (p=my_n; p<n; ++p) {  
        printf(" %d : %f \n",p, w[p]);  
     }  
       
       
      }  
       
       /* calculate the maximum value of naigbours following active strong connections:  
         w2[i]=MAX(w[k]) with k in ST[i] or S[i] and (i,k) conenction is still active  */        
774        #pragma omp parallel for private(i, iptr)        #pragma omp parallel for private(i, iptr)
775        for (i=0; i<my_n; ++i) {        for (i=0; i<my_n; ++i) {
776       if (Status[i]>0) { /* status is still undefined */       if (Status[i]>0) { /* status is still undefined */
# Line 800  void Paso_Preconditioner_AMG_CIJPCoarsen Line 778  void Paso_Preconditioner_AMG_CIJPCoarsen
778          register bool_t inD=TRUE;          register bool_t inD=TRUE;
779          const double wi=w[i];          const double wi=w[i];
780    
   
781          for( iptr =0 ; iptr < degree_S[i]; ++iptr) {          for( iptr =0 ; iptr < degree_S[i]; ++iptr) {
   
782             const index_t k=S[offset_S[i]+iptr];             const index_t k=S[offset_S[i]+iptr];
783             const index_t* start_p = &ST[offset_ST[k]];             const index_t* start_p = &ST[offset_ST[k]];
784             const index_t* where_p=(index_t*)bsearch(&i, start_p, degree_ST[k], sizeof(index_t), Paso_comparIndex);             const index_t* where_p=(index_t*)bsearch(&i, start_p, degree_ST[k], sizeof(index_t), Paso_comparIndex);
785    
786             if (ST_flag[(index_t)(where_p-start_p)]>0) {             if (ST_flag[offset_ST[k] + (index_t)(where_p-start_p)]>0) {
 printf("S: %d (%e) -> %d    (%e)\n",i, wi, k, w[k]);        
787            if (wi <= w[k] ) {            if (wi <= w[k] ) {
788               inD=FALSE;               inD=FALSE;
789               break;               break;
# Line 821  printf("S: %d (%e) -> %d   (%e)\n",i, wi, Line 796  printf("S: %d (%e) -> %d   (%e)\n",i, wi,
796            for( iptr =0 ; iptr < degree_ST[i]; ++iptr) {            for( iptr =0 ; iptr < degree_ST[i]; ++iptr) {
797               const index_t k=ST[offset_ST[i]+iptr];               const index_t k=ST[offset_ST[i]+iptr];
798               if ( ST_flag[offset_ST[i]+iptr] > 0 ) {               if ( ST_flag[offset_ST[i]+iptr] > 0 ) {
 printf("ST: %d (%e) -> %d   (%e)\n",i, wi, k, w[k]);        
               
799                         if (wi <= w[k] ) {                         if (wi <= w[k] ) {
800                 inD=FALSE;                 inD=FALSE;
801                 break;                 break;
# Line 832  printf("ST: %d (%e) -> %d  (%e)\n",i, wi, Line 805  printf("ST: %d (%e) -> %d  (%e)\n",i, wi,
805          }              }    
806          if (inD) {          if (inD) {
807             Status[i]=0.; /* is in D */             Status[i]=0.; /* is in D */
 printf("%d is in D\n",i);  
808          }          }
809       }       }
810            
# Line 864  printf("%d is in D\n",i); Line 836  printf("%d is in D\n",i);
836            const index_t j=ST[offset_ST[i]+jptr];            const index_t j=ST[offset_ST[i]+jptr];
837            if ( (Status[j] == 0.) && (ST_flag[offset_ST[i]+jptr]>0) ) {            if ( (Status[j] == 0.) && (ST_flag[offset_ST[i]+jptr]>0) ) {
838               w[i]--;               w[i]--;
 printf("%d reduced by %d\n",i,j);  
839               ST_flag[offset_ST[i]+jptr]=-1;               ST_flag[offset_ST[i]+jptr]=-1;
840            }            }
841             }             }
# Line 886  printf("%d reduced by %d\n",i,j); Line 857  printf("%d reduced by %d\n",i,j);
857    
858               for (jptr=0; jptr<degree_ST[i]; ++jptr) {               for (jptr=0; jptr<degree_ST[i]; ++jptr) {
859              const index_t j=ST[offset_ST[i]+jptr];              const index_t j=ST[offset_ST[i]+jptr];
 printf("check connection: %d %d\n",i,j);  
860              ST_flag[offset_ST[i]+jptr]=-1;              ST_flag[offset_ST[i]+jptr]=-1;
861              for (kptr=0; kptr<degree_ST[j]; ++kptr) {              for (kptr=0; kptr<degree_ST[j]; ++kptr) {
862                 const index_t k=ST[offset_ST[j]+kptr];                 const index_t k=ST[offset_ST[j]+kptr];
 printf("check connection: %d of %d\n",k,j);  
863                 if (NULL != bsearch(&k, start_p, degree_ST[i], sizeof(index_t), Paso_comparIndex) ) { /* k in ST[i] ? */                 if (NULL != bsearch(&k, start_p, degree_ST[i], sizeof(index_t), Paso_comparIndex) ) { /* k in ST[i] ? */
 printf("found!\n");  
864                    if (ST_flag[offset_ST[j]+kptr] >0) {                    if (ST_flag[offset_ST[j]+kptr] >0) {
865                   if (j< my_n ) {                   if (j< my_n ) {
866                      w[j]--;                      w[j]--;
 printf("%d reduced by %d and %d \n",j, i,k);  
867                   }                   }
868                   ST_flag[offset_ST[j]+kptr]=-1;                   ST_flag[offset_ST[j]+kptr]=-1;
869                    }                    }
# Line 906  printf("%d reduced by %d and %d \n",j, i Line 873  printf("%d reduced by %d and %d \n",j, i
873            }            }
874          }          }
875       }       }
876    
877       /* adjust status */       /* adjust status */
878       #pragma omp parallel for private(i)       #pragma omp parallel for private(i)
879       for (i=0; i< my_n; ++i) {       for (i=0; i< my_n; ++i) {
880          if ( Status[i] == 0. ) {          if ( Status[i] == 0. ) {
881             Status[i] = -10;   /* this is now a C point */             Status[i] = -10;   /* this is now a C point */
882          } else if ( w[i]<1.) {          } else if (Status[i] == 1. && w[i]<1.) {
883             Status[i] = -100;   /* this is now a F point */               Status[i] = -100;   /* this is now a F point */  
884          }          }
885       }       }
886            
887             i = numUndefined;
888       numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );       numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );
889         if (numUndefined == i) {
890           Esys_setError(SYSTEM_ERROR, "Can NOT reduce numUndefined.");
891           return;
892         }
893    
894       iter++;       iter++;
895       printf(" coarsening loop %d: num of undefined rows = %d \n",iter, numUndefined);       /* printf(" coarsening loop %d: num of undefined rows = %d \n",iter, numUndefined); */
   } /* end of while loop */  
896    
897      } /* end of while loop */
898    
899    /* map to output :*/    /* map to output :*/
900    Paso_Coupler_fillOverlap(n, Status, w_coupler);    Paso_Coupler_fillOverlap(n, Status, w_coupler);
# Line 933  printf("%d reduced by %d and %d \n",j, i Line 906  printf("%d reduced by %d and %d \n",j, i
906          split_marker[i]=PASO_AMG_IN_F;          split_marker[i]=PASO_AMG_IN_F;
907       }       }
908    }    }
   
909    /* clean up : */    /* clean up : */
910    Paso_Coupler_free(w_coupler);    Paso_Coupler_free(w_coupler);
911    TMPMEMFREE(random);    TMPMEMFREE(random);
# Line 943  printf("%d reduced by %d and %d \n",j, i Line 915  printf("%d reduced by %d and %d \n",j, i
915        
916    return;    return;
917  }  }
918    

Legend:
Removed from v.3485  
changed lines
  Added in v.3887

  ViewVC Help
Powered by ViewVC 1.1.26