/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Diff of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 3352 by gross, Tue Nov 16 03:58:09 2010 UTC revision 4154 by jfenwick, Tue Jan 22 09:30:23 2013 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2010 by University of Queensland  * Copyright (c) 2003-2013 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development since 2012 by School of Earth Sciences
13    *
14    *****************************************************************************/
15    
16    
17  /**************************************************************/  /************************************************************************************/
18    
19  /* Paso: AMG preconditioner                                  */  /* Paso: AMG preconditioner  (local version)                  */
20    
21  /**************************************************************/  /************************************************************************************/
22    
23  /* Author: artak@uq.edu.au, l.gross@uq.edu.au                                */  /* Author: artak@uq.edu.au, l.gross@uq.edu.au                 */
24    
25  /**************************************************************/  /************************************************************************************/
26    
27  #define SHOW_TIMING FALSE  #define SHOW_TIMING FALSE
28    #define MY_DEBUG 0
29    #define MY_DEBUG1 1
30    
31  #include "Paso.h"  #include "Paso.h"
32  #include "Preconditioner.h"  #include "Preconditioner.h"
# Line 30  Line 34 
34  #include "PasoUtil.h"  #include "PasoUtil.h"
35  #include "UMFPACK.h"  #include "UMFPACK.h"
36  #include "MKL.h"  #include "MKL.h"
37    #include<stdio.h>
38    
39    
40  /**************************************************************/  /************************************************************************************/
41    
42  /* free all memory used by AMG                                */  /* free all memory used by AMG                                */
43    
44  void Paso_Preconditioner_LocalAMG_free(Paso_Preconditioner_LocalAMG * in) {  void Paso_Preconditioner_AMG_free(Paso_Preconditioner_AMG * in) {
45       if (in!=NULL) {       if (in!=NULL) {
46      Paso_Preconditioner_LocalSmoother_free(in->Smoother);      Paso_Preconditioner_Smoother_free(in->Smoother);
47      Paso_SparseMatrix_free(in->P);      Paso_SystemMatrix_free(in->P);
48      Paso_SparseMatrix_free(in->R);      Paso_SystemMatrix_free(in->R);
49      Paso_SparseMatrix_free(in->A_C);      Paso_SystemMatrix_free(in->A_C);
50      Paso_Preconditioner_LocalAMG_free(in->AMG_C);      Paso_Preconditioner_AMG_free(in->AMG_C);
51      MEMFREE(in->r);      MEMFREE(in->r);
52      MEMFREE(in->x_C);      MEMFREE(in->x_C);
53      MEMFREE(in->b_C);      MEMFREE(in->b_C);
54            Paso_MergedSolver_free(in->merged_solver);
55    
       
56      MEMFREE(in);      MEMFREE(in);
57       }       }
58  }  }
59    
60  /*****************************************************************  index_t Paso_Preconditioner_AMG_getMaxLevel(const Paso_Preconditioner_AMG * in) {
61       if (in->AMG_C == NULL) {
62          return in->level;
63       } else {
64          return Paso_Preconditioner_AMG_getMaxLevel(in->AMG_C);
65       }
66    }
67    double Paso_Preconditioner_AMG_getCoarseLevelSparsity(const Paso_Preconditioner_AMG * in) {
68          if (in->AMG_C == NULL) {
69         if (in->A_C == NULL) {
70            return 1.;
71         } else {
72            return Paso_SystemMatrix_getSparsity(in->A_C);
73         }
74          } else {
75            return Paso_Preconditioner_AMG_getCoarseLevelSparsity(in->AMG_C);
76          }
77    }
78    dim_t Paso_Preconditioner_AMG_getNumCoarseUnknwons(const Paso_Preconditioner_AMG * in) {
79       if (in->AMG_C == NULL) {
80          if (in->A_C == NULL) {
81         return 0;
82          } else {
83         return Paso_SystemMatrix_getTotalNumRows(in->A_C);
84          }
85       } else {
86         return Paso_Preconditioner_AMG_getNumCoarseUnknwons(in->AMG_C);
87       }
88    }
89    /***************************************************************************************
90    
91     constructs AMG     constructs AMG
92        
93  ******************************************************************/  ****************************************************************************************/
94  Paso_Preconditioner_LocalAMG* Paso_Preconditioner_LocalAMG_alloc(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) {  Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {
95    
96    Paso_Preconditioner_LocalAMG* out=NULL;    Paso_Preconditioner_AMG* out=NULL;
97      Paso_SystemMatrix *A_C=NULL;
98    bool_t verbose=options->verbose;    bool_t verbose=options->verbose;
99    
100      const dim_t my_n=A_p->mainBlock->numRows;
101      const dim_t overlap_n=A_p->row_coupleBlock->numRows;
102        
103    Paso_SparseMatrix *Atemp=NULL, *A_C=NULL;    const dim_t n = my_n + overlap_n;
104    const dim_t n=A_p->numRows;  
105    const dim_t n_block=A_p->row_block_size;    const dim_t n_block=A_p->row_block_size;
106    index_t* split_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F=NULL, *S=NULL, *degree=NULL;    index_t* F_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F;
107    dim_t n_F=0, n_C=0, i;    dim_t i, my_n_F, my_n_C, n_C, F_flag, *F_set=NULL, global_n_C=0, global_n_F=0, n_F;
108    double time0=0;    double time0=0;
109    const double theta = options->coarsening_threshold;    const double theta = options->coarsening_threshold;
110    const double tau = options->diagonal_dominance_threshold;    const double tau = options->diagonal_dominance_threshold;
111        const double sparsity=Paso_SystemMatrix_getSparsity(A_p);
112        const dim_t global_n=Paso_SystemMatrix_getGlobalNumRows(A_p);
113    
114    
115    /*    /*
116        is the input matrix A suitable for coarsening        is the input matrix A suitable for coarsening?
117                
118    */    */
119    if ( (A_p->pattern->len >= options->min_coarse_sparsity * n * n ) || (n <= options->min_coarse_matrix_size) || (level > options->level_max) ) {    if ( (sparsity >= options->min_coarse_sparsity) ||
120       if (verbose) printf("Paso_Solver: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",         (global_n <= options->min_coarse_matrix_size) ||
121      level,  options->level_max, A_p->pattern->len/(1.*n * n), options->min_coarse_sparsity, n, options->min_coarse_matrix_size  );           (level > options->level_max) ) {
122       return NULL;  
123    }          if (verbose) {
124              /*
125                  print stopping condition:
126                          - 'SPAR' = min_coarse_matrix_sparsity exceeded
127                          - 'SIZE' = min_coarse_matrix_size exceeded
128                          - 'LEVEL' = level_max exceeded
129              */
130              printf("Paso_Preconditioner: AMG: termination of coarsening by ");
131    
132              if (sparsity >= options->min_coarse_sparsity)
133                  printf("SPAR");
134    
135              if (global_n <= options->min_coarse_matrix_size)
136                  printf("SIZE");
137    
138              if (level > options->level_max)
139                  printf("LEVEL");
140    
141              printf("\n");
142    
143            printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
144               level,  options->level_max, sparsity, options->min_coarse_sparsity, global_n, options->min_coarse_matrix_size);  
145    
146           }
147    
148           return NULL;
149      }  else {
150       /* Start Coarsening : */       /* Start Coarsening : */
151        
152         /* this is the table for strong connections combining mainBlock, col_coupleBlock and row_coupleBlock */
153         const dim_t len_S=A_p->mainBlock->pattern->len + A_p->col_coupleBlock->pattern->len + A_p->row_coupleBlock->pattern->len  + A_p->row_coupleBlock->numRows * A_p->col_coupleBlock->numCols;
154    
155       split_marker=TMPMEMALLOC(n,index_t);       dim_t* degree_S=TMPMEMALLOC(n, dim_t);
156         index_t *offset_S=TMPMEMALLOC(n, index_t);
157         index_t *S=TMPMEMALLOC(len_S, index_t);
158         dim_t* degree_ST=TMPMEMALLOC(n, dim_t);
159         index_t *offset_ST=TMPMEMALLOC(n, index_t);
160         index_t *ST=TMPMEMALLOC(len_S, index_t);
161        
162        
163         F_marker=TMPMEMALLOC(n,index_t);
164       counter=TMPMEMALLOC(n,index_t);       counter=TMPMEMALLOC(n,index_t);
165       degree=TMPMEMALLOC(n, dim_t);  
166       S=TMPMEMALLOC(A_p->pattern->len, index_t);       if ( !( Esys_checkPtr(F_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree_S) || Esys_checkPtr(offset_S) || Esys_checkPtr(S)
167       if ( !( Esys_checkPtr(split_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree) || Esys_checkPtr(S) ) ) {      || Esys_checkPtr(degree_ST) || Esys_checkPtr(offset_ST) || Esys_checkPtr(ST) ) ) {
168       /*      /*
169               make sure that corresponding values in the row_coupleBlock and col_coupleBlock are identical
170        */
171            Paso_SystemMatrix_copyColCoupleBlock(A_p);
172            Paso_SystemMatrix_copyRemoteCoupleBlock(A_p, FALSE);
173    
174        /*
175            set splitting of unknows:            set splitting of unknows:
176                  
177           */           */
178       time0=Esys_timer();       time0=Esys_timer();
179       if (n_block>1) {       if (n_block>1) {
180             Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree, S, theta,tau);             Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree_S, offset_S, S, theta,tau);
181       } else {       } else {
182             Paso_Preconditioner_AMG_setStrongConnections(A_p, degree, S, theta,tau);             Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, offset_S, S, theta,tau);
183       }       }
184       Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree, S, split_marker, options->usePanel);       Paso_Preconditioner_AMG_transposeStrongConnections(n, degree_S, offset_S, S, n, degree_ST, offset_ST, ST);
185    /*   Paso_SystemMatrix_extendedRowsForST(A_p, degree_ST, offset_ST, ST);
186     */
187    
188         Paso_Preconditioner_AMG_CIJPCoarsening(n,my_n,F_marker,
189                                degree_S, offset_S, S, degree_ST, offset_ST, ST,
190                            A_p->col_coupler->connector,A_p->col_distribution);
191          
192    
193             /* in BoomerAMG if interpolation is used FF connectivity is required */
194    /*MPI:
195             if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
196                                 Paso_Preconditioner_AMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);  
197    */
198    
199       options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);       options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
       
200       if (Esys_noError() ) {       if (Esys_noError() ) {
201          #pragma omp parallel for private(i) schedule(static)          #pragma omp parallel for private(i) schedule(static)
202          for (i = 0; i < n; ++i) split_marker[i]= (split_marker[i] == PASO_AMG_IN_F);          for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] ==  PASO_AMG_IN_F);
203            
204          /*          /*
205             count number of unkowns to be eliminated:             count number of unkowns to be eliminated:
206          */          */
207          n_F=Paso_Util_cumsum_maskedTrue(n,counter, split_marker);          my_n_F=Paso_Util_cumsum_maskedTrue(my_n,counter, F_marker);
208          n_C=n-n_F;          n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker);
209          if (verbose) printf("Paso_Solver: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);              /* collect my_n_F values on all processes, a direct solver should
210                        be used if any my_n_F value is 0 */
211          if ( n_F == 0 ) {  /*  is a nasty case. a direct solver should be used, return NULL */              F_set = TMPMEMALLOC(A_p->mpi_info->size, dim_t);
212            #ifdef ESYS_MPI
213                MPI_Allgather(&my_n_F, 1, MPI_INT, F_set, 1, MPI_INT, A_p->mpi_info->comm);
214            #endif
215                global_n_F=0;
216                F_flag = 1;
217                for (i=0; i<A_p->mpi_info->size; i++) {
218                    global_n_F+=F_set[i];
219                    if (F_set[i] == 0) F_flag = 0;
220                }
221                TMPMEMFREE(F_set);
222    
223            my_n_C=my_n-my_n_F;
224                global_n_C=global_n-global_n_F;
225            if (verbose) printf("Paso_Preconditioner: AMG (non-local) level %d: %d unknowns are flagged for elimination. %d left.\n",level,global_n_F,global_n_C);
226    
227            
228    /*          if ( n_F == 0 ) {  is a nasty case. a direct solver should be used, return NULL */
229                if (F_flag == 0) {
230             out = NULL;             out = NULL;
231          } else {          } else {
232             out=MEMALLOC(1,Paso_Preconditioner_LocalAMG);             out=MEMALLOC(1,Paso_Preconditioner_AMG);
233             if (! Esys_checkPtr(out)) {             if (! Esys_checkPtr(out)) {
234            out->level = level;            out->level = level;
           out->n = n;  
           out->n_F = n_F;  
           out->n_block = n_block;  
235            out->A_C = NULL;            out->A_C = NULL;
236            out->P = NULL;              out->P = NULL;  
237            out->R = NULL;                      out->R = NULL;          
# Line 130  Paso_Preconditioner_LocalAMG* Paso_Preco Line 242  Paso_Preconditioner_LocalAMG* Paso_Preco
242            out->b_C = NULL;            out->b_C = NULL;
243            out->AMG_C = NULL;            out->AMG_C = NULL;
244            out->Smoother=NULL;            out->Smoother=NULL;
245                      out->merged_solver=NULL;
246             }             }
247             mask_C=TMPMEMALLOC(n,index_t);             mask_C=TMPMEMALLOC(n,index_t);
248             rows_in_F=TMPMEMALLOC(n_F,index_t);             rows_in_F=TMPMEMALLOC(n_F,index_t);
# Line 137  Paso_Preconditioner_LocalAMG* Paso_Preco Line 250  Paso_Preconditioner_LocalAMG* Paso_Preco
250             Esys_checkPtr(rows_in_F);             Esys_checkPtr(rows_in_F);
251             if ( Esys_noError() ) {             if ( Esys_noError() ) {
252    
253            out->Smoother = Paso_Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);            out->Smoother = Paso_Preconditioner_Smoother_alloc(A_p, (options->smoother == PASO_JACOBI), 0, verbose);
254                
255            if ( n_F < n ) { /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */            if (global_n_C != 0) {
256                /*  create mask of C nodes with value >-1, gives new id */
257                n_C=Paso_Util_cumsum_maskedFalse(n, mask_C, F_marker);
258                /* if nothing has been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
259        
260              /* allocate helpers :*/              /* allocate helpers :*/
261              out->x_C=MEMALLOC(n_block*n_C,double);              out->x_C=MEMALLOC(n_block*my_n_C,double);
262              out->b_C=MEMALLOC(n_block*n_C,double);              out->b_C=MEMALLOC(n_block*my_n_C,double);
263              out->r=MEMALLOC(n_block*n,double);              out->r=MEMALLOC(n_block*my_n,double);
264                            
265              Esys_checkPtr(out->r);              Esys_checkPtr(out->r);
266              Esys_checkPtr(out->x_C);              Esys_checkPtr(out->x_C);
# Line 156  Paso_Preconditioner_LocalAMG* Paso_Preco Line 272  Paso_Preconditioner_LocalAMG* Paso_Preco
272                 {                 {
273                    #pragma omp for schedule(static)                    #pragma omp for schedule(static)
274                    for (i = 0; i < n; ++i) {                    for (i = 0; i < n; ++i) {
275                   if  (split_marker[i]) rows_in_F[counter[i]]=i;                   if  (F_marker[i]) rows_in_F[counter[i]]=i;
                   }  
                }  
                /*  create mask of C nodes with value >-1 gives new id */  
                i=Paso_Util_cumsum_maskedFalse(n,counter, split_marker);  
   
                #pragma omp parallel for private(i) schedule(static)  
                for (i = 0; i < n; ++i) {  
                   if  (split_marker[i]) {  
                  mask_C[i]=-1;  
                   } else {  
                  mask_C[i]=counter[i];;  
276                    }                    }
277                 }                 }
278                 /*                 /*
279                    get Restriction :                      get Prolongation :    
280                 */                                   */                  
281    
282                 time0=Esys_timer();                 time0=Esys_timer();
283                 out->P=Paso_Preconditioner_AMG_getDirectProlongation(A_p,degree,S,n_C,mask_C);  
284                 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);                 out->P=Paso_Preconditioner_AMG_getProlongation(A_p,offset_S, degree_S,S,n_C,mask_C, options->interpolation_method);
285    
286              }              }
287    
288              /*                    /*      
289                 construct Prolongation operator as transposed of restriction operator:                 construct Restriction operator as transposed of Prolongation operator:
290              */              */
291    
292              if ( Esys_noError()) {              if ( Esys_noError()) {
293                 time0=Esys_timer();                 time0=Esys_timer();
294                 out->R=Paso_SparseMatrix_getTranspose(out->P);  
295                 if (SHOW_TIMING) printf("timing: level %d: Paso_SparseMatrix_getTranspose: %e\n",level,Esys_timer()-time0);                 out->R=Paso_Preconditioner_AMG_getRestriction(out->P);
296    
297                   if (SHOW_TIMING) printf("timing: level %d: Paso_SystemMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
298              }                    }      
299              /*              /*
300              construct coarse level matrix:                          construct coarse level matrix:
301              */                          */
302              if ( Esys_noError()) {                          if ( Esys_noError()) {
303                 time0=Esys_timer();                             time0=Esys_timer();
304                 Atemp=Paso_SparseMatrix_MatrixMatrix(A_p,out->P);  
305                 A_C=Paso_SparseMatrix_MatrixMatrix(out->R,Atemp);                             A_C = Paso_Preconditioner_AMG_buildInterpolationOperator(A_p, out->P, out->R);
306                 Paso_SparseMatrix_free(Atemp);  
307                 if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);                                         if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);
308              }                          }
309    
               
310              /*              /*
311                 constructe courser level:                 constructe courser level:
312                                
313              */              */
314              if ( Esys_noError()) {              if ( Esys_noError()) {
315                 out->AMG_C=Paso_Preconditioner_LocalAMG_alloc(A_C,level+1,options);                 out->AMG_C=Paso_Preconditioner_AMG_alloc(A_C,level+1,options);
316              }              }
317    
318              if ( Esys_noError()) {              if ( Esys_noError()) {
319                 if ( out->AMG_C == NULL ) {                out->A_C=A_C;
320                    out->reordering = options->reordering;                if ( out->AMG_C == NULL ) {
321                    out->refinements = options->coarse_matrix_refinements;                    /* merge the system matrix into 1 rank when
322                    /* no coarse level matrix has been constructed. use direct solver */                   it's not suitable coarsening due to the
323                    #ifdef MKL                   total number of unknowns are too small */
324                      out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);                                out->merged_solver= Paso_MergedSolver_alloc(A_C, options);
325                      Paso_SparseMatrix_free(A_C);                }
                     out->A_C->solver_package = PASO_MKL;  
                     if (verbose) printf("Paso_Solver: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C);  
                   #else  
                     #ifdef UMFPACK  
                        out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);  
                        Paso_SparseMatrix_free(A_C);  
                        out->A_C->solver_package = PASO_UMFPACK;  
                        if (verbose) printf("Paso_Solver: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C);  
                     #else  
                        out->A_C=A_C;  
                        out->A_C->solver_p=Paso_Preconditioner_LocalSmoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);  
                        out->A_C->solver_package = PASO_SMOOTHER;  
                        if (verbose) printf("Paso_Solver: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C);  
                     #endif  
                   #endif  
                } else {  
                   /* finally we set some helpers for the solver step */  
                   out->A_C=A_C;  
                }  
326              }                      }        
327            }            }
328             }             }
# Line 238  Paso_Preconditioner_LocalAMG* Paso_Preco Line 330  Paso_Preconditioner_LocalAMG* Paso_Preco
330             TMPMEMFREE(rows_in_F);             TMPMEMFREE(rows_in_F);
331          }          }
332       }       }
333    
334    }    }
335    TMPMEMFREE(counter);    TMPMEMFREE(counter);
336    TMPMEMFREE(split_marker);    TMPMEMFREE(F_marker);
337    TMPMEMFREE(degree);    TMPMEMFREE(degree_S);
338      TMPMEMFREE(offset_S);
339    TMPMEMFREE(S);    TMPMEMFREE(S);
340      TMPMEMFREE(degree_ST);
341      TMPMEMFREE(offset_ST);
342      TMPMEMFREE(ST);
343      
344      }
345    
346    if (Esys_noError()) {    if (Esys_noError()) {
347       return out;       return out;
348    } else  {    } else  {
349       Paso_Preconditioner_LocalAMG_free(out);       Paso_Preconditioner_AMG_free(out);
350       return NULL;       return NULL;
351    }    }
352  }  }
353    
354    
355  void Paso_Preconditioner_LocalAMG_solve(Paso_SparseMatrix* A, Paso_Preconditioner_LocalAMG * amg, double * x, double * b) {  void Paso_Preconditioner_AMG_solve(Paso_SystemMatrix* A, Paso_Preconditioner_AMG * amg, double * x, double * b) {
356       const dim_t n = amg->n * amg->n_block;       const dim_t n = A->mainBlock->numRows * A->mainBlock->row_block_size;
357       double time0=0;       double time0=0;
358       const dim_t post_sweeps=amg->post_sweeps;       const dim_t post_sweeps=amg->post_sweeps;
359       const dim_t pre_sweeps=amg->pre_sweeps;       const dim_t pre_sweeps=amg->pre_sweeps;
360    
361       /* presmoothing */       /* presmoothing */
362       time0=Esys_timer();       time0=Esys_timer();
363       Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);       Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
364    
365       time0=Esys_timer()-time0;       time0=Esys_timer()-time0;
366       if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);       if (SHOW_TIMING) printf("timing: level %d: Presmoothing: %e\n",amg->level, time0);
367       /* end of presmoothing */       /* end of presmoothing */
368            
369       if (amg->n_F < amg->n) { /* is there work on the coarse level? */       time0=Esys_timer();
370           time0=Esys_timer();  
371       Paso_Copy(n, amg->r, b);                            /*  r <- b */       Paso_Copy(n, amg->r, b);                            /*  r <- b */
372       Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
373       Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->R,amg->r,0.,amg->b_C);  /* b_c = R*r  */       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,amg->r,0.,amg->b_C);  /* b_c = R*r  */
374           time0=Esys_timer()-time0;  
375       /* coarse level solve */       time0=Esys_timer()-time0;
376       if ( amg->AMG_C == NULL) {       /* coarse level solve */
377         if ( amg->AMG_C == NULL) {
378          time0=Esys_timer();          time0=Esys_timer();
379          /*  A_C is the coarsest level */          /*  A_C is the coarsest level */
380          switch (amg->A_C->solver_package) {              Paso_MergedSolver_solve(amg->merged_solver,amg->x_C, amg->b_C);
            case (PASO_MKL):  
           Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);  
           break;  
            case (PASO_UMFPACK):  
           Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);  
           break;  
            case (PASO_SMOOTHER):  
           Paso_Preconditioner_LocalSmoother_solve(amg->A_C, amg->Smoother,amg->x_C,amg->b_C,pre_sweeps, FALSE);  
           break;  
         }  
381          if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);          if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
382       } else {       } else {
383          Paso_Preconditioner_LocalAMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C)     */          Paso_Preconditioner_AMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C)     */
      }    
      time0=time0+Esys_timer();  
      Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */      
       
          /*postsmoothing*/  
         
         /*solve Ax=b with initial guess x */  
         time0=Esys_timer();  
         Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);  
         time0=Esys_timer()-time0;  
         if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);  
         /*end of postsmoothing*/  
       
384       }       }
385       return;  
386        time0=time0+Esys_timer();
387        Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */    
388    
389        /*postsmoothing*/
390          
391        /*solve Ax=b with initial guess x */
392        time0=Esys_timer();
393        Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
394        time0=Esys_timer()-time0;
395        if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
396        return;
397  }  }
398    
399  /* theta = threshold for strong connections */  /* theta = threshold for strong connections */
# Line 315  void Paso_Preconditioner_LocalAMG_solve( Line 404  void Paso_Preconditioner_LocalAMG_solve(
404  in the sense that |A_{ij}| >= theta * max_k |A_{ik}|  in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
405  */  */
406    
407  void Paso_Preconditioner_AMG_setStrongConnections(Paso_SparseMatrix* A,  void Paso_Preconditioner_AMG_setStrongConnections(Paso_SystemMatrix* A,
408                        dim_t *degree, index_t *S,                            dim_t *degree_S, index_t *offset_S, index_t *S,
409                        const double theta, const double tau)                        const double theta, const double tau)
410  {  {
    const dim_t n=A->numRows;  
    index_t iptr, i,j;  
    dim_t kdeg;  
    double max_offdiagonal, threshold, sum_row, main_row, fnorm;  
411    
412       const dim_t my_n=A->mainBlock->numRows;
413       const dim_t overlap_n=A->row_coupleBlock->numRows;
414      
415       index_t iptr, i;
416       double *threshold_p=NULL;
417    
418        #pragma omp parallel for private(i,iptr,max_offdiagonal, threshold,j, kdeg, sum_row, main_row, fnorm) schedule(static)     threshold_p = TMPMEMALLOC(2*my_n, double);
419        for (i=0;i<n;++i) {    
420                 #pragma omp parallel for private(i,iptr) schedule(static)
421       max_offdiagonal = 0.;     for (i=0;i<my_n;++i) {        
422       sum_row=0;      
423       main_row=0;       register double max_offdiagonal = 0.;
424         register double sum_row=0;
425         register double main_row=0;
426         register dim_t kdeg=0;
427             register const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
428    
429            
430         /* collect information for row i: */
431       #pragma ivdep       #pragma ivdep
432       for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {       for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
433          j=A->pattern->index[iptr];          register index_t j=A->mainBlock->pattern->index[iptr];
434          fnorm=ABS(A->val[iptr]);          register double fnorm=ABS(A->mainBlock->val[iptr]);
           
435          if( j != i) {          if( j != i) {
436             max_offdiagonal = MAX(max_offdiagonal,fnorm);             max_offdiagonal = MAX(max_offdiagonal,fnorm);
437             sum_row+=fnorm;             sum_row+=fnorm;
438          } else {          } else {
439             main_row=fnorm;             main_row=fnorm;
440          }          }
441    
442       }       }
443       threshold = theta*max_offdiagonal;  
444       kdeg=0;       #pragma ivdep
445       if (tau*main_row < sum_row) { /* no diagonal domainance */       for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
446          #pragma ivdep          register double fnorm=ABS(A->col_coupleBlock->val[iptr]);
447          for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {  
448             j=A->pattern->index[iptr];          max_offdiagonal = MAX(max_offdiagonal,fnorm);
449             if(ABS(A->val[iptr])>threshold && i!=j) {          sum_row+=fnorm;
           S[A->pattern->ptr[i]+kdeg] = j;  
           kdeg++;  
            }  
         }  
450       }       }
451       degree[i]=kdeg;  
452             /* inspect row i: */
453             {
454            const double threshold = theta*max_offdiagonal;
455                threshold_p[2*i+1]=threshold;
456            if (tau*main_row < sum_row) { /* no diagonal dominance */
457                   threshold_p[2*i]=1;
458               #pragma ivdep
459               for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
460                  register index_t j=A->mainBlock->pattern->index[iptr];
461                  if(ABS(A->mainBlock->val[iptr])>threshold && i!=j) {
462                 S[koffset+kdeg] = j;
463                 kdeg++;
464                  }
465               }
466               #pragma ivdep
467               for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
468                  register index_t j=A->col_coupleBlock->pattern->index[iptr];
469                  if(ABS(A->col_coupleBlock->val[iptr])>threshold) {
470                 S[koffset+kdeg] = j + my_n;
471                 kdeg++;
472                  }
473               }
474                } else {
475                   threshold_p[2*i]=-1;
476                }
477             }
478             offset_S[i]=koffset;
479         degree_S[i]=kdeg;
480        }        }
481    
482          /* now we need to distribute the threshold and the diagonal dominance indicator */
483          if (A->mpi_info->size > 1) {
484    
485              const index_t koffset_0=A->mainBlock->pattern->ptr[my_n]+A->col_coupleBlock->pattern->ptr[my_n]
486                     -A->mainBlock->pattern->ptr[0]-A->col_coupleBlock->pattern->ptr[0];
487          
488              double *remote_threshold=NULL;
489              
490          Paso_Coupler* threshold_coupler=Paso_Coupler_alloc(A->row_coupler->connector  ,2);
491              Paso_Coupler_startCollect(threshold_coupler,threshold_p);
492              Paso_Coupler_finishCollect(threshold_coupler);
493              remote_threshold=threshold_coupler->recv_buffer;
494    
495              #pragma omp parallel for private(i,iptr) schedule(static)
496              for (i=0; i<overlap_n; i++) {
497              const double threshold = remote_threshold[2*i+1];
498              register dim_t kdeg=0;
499                  register const index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i]+A->remote_coupleBlock->pattern->ptr[i];
500                  if (remote_threshold[2*i]>0) {
501            #pragma ivdep
502            for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
503                  register index_t j=A->row_coupleBlock->pattern->index[iptr];
504                  if(ABS(A->row_coupleBlock->val[iptr])>threshold) {
505                 S[koffset+kdeg] = j ;
506                 kdeg++;
507                  }
508            }
509    
510            #pragma ivdep
511            for (iptr=A->remote_coupleBlock->pattern->ptr[i];iptr<A->remote_coupleBlock->pattern->ptr[i+1]; iptr++) {
512              register index_t j=A->remote_coupleBlock->pattern->index[iptr];
513              if(ABS(A->remote_coupleBlock->val[iptr])>threshold && i!=j) {
514                  S[koffset+kdeg] = j + my_n;
515                  kdeg++;
516              }
517            }
518                  }
519                  offset_S[i+my_n]=koffset;
520              degree_S[i+my_n]=kdeg;
521              }
522    
523              Paso_Coupler_free(threshold_coupler);
524         }
525         TMPMEMFREE(threshold_p);
526  }  }
527    
528  /* theta = threshold for strong connections */  /* theta = threshold for strong connections */
# Line 366  void Paso_Preconditioner_AMG_setStrongCo Line 531  void Paso_Preconditioner_AMG_setStrongCo
531    
532  in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F  in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
533  */  */
534  void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SparseMatrix* A,  void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SystemMatrix* A,
535                              dim_t *degree, index_t *S,                              dim_t *degree_S, index_t *offset_S, index_t *S,
536                              const double theta, const double tau)                              const double theta, const double tau)
537    
538  {  {
539     const dim_t n_block=A->row_block_size;     const dim_t block_size=A->block_size;
540     const dim_t n=A->numRows;     const dim_t my_n=A->mainBlock->numRows;
541     index_t iptr, i,j, bi;     const dim_t overlap_n=A->row_coupleBlock->numRows;
542     dim_t kdeg, max_deg;    
543     register double max_offdiagonal, threshold, fnorm, sum_row, main_row;     index_t iptr, i, bi;
544     double *rtmp;     double *threshold_p=NULL;
545        
546        
547        #pragma omp parallel private(i,iptr,max_offdiagonal, kdeg, threshold,j, max_deg, fnorm, sum_row, main_row, rtmp)     threshold_p = TMPMEMALLOC(2*my_n, double);
548        {  
549       max_deg=0;     #pragma omp parallel private(i,iptr,bi)
550       #pragma omp for schedule(static)     {
551       for (i=0;i<n;++i) max_deg=MAX(max_deg, A->pattern->ptr[i+1]-A->pattern->ptr[i]);    
552          dim_t max_deg=0;
553          double *rtmp=NULL;
554    
555          #pragma omp for schedule(static)
556          for (i=0;i<my_n;++i) max_deg=MAX(max_deg, A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]
557                         +A->col_coupleBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i]);
558                
559       rtmp=TMPMEMALLOC(max_deg, double);        rtmp=TMPMEMALLOC(max_deg, double);
560                
561       #pragma omp for schedule(static)        #pragma omp for schedule(static)
562       for (i=0;i<n;++i) {        for (i=0;i<my_n;++i) {        
563         register double max_offdiagonal = 0.;
564         register double sum_row=0;
565         register double main_row=0;
566         register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];
567         register dim_t kdeg=0;
568         register const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
569            
570          max_offdiagonal = 0.;       /* collect information for row i: */
571          sum_row=0;       for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
572          main_row=0;          register index_t j=A->mainBlock->pattern->index[iptr];
573          for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {          register double fnorm=0;
574             j=A->pattern->index[iptr];          #pragma ivdep
575             fnorm=0;          for(bi=0;bi<block_size;++bi) {
576             #pragma ivdep              register double  rtmp2= A->mainBlock->val[iptr*block_size+bi];
577             for(bi=0;bi<n_block*n_block;++bi) fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];             fnorm+=rtmp2*rtmp2;
578             fnorm=sqrt(fnorm);          }
579             rtmp[iptr-A->pattern->ptr[i]]=fnorm;          fnorm=sqrt(fnorm);
580             if( j != i) {          rtmp[iptr+rtmp_offset]=fnorm;
581            max_offdiagonal = MAX(max_offdiagonal,fnorm);          
582            sum_row+=fnorm;          if( j != i) {
583             } else {             max_offdiagonal = MAX(max_offdiagonal,fnorm);
584            main_row=fnorm;             sum_row+=fnorm;
585             }          } else {
586               main_row=fnorm;
587          }          }
588          threshold = theta*max_offdiagonal;      
589         }
590                
591          kdeg=0;           rtmp_offset+=A->mainBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i];
592          if (tau*main_row < sum_row) { /* no diagonal domainance */       for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
593            register double fnorm=0;
594            #pragma ivdep
595            for(bi=0;bi<block_size;++bi) {
596               register double rtmp2 = A->col_coupleBlock->val[iptr*block_size+bi];
597               fnorm+=rtmp2*rtmp2;
598            }
599            fnorm=sqrt(fnorm);
600            
601            rtmp[iptr+rtmp_offset]=fnorm;
602            max_offdiagonal = MAX(max_offdiagonal,fnorm);
603            sum_row+=fnorm;
604         }
605        
606          
607         /* inspect row i: */
608         {
609            const double threshold = theta*max_offdiagonal;
610            rtmp_offset=-A->mainBlock->pattern->ptr[i];
611            
612            threshold_p[2*i+1]=threshold;
613            if (tau*main_row < sum_row) { /* no diagonal dominance */
614               threshold_p[2*i]=1;
615             #pragma ivdep             #pragma ivdep
616             for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {             for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
617            j=A->pattern->index[iptr];            register index_t j=A->mainBlock->pattern->index[iptr];
618            if(rtmp[iptr-A->pattern->ptr[i]] > threshold && i!=j) {            if(rtmp[iptr+rtmp_offset] > threshold && i!=j) {
619               S[A->pattern->ptr[i]+kdeg] = j;               S[koffset+kdeg] = j;
620               kdeg++;               kdeg++;
621            }            }
622             }             }
623               rtmp_offset+=A->mainBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i];
624               #pragma ivdep
625               for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
626              register index_t j=A->col_coupleBlock->pattern->index[iptr];
627              if( rtmp[iptr+rtmp_offset] >threshold) {
628                 S[koffset+kdeg] = j + my_n;
629                 kdeg++;
630              }
631               }
632            } else {
633               threshold_p[2*i]=-1;
634            }
635         }
636         offset_S[i]=koffset;
637         degree_S[i]=kdeg;
638          }
639          TMPMEMFREE(rtmp);
640       }
641       /* now we need to distribute the threshold and the diagonal dominance indicator */
642       if (A->mpi_info->size > 1) {
643          
644          const index_t koffset_0=A->mainBlock->pattern->ptr[my_n]+A->col_coupleBlock->pattern->ptr[my_n]
645                                 -A->mainBlock->pattern->ptr[0]-A->col_coupleBlock->pattern->ptr[0];
646          
647          double *remote_threshold=NULL;
648          
649          Paso_Coupler* threshold_coupler=Paso_Coupler_alloc(A->row_coupler->connector  ,2);
650          Paso_Coupler_startCollect(threshold_coupler,threshold_p);
651          Paso_Coupler_finishCollect(threshold_coupler);
652          remote_threshold=threshold_coupler->recv_buffer;
653          
654          #pragma omp parallel for private(i,iptr) schedule(static)
655          for (i=0; i<overlap_n; i++) {
656        
657         const double threshold2 = remote_threshold[2*i+1]*remote_threshold[2*i+1];
658         register dim_t kdeg=0;
659         register const index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i]+A->remote_coupleBlock->pattern->ptr[i];
660         if (remote_threshold[2*i]>0) {
661            #pragma ivdep
662            for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
663               register index_t j=A->row_coupleBlock->pattern->index[iptr];
664               register double fnorm2=0;
665               #pragma ivdepremote_threshold[2*i]
666               for(bi=0;bi<block_size;++bi) {
667              register double rtmp2 = A->row_coupleBlock->val[iptr*block_size+bi];
668              fnorm2+=rtmp2*rtmp2;
669               }
670              
671               if(fnorm2 > threshold2 ) {
672              S[koffset+kdeg] = j ;
673              kdeg++;
674               }
675          }          }
         degree[i]=kdeg;  
      }        
      TMPMEMFREE(rtmp);  
       } /* end of parallel region */  
   
 }    
676    
677  /* the runge stueben coarsening algorithm: */          #pragma ivdep
678  void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset,              for (iptr=A->remote_coupleBlock->pattern->ptr[i];iptr<A->remote_coupleBlock->pattern->ptr[i+1]; ++iptr) {
679                          const dim_t* degree, const index_t* S,                 register index_t j=A->remote_coupleBlock->pattern->index[iptr];
680                          index_t*split_marker, const bool_t usePanel)                 register double fnorm2=0;
681                   #pragma ivdepremote_threshold[2*i]
682                   for(bi=0;bi<block_size;++bi) {
683                      register double rtmp2 = A->remote_coupleBlock->val[iptr*block_size+bi];
684                      fnorm2+=rtmp2*rtmp2;
685                   }
686                   if(fnorm2 > threshold2 && i != j) {
687                      S[koffset+kdeg] = j + my_n;
688                      kdeg++;
689                   }
690                }
691            
692         }
693         offset_S[i+my_n]=koffset;
694         degree_S[i+my_n]=kdeg;
695          }
696          Paso_Coupler_free(threshold_coupler);
697       }
698       TMPMEMFREE(threshold_p);
699    }
700    
701    void Paso_Preconditioner_AMG_transposeStrongConnections(const dim_t n, const dim_t* degree_S, const index_t* offset_S, const index_t* S,
702                                const dim_t nT, dim_t* degree_ST, index_t* offset_ST,index_t* ST)
703  {  {
704         index_t i, j;
705     index_t *lambda=NULL, *ST=NULL, *notInPanel=NULL, *panel=NULL, lambda_max, lambda_k;     dim_t p;
706     dim_t i,k, p, q, *degreeT=NULL, len_panel, len_panel_new;     dim_t len=0;
707     register index_t j, itmp;     #pragma omp parallel for private(i) schedule(static)
708         for (i=0; i<nT ;++i) {
709     if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */        degree_ST[i]=0;
     
    lambda=TMPMEMALLOC(n, index_t); Esys_checkPtr(lambda);  
    degreeT=TMPMEMALLOC(n, dim_t); Esys_checkPtr(degreeT);  
    ST=TMPMEMALLOC(offset[n], index_t);  Esys_checkPtr(ST);  
    if (usePanel) {  
       notInPanel=TMPMEMALLOC(n, bool_t); Esys_checkPtr(notInPanel);  
       panel=TMPMEMALLOC(n, index_t); Esys_checkPtr(panel);  
710     }     }
711       for (i=0; i<n ;++i) {
712          for (p=0; p<degree_S[i]; ++p) degree_ST[ S[offset_S[i]+p] ]++;
713       }
714       for (i=0; i<nT ;++i) {
715          offset_ST[i]=len;
716          len+=degree_ST[i];
717          degree_ST[i]=0;
718       }
719       for (i=0; i<n ;++i) {
720          for (p=0; p<degree_S[i]; ++p) {
721           j=S[offset_S[i]+p];
722           ST[offset_ST[j]+degree_ST[j]]=i;
723           degree_ST[j]++;
724          }
725       }
726    }
727    
728    int compareindex(const void *a, const void *b)
729    {
730      return (*(int *)a - *(int *)b);
731    }
732    
733    void Paso_Preconditioner_AMG_CIJPCoarsening(const dim_t n, const dim_t my_n, index_t*split_marker,
734                            const dim_t* degree_S, const index_t* offset_S, const index_t* S,
735                            const dim_t* degree_ST, const index_t* offset_ST, const index_t* ST,
736                            Paso_Connector* col_connector, Paso_Distribution* col_dist)
737    {
738       dim_t i, numUndefined,   iter=0;
739      index_t iptr, jptr, kptr;
740      double *random=NULL, *w=NULL, *Status=NULL;
741      index_t * ST_flag=NULL;
742    
743      Paso_Coupler* w_coupler=Paso_Coupler_alloc(col_connector  ,1);
744        
745        w=TMPMEMALLOC(n, double);
746        Status=TMPMEMALLOC(n, double);
747     if (Esys_noError() ) {    random = Paso_Distribution_createRandomVector(col_dist,1);
748        /* initialize  split_marker and split_marker :*/    ST_flag=TMPMEMALLOC(offset_ST[n-1]+ degree_ST[n-1], index_t);
749        /* those unknows which are not influenced go into F, the rest is available for F or C */  
750        #pragma omp parallel for private(i) schedule(static)    #pragma omp parallel for private(i)
751        for (i=0;i<n;++i) {    for (i=0; i< my_n; ++i) {
752       degreeT[i]=0;        w[i]=degree_ST[i]+random[i];
753       if (degree[i]>0) {        if (degree_ST[i] < 1) {
754          lambda[i]=0;         Status[i]=-100; /* F point  */
755          split_marker[i]=PASO_AMG_UNDECIDED;        } else {
756       } else {         Status[i]=1; /* status undefined */
         split_marker[i]=PASO_AMG_IN_F;  
         lambda[i]=-1;  
      }  
757        }        }
758        /* create transpose :*/    }
759        for (i=0;i<n;++i) {  
760          for (p=0; p<degree[i]; ++p) {    #pragma omp parallel for private(i, iptr)
761             j=S[offset[i]+p];    for (i=0; i< n; ++i) {
762             ST[offset[j]+degreeT[j]]=i;        for( iptr =0 ; iptr < degree_ST[i]; ++iptr)  {
763             degreeT[j]++;       ST_flag[offset_ST[i]+iptr]=1;
         }  
764        }        }
765        /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */    }  
766        #pragma omp parallel for private(i, j, itmp) schedule(static)  
767        for (i=0;i<n;++i) {    
768       if (split_marker[i]==PASO_AMG_UNDECIDED) {    numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );
769          itmp=lambda[i];    /* printf(" coarsening loop start: num of undefined rows = %d \n",numUndefined);  */
770          for (p=0; p<degreeT[i]; ++p) {    iter=0;
771             j=ST[offset[i]+p];    while (numUndefined > 0) {
772             if (split_marker[j]==PASO_AMG_UNDECIDED) {       Paso_Coupler_fillOverlap(n, w, w_coupler);
773            itmp++;  
774             } else {  /* at this point there are no C points */        /* calculate the maximum value of neighbours following active strong connections:
775            itmp+=2;          w2[i]=MAX(w[k]) with k in ST[i] or S[i] and (i,k) connection is still active  */      
776          #pragma omp parallel for private(i, iptr)
777          for (i=0; i<my_n; ++i) {
778         if (Status[i]>0) { /* status is still undefined */
779    
780            register bool_t inD=TRUE;
781            const double wi=w[i];
782    
783            for( iptr =0 ; iptr < degree_S[i]; ++iptr) {
784               const index_t k=S[offset_S[i]+iptr];
785               const index_t* start_p = &ST[offset_ST[k]];
786               const index_t* where_p=(index_t*)bsearch(&i, start_p, degree_ST[k], sizeof(index_t), Paso_comparIndex);
787    
788               if (ST_flag[offset_ST[k] + (index_t)(where_p-start_p)]>0) {
789              if (wi <= w[k] ) {
790                 inD=FALSE;
791                 break;
792              }
793             }             }
794            
795            }
796            
797            if (inD) {
798              for( iptr =0 ; iptr < degree_ST[i]; ++iptr) {
799                 const index_t k=ST[offset_ST[i]+iptr];
800                 if ( ST_flag[offset_ST[i]+iptr] > 0 ) {
801                           if (wi <= w[k] ) {
802                   inD=FALSE;
803                   break;
804                }
805                 }
806              }
807            }    
808            if (inD) {
809               Status[i]=0.; /* is in D */
810          }          }
         lambda[i]=itmp;  
811       }       }
812        
813        }        }
       if (usePanel) {  
      #pragma omp parallel for private(i) schedule(static)  
      for (i=0;i<n;++i) notInPanel[i]=TRUE;  
       }  
       /* start search :*/  
       i=Paso_Util_arg_max(n,lambda);  
       while (lambda[i]>-1) { /* is there any undecided unknown? */  
   
      if (usePanel) {  
         len_panel=0;  
         do {  
            /* the unknown i is moved to C */  
            split_marker[i]=PASO_AMG_IN_C;  
            lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */  
             
            /* all undecided unknown strongly coupled to i are moved to F */  
            for (p=0; p<degreeT[i]; ++p) {  
           j=ST[offset[i]+p];  
             
           if (split_marker[j]==PASO_AMG_UNDECIDED) {  
               
              split_marker[j]=PASO_AMG_IN_F;  
              lambda[j]=-1;  
               
              for (q=0; q<degreeT[j]; ++q) {  
             k=ST[offset[j]+q];  
             if (split_marker[k]==PASO_AMG_UNDECIDED) {  
                lambda[k]++;  
                if (notInPanel[k]) {  
                   notInPanel[k]=FALSE;  
                   panel[len_panel]=k;  
                   len_panel++;  
                }  
814    
815              }    /* the unknown i is moved to C */        Paso_Coupler_fillOverlap(n, Status, w_coupler);
816              split_marker[i]=PASO_AMG_IN_C;  
817              lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */  
818               }       /*   remove connection to D points :
819                    
820               for each i in D:
821              for each j in S_i:
822                 w[j]--
823                 ST_tag[j,i]=-1
824              for each j in ST[i]:
825                 ST_tag[i,j]=-1
826                 for each k in ST[j]:
827                if k in ST[i]:
828                   w[j]--;
829                ST_tag[j,k]=-1
830                
831         */
832         /* w is updated  for local rows only */
833         {
834            #pragma omp parallel for private(i, jptr)
835            for (i=0; i< my_n; ++i) {
836    
837               for (jptr=0; jptr<degree_ST[i]; ++jptr) {
838              const index_t j=ST[offset_ST[i]+jptr];
839              if ( (Status[j] == 0.) && (ST_flag[offset_ST[i]+jptr]>0) ) {
840                 w[i]--;
841                 ST_flag[offset_ST[i]+jptr]=-1;
842            }            }
843             }             }
844             for (p=0; p<degree[i]; ++p) {            
845            j=S[offset[i]+p];          }
846            if (split_marker[j]==PASO_AMG_UNDECIDED) {          #pragma omp parallel for private(i, jptr)
847               lambda[j]--;          for (i=my_n; i< n; ++i) {
848               if (notInPanel[j]) {             for (jptr=0; jptr<degree_ST[i]; ++jptr) {
849              notInPanel[j]=FALSE;            const index_t j = ST[offset_ST[i]+jptr];
850              panel[len_panel]=j;            if ( Status[j] == 0. ) ST_flag[offset_ST[i]+jptr]=-1;
             len_panel++;  
              }  
           }  
851             }             }
852            }
853    
854            
855            for (i=0; i< n; ++i) {
856               if ( Status[i] == 0. ) {
857    
858             /* consolidate panel */                       const index_t* start_p = &ST[offset_ST[i]];
859             /* remove lambda[q]=-1 */  
860             lambda_max=-1;               for (jptr=0; jptr<degree_ST[i]; ++jptr) {
861             i=-1;              const index_t j=ST[offset_ST[i]+jptr];
862             len_panel_new=0;              ST_flag[offset_ST[i]+jptr]=-1;
863             for (q=0; q<len_panel; q++) {              for (kptr=0; kptr<degree_ST[j]; ++kptr) {
864               k=panel[q];                 const index_t k=ST[offset_ST[j]+kptr];
865               lambda_k=lambda[k];                 if (NULL != bsearch(&k, start_p, degree_ST[i], sizeof(index_t), Paso_comparIndex) ) { /* k in ST[i] ? */
866               if (split_marker[k]==PASO_AMG_UNDECIDED) {                    if (ST_flag[offset_ST[j]+kptr] >0) {
867              panel[len_panel_new]=k;                   if (j< my_n ) {
868              len_panel_new++;                      w[j]--;
869                     }
870              if (lambda_max == lambda_k) {                   ST_flag[offset_ST[j]+kptr]=-1;
871                 if (k<i) i=k;                    }
872              } else if (lambda_max < lambda_k) {                 }
                lambda_max =lambda_k;  
                i=k;  
873              }              }
874               }               }
            }  
            len_panel=len_panel_new;  
         } while (len_panel>0);      
      } else {  
         /* the unknown i is moved to C */  
         split_marker[i]=PASO_AMG_IN_C;  
         lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */  
           
         /* all undecided unknown strongly coupled to i are moved to F */  
         for (p=0; p<degreeT[i]; ++p) {  
            j=ST[offset[i]+p];  
            if (split_marker[j]==PASO_AMG_UNDECIDED) {  
           
           split_marker[j]=PASO_AMG_IN_F;  
           lambda[j]=-1;  
           
           for (q=0; q<degreeT[j]; ++q) {  
              k=ST[offset[j]+q];  
              if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;  
875            }            }
   
            }  
876          }          }
877          for (p=0; p<degree[i]; ++p) {       }
878             j=S[offset[i]+p];  
879             if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;       /* adjust status */
880         #pragma omp parallel for private(i)
881         for (i=0; i< my_n; ++i) {
882            if ( Status[i] == 0. ) {
883               Status[i] = -10;   /* this is now a C point */
884            } else if (Status[i] == 1. && w[i]<1.) {
885               Status[i] = -100;   /* this is now a F point */  
886          }          }
           
887       }       }
888       i=Paso_Util_arg_max(n,lambda);      
889        }       i = numUndefined;
890     }       numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );
891     TMPMEMFREE(lambda);       if (numUndefined == i) {
892     TMPMEMFREE(ST);         Esys_setError(SYSTEM_ERROR, "Can NOT reduce numUndefined.");
893     TMPMEMFREE(degreeT);         return;
894     TMPMEMFREE(panel);       }
895     TMPMEMFREE(notInPanel);  
896         iter++;
897         /* printf(" coarsening loop %d: num of undefined rows = %d \n",iter, numUndefined); */
898    
899      } /* end of while loop */
900    
901      /* map to output :*/
902      Paso_Coupler_fillOverlap(n, Status, w_coupler);
903      #pragma omp parallel for private(i)
904      for (i=0; i< n; ++i) {
905         if (Status[i] > -50.) {
906            split_marker[i]=PASO_AMG_IN_C;
907         } else {
908            split_marker[i]=PASO_AMG_IN_F;
909         }
910      }
911      /* clean up : */
912      Paso_Coupler_free(w_coupler);
913      TMPMEMFREE(random);
914      TMPMEMFREE(w);
915      TMPMEMFREE(Status);
916      TMPMEMFREE(ST_flag);
917      
918      return;
919  }  }
920    

Legend:
Removed from v.3352  
changed lines
  Added in v.4154

  ViewVC Help
Powered by ViewVC 1.1.26