/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Diff of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 3352 by gross, Tue Nov 16 03:58:09 2010 UTC revision 3489 by caltinay, Wed Mar 30 00:46:04 2011 UTC
# Line 14  Line 14 
14    
15  /**************************************************************/  /**************************************************************/
16    
17  /* Paso: AMG preconditioner                                  */  /* Paso: AMG preconditioner  (local version)                  */
18    
19  /**************************************************************/  /**************************************************************/
20    
# Line 30  Line 30 
30  #include "PasoUtil.h"  #include "PasoUtil.h"
31  #include "UMFPACK.h"  #include "UMFPACK.h"
32  #include "MKL.h"  #include "MKL.h"
33    #include<stdio.h>
34    
35    
36  /**************************************************************/  /**************************************************************/
37    
38  /* free all memory used by AMG                                */  /* free all memory used by AMG                                */
39    
40  void Paso_Preconditioner_LocalAMG_free(Paso_Preconditioner_LocalAMG * in) {  void Paso_Preconditioner_AMG_free(Paso_Preconditioner_AMG * in) {
41       if (in!=NULL) {       if (in!=NULL) {
42      Paso_Preconditioner_LocalSmoother_free(in->Smoother);      Paso_Preconditioner_Smoother_free(in->Smoother);
43      Paso_SparseMatrix_free(in->P);      Paso_SystemMatrix_free(in->P);
44      Paso_SparseMatrix_free(in->R);      Paso_SystemMatrix_free(in->R);
45      Paso_SparseMatrix_free(in->A_C);      Paso_SystemMatrix_free(in->A_C);
46      Paso_Preconditioner_LocalAMG_free(in->AMG_C);      Paso_Preconditioner_AMG_free(in->AMG_C);
47      MEMFREE(in->r);      MEMFREE(in->r);
48      MEMFREE(in->x_C);      MEMFREE(in->x_C);
49      MEMFREE(in->b_C);      MEMFREE(in->b_C);
50    
       
51      MEMFREE(in);      MEMFREE(in);
52       }       }
53  }  }
54    
55    index_t Paso_Preconditioner_AMG_getMaxLevel(const Paso_Preconditioner_AMG * in) {
56       if (in->AMG_C == NULL) {
57          return in->level;
58       } else {
59          return Paso_Preconditioner_AMG_getMaxLevel(in->AMG_C);
60       }
61    }
62    double Paso_Preconditioner_AMG_getCoarseLevelSparsity(const Paso_Preconditioner_AMG * in) {
63          if (in->AMG_C == NULL) {
64         if (in->A_C == NULL) {
65            return 1.;
66         } else {
67            return Paso_SystemMatrix_getSparsity(in->A_C);
68         }
69          } else {
70            return Paso_Preconditioner_AMG_getCoarseLevelSparsity(in->AMG_C);
71          }
72    }
73    dim_t Paso_Preconditioner_AMG_getNumCoarseUnknwons(const Paso_Preconditioner_AMG * in) {
74       if (in->AMG_C == NULL) {
75          if (in->A_C == NULL) {
76         return 0;
77          } else {
78         return Paso_SystemMatrix_getTotalNumRows(in->A_C);
79          }
80       } else {
81         return Paso_Preconditioner_AMG_getNumCoarseUnknwons(in->AMG_C);
82       }
83    }
84  /*****************************************************************  /*****************************************************************
85    
86     constructs AMG     constructs AMG
87        
88  ******************************************************************/  ******************************************************************/
89  Paso_Preconditioner_LocalAMG* Paso_Preconditioner_LocalAMG_alloc(Paso_SparseMatrix *A_p,dim_t level,Paso_Options* options) {  Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {
90    
91    Paso_Preconditioner_LocalAMG* out=NULL;    Paso_Preconditioner_AMG* out=NULL;
92    bool_t verbose=options->verbose;    bool_t verbose=options->verbose;
93    
94      const dim_t my_n=A_p->mainBlock->numRows;
95      const dim_t overlap_n=A_p->row_coupleBlock->numRows;
96        
97    Paso_SparseMatrix *Atemp=NULL, *A_C=NULL;    const dim_t n = my_n + overlap_n;
98    const dim_t n=A_p->numRows;  
99    const dim_t n_block=A_p->row_block_size;    const dim_t n_block=A_p->row_block_size;
100    index_t* split_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F=NULL, *S=NULL, *degree=NULL;    index_t* F_marker=NULL, *counter=NULL;
101    dim_t n_F=0, n_C=0, i;    dim_t i;
102    double time0=0;    double time0=0;
103    const double theta = options->coarsening_threshold;    const double theta = options->coarsening_threshold;
104    const double tau = options->diagonal_dominance_threshold;    const double tau = options->diagonal_dominance_threshold;
105        const double sparsity=Paso_SystemMatrix_getSparsity(A_p);
106      const dim_t total_n=Paso_SystemMatrix_getGlobalTotalNumRows(A_p);
107    
108        
109    /*    /*
110        is the input matrix A suitable for coarsening        is the input matrix A suitable for coarsening
111                
112    */    */
113    if ( (A_p->pattern->len >= options->min_coarse_sparsity * n * n ) || (n <= options->min_coarse_matrix_size) || (level > options->level_max) ) {    if ( (sparsity >= options->min_coarse_sparsity) ||
114       if (verbose) printf("Paso_Solver: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",         (total_n <= options->min_coarse_matrix_size) ||
115      level,  options->level_max, A_p->pattern->len/(1.*n * n), options->min_coarse_sparsity, n, options->min_coarse_matrix_size  );           (level > options->level_max) ) {
116       return NULL;  
117    }          if (verbose) {
118              /*
119                  print stopping condition:
120                          - 'SPAR' = min_coarse_matrix_sparsity exceeded
121                          - 'SIZE' = min_coarse_matrix_size exceeded
122                          - 'LEVEL' = level_max exceeded
123              */
124              printf("Paso_Preconditioner: AMG: termination of coarsening by ");
125    
126              if (sparsity >= options->min_coarse_sparsity)
127                  printf("SPAR ");
128    
129              if (total_n <= options->min_coarse_matrix_size)
130                  printf("SIZE ");
131    
132              if (level > options->level_max)
133                  printf("LEVEL ");
134    
135              printf("\n");
136    
137            printf("Paso_Preconditioner: AMG level %d (limit = %d) stopped. sparsity = %e (limit = %e), unknowns = %d (limit = %d)\n",
138               level,  options->level_max, sparsity, options->min_coarse_sparsity, total_n, options->min_coarse_matrix_size);  
139    
140           }
141    
142           return NULL;
143      }  else {
144       /* Start Coarsening : */       /* Start Coarsening : */
145        
146         /* this is the table for strong connections combining mainBlock, col_coupleBlock and row_coupleBlock */
147         const dim_t len_S=A_p->mainBlock->pattern->len + A_p->col_coupleBlock->pattern->len + A_p->row_coupleBlock->pattern->len;
148    
149       split_marker=TMPMEMALLOC(n,index_t);       dim_t* degree_S=TMPMEMALLOC(n, dim_t);
150         index_t *offset_S=TMPMEMALLOC(n, index_t);
151         index_t *S=TMPMEMALLOC(len_S, index_t);
152         dim_t* degree_ST=TMPMEMALLOC(n, dim_t);
153         index_t *offset_ST=TMPMEMALLOC(n, index_t);
154         index_t *ST=TMPMEMALLOC(len_S, index_t);
155        
156        
157         F_marker=TMPMEMALLOC(n,index_t);
158       counter=TMPMEMALLOC(n,index_t);       counter=TMPMEMALLOC(n,index_t);
159       degree=TMPMEMALLOC(n, dim_t);  
160       S=TMPMEMALLOC(A_p->pattern->len, index_t);       if ( !( Esys_checkPtr(F_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree_S) || Esys_checkPtr(offset_S) || Esys_checkPtr(S)
161       if ( !( Esys_checkPtr(split_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree) || Esys_checkPtr(S) ) ) {      || Esys_checkPtr(degree_ST) || Esys_checkPtr(offset_ST) || Esys_checkPtr(ST) ) ) {
162       /*      /*
163               make sure that corresponding values in the row_coupleBlock and col_coupleBlock are identical
164        */
165            Paso_SystemMatrix_copyColCoupleBlock(A_p);
166            /* Paso_SystemMatrix_copyRemoteCoupleBlock(A_p, FALSE); */
167    
168        /*
169            set splitting of unknows:            set splitting of unknows:
170                  
171           */           */
172       time0=Esys_timer();       time0=Esys_timer();
173       if (n_block>1) {       if (n_block>1) {
174             Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree, S, theta,tau);             Paso_Preconditioner_AMG_setStrongConnections_Block(A_p, degree_S, offset_S, S, theta,tau);
175       } else {       } else {
176             Paso_Preconditioner_AMG_setStrongConnections(A_p, degree, S, theta,tau);             Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, offset_S, S, theta,tau);
177       }       }
178       Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree, S, split_marker, options->usePanel);       Paso_Preconditioner_AMG_transposeStrongConnections(n, degree_S, offset_S, S, n, degree_ST, offset_ST, ST);
      options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);  
179            
180    {
181       dim_t p;
182       for (i=0; i<n; ++i) {
183             printf("%d: ",i);
184            for (p=0; p<degree_S[i];++p) printf("%d ",S[offset_S[i]+p]);
185            printf("\n");
186       }
187    }
188         Paso_Preconditioner_AMG_CIJPCoarsening(n,my_n,F_marker,
189                                degree_S, offset_S, S, degree_ST, offset_ST, ST,
190                            A_p->col_coupler->connector,A_p->col_distribution);
191    Esys_setError(SYSTEM_ERROR, "AMG:DONE.");
192    return NULL;
193          
194    
195    /*MPI:
196         Paso_Preconditioner_AMG_RungeStuebenSearch(n, A_p->pattern->ptr, degree_S, S, F_marker, options->usePanel);
197    */
198        
199             /* in BoomerAMG interpolation is used FF connectiovity is required :*/
200    /*MPI:
201             if (options->interpolation_method == PASO_CLASSIC_INTERPOLATION_WITH_FF_COUPLING)
202                                 Paso_Preconditioner_AMG_enforceFFConnectivity(n, A_p->pattern->ptr, degree_S, S, F_marker);  
203    */
204    
205         options->coarsening_selection_time=Esys_timer()-time0 + MAX(0, options->coarsening_selection_time);
206    
207    #ifdef AAAAA
208       if (Esys_noError() ) {       if (Esys_noError() ) {
209          #pragma omp parallel for private(i) schedule(static)          #pragma omp parallel for private(i) schedule(static)
210          for (i = 0; i < n; ++i) split_marker[i]= (split_marker[i] == PASO_AMG_IN_F);          for (i = 0; i < n; ++i) F_marker[i]=(F_marker[i] ==  PASO_AMG_IN_F);
211            
212          /*          /*
213             count number of unkowns to be eliminated:             count number of unkowns to be eliminated:
214          */          */
215          n_F=Paso_Util_cumsum_maskedTrue(n,counter, split_marker);          n_F=Paso_Util_cumsum_maskedTrue(n,counter, F_marker);
216          n_C=n-n_F;          n_C=n-n_F;
217          if (verbose) printf("Paso_Solver: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);          if (verbose) printf("Paso_Preconditioner: AMG level %d: %d unknowns are flagged for elimination. %d left.\n",level,n_F,n-n_F);
218            
219          if ( n_F == 0 ) {  /*  is a nasty case. a direct solver should be used, return NULL */          if ( n_F == 0 ) {  /*  is a nasty case. a direct solver should be used, return NULL */
220             out = NULL;             out = NULL;
221          } else {          } else {
222             out=MEMALLOC(1,Paso_Preconditioner_LocalAMG);             out=MEMALLOC(1,Paso_Preconditioner_AMG);
223             if (! Esys_checkPtr(out)) {             if (! Esys_checkPtr(out)) {
224            out->level = level;            out->level = level;
225            out->n = n;            out->n = n;
# Line 137  Paso_Preconditioner_LocalAMG* Paso_Preco Line 242  Paso_Preconditioner_LocalAMG* Paso_Preco
242             Esys_checkPtr(rows_in_F);             Esys_checkPtr(rows_in_F);
243             if ( Esys_noError() ) {             if ( Esys_noError() ) {
244    
245            out->Smoother = Paso_Preconditioner_LocalSmoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);            out->Smoother = Paso_Preconditioner_Smoother_alloc(A_p, (options->smoother == PASO_JACOBI), verbose);
246                
247            if ( n_F < n ) { /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */            if (n_C != 0) {
248                   /* if nothing is been removed we have a diagonal dominant matrix and we just run a few steps of the smoother */
249        
250              /* allocate helpers :*/              /* allocate helpers :*/
251              out->x_C=MEMALLOC(n_block*n_C,double);              out->x_C=MEMALLOC(n_block*n_C,double);
# Line 156  Paso_Preconditioner_LocalAMG* Paso_Preco Line 262  Paso_Preconditioner_LocalAMG* Paso_Preco
262                 {                 {
263                    #pragma omp for schedule(static)                    #pragma omp for schedule(static)
264                    for (i = 0; i < n; ++i) {                    for (i = 0; i < n; ++i) {
265                   if  (split_marker[i]) rows_in_F[counter[i]]=i;                   if  (F_marker[i]) rows_in_F[counter[i]]=i;
266                    }                    }
267                 }                 }
268                 /*  create mask of C nodes with value >-1 gives new id */                 /*  create mask of C nodes with value >-1 gives new id */
269                 i=Paso_Util_cumsum_maskedFalse(n,counter, split_marker);                 i=Paso_Util_cumsum_maskedFalse(n,counter, F_marker);
270    
271                 #pragma omp parallel for private(i) schedule(static)                 #pragma omp parallel for private(i) schedule(static)
272                 for (i = 0; i < n; ++i) {                 for (i = 0; i < n; ++i) {
273                    if  (split_marker[i]) {                    if  (F_marker[i]) {
274                   mask_C[i]=-1;                   mask_C[i]=-1;
275                    } else {                    } else {
276                   mask_C[i]=counter[i];;                   mask_C[i]=counter[i];;
277                    }                    }
278                 }                 }
279                 /*                 /*
280                    get Restriction :                      get Prolongation :    
281                 */                                   */                  
282                 time0=Esys_timer();                 time0=Esys_timer();
283                 out->P=Paso_Preconditioner_AMG_getDirectProlongation(A_p,degree,S,n_C,mask_C);  /*MPI:
284                   out->P=Paso_Preconditioner_AMG_getProlongation(A_p,A_p->pattern->ptr, degree_S,S,n_C,mask_C, options->interpolation_method);
285    */
286                 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);                 if (SHOW_TIMING) printf("timing: level %d: getProlongation: %e\n",level, Esys_timer()-time0);
287              }              }
288              /*                    /*      
289                 construct Prolongation operator as transposed of restriction operator:                 construct Restriction operator as transposed of Prolongation operator:
290              */              */
291              if ( Esys_noError()) {              if ( Esys_noError()) {
292                 time0=Esys_timer();                 time0=Esys_timer();
293                 out->R=Paso_SparseMatrix_getTranspose(out->P);  /*MPI:
294                 if (SHOW_TIMING) printf("timing: level %d: Paso_SparseMatrix_getTranspose: %e\n",level,Esys_timer()-time0);                 out->R=Paso_SystemMatrix_getTranspose(out->P);
295    */
296                   if (SHOW_TIMING) printf("timing: level %d: Paso_SystemMatrix_getTranspose: %e\n",level,Esys_timer()-time0);
297              }                    }      
298              /*              /*
299              construct coarse level matrix:              construct coarse level matrix:
300              */              */
301              if ( Esys_noError()) {              if ( Esys_noError()) {
302                 time0=Esys_timer();                 time0=Esys_timer();
303                 Atemp=Paso_SparseMatrix_MatrixMatrix(A_p,out->P);  /*MPI:
304                 A_C=Paso_SparseMatrix_MatrixMatrix(out->R,Atemp);                 Atemp=Paso_SystemMatrix_MatrixMatrix(A_p,out->P);
305                 Paso_SparseMatrix_free(Atemp);                 A_C=Paso_SystemMatrix_MatrixMatrix(out->R,Atemp);
306                   Paso_Preconditioner_AMG_setStrongConnections
307                   Paso_SystemMatrix_free(Atemp);
308    */
309    
310                 if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);                             if (SHOW_TIMING) printf("timing: level %d : construct coarse matrix: %e\n",level,Esys_timer()-time0);            
311              }              }
312    
# Line 202  Paso_Preconditioner_LocalAMG* Paso_Preco Line 316  Paso_Preconditioner_LocalAMG* Paso_Preco
316                                
317              */              */
318              if ( Esys_noError()) {              if ( Esys_noError()) {
319                 out->AMG_C=Paso_Preconditioner_LocalAMG_alloc(A_C,level+1,options);                 out->AMG_C=Paso_Preconditioner_AMG_alloc(A_C,level+1,options);
320              }              }
321              if ( Esys_noError()) {              if ( Esys_noError()) {
322                 if ( out->AMG_C == NULL ) {                 if ( out->AMG_C == NULL ) {
# Line 210  Paso_Preconditioner_LocalAMG* Paso_Preco Line 324  Paso_Preconditioner_LocalAMG* Paso_Preco
324                    out->refinements = options->coarse_matrix_refinements;                    out->refinements = options->coarse_matrix_refinements;
325                    /* no coarse level matrix has been constructed. use direct solver */                    /* no coarse level matrix has been constructed. use direct solver */
326                    #ifdef MKL                    #ifdef MKL
327                      out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);                      out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_OFFSET1, A_C);
328                      Paso_SparseMatrix_free(A_C);                      Paso_SystemMatrix_free(A_C);
329                      out->A_C->solver_package = PASO_MKL;                      out->A_C->solver_package = PASO_MKL;
330                      if (verbose) printf("Paso_Solver: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C);                      if (verbose) printf("Paso_Preconditioner: AMG: use MKL direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
331                    #else                    #else
332                      #ifdef UMFPACK                      #ifdef UMFPACK
333                         out->A_C=Paso_SparseMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);                         out->A_C=Paso_SystemMatrix_unroll(MATRIX_FORMAT_BLK1 + MATRIX_FORMAT_CSC, A_C);
334                         Paso_SparseMatrix_free(A_C);                         Paso_SystemMatrix_free(A_C);
335                         out->A_C->solver_package = PASO_UMFPACK;                         out->A_C->solver_package = PASO_UMFPACK;
336                         if (verbose) printf("Paso_Solver: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C);                         if (verbose) printf("Paso_Preconditioner: AMG: use UMFPACK direct solver on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
337                      #else                      #else
338                         out->A_C=A_C;                         out->A_C=A_C;
339                         out->A_C->solver_p=Paso_Preconditioner_LocalSmoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);                         out->A_C->solver_p=Paso_Preconditioner_Smoother_alloc(out->A_C, (options->smoother == PASO_JACOBI), verbose);
340                         out->A_C->solver_package = PASO_SMOOTHER;                         out->A_C->solver_package = PASO_SMOOTHER;
341                         if (verbose) printf("Paso_Solver: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C);                         if (verbose) printf("Paso_Preconditioner: AMG: use smoother on the coarsest level (number of unknowns = %d).\n",n_C*n_block);
342                      #endif                      #endif
343                    #endif                    #endif
344                 } else {                 } else {
# Line 238  Paso_Preconditioner_LocalAMG* Paso_Preco Line 352  Paso_Preconditioner_LocalAMG* Paso_Preco
352             TMPMEMFREE(rows_in_F);             TMPMEMFREE(rows_in_F);
353          }          }
354       }       }
355    #endif
356    
357    }    }
358    TMPMEMFREE(counter);    TMPMEMFREE(counter);
359    TMPMEMFREE(split_marker);    TMPMEMFREE(F_marker);
360    TMPMEMFREE(degree);    TMPMEMFREE(degree_S);
361      TMPMEMFREE(offset_S);
362    TMPMEMFREE(S);    TMPMEMFREE(S);
363      TMPMEMFREE(degree_ST);
364      TMPMEMFREE(offset_ST);
365      TMPMEMFREE(ST);
366      
367      }
368    
369    if (Esys_noError()) {    if (Esys_noError()) {
370       return out;       return out;
371    } else  {    } else  {
372       Paso_Preconditioner_LocalAMG_free(out);       Paso_Preconditioner_AMG_free(out);
373       return NULL;       return NULL;
374    }    }
375  }  }
376    
377    
378  void Paso_Preconditioner_LocalAMG_solve(Paso_SparseMatrix* A, Paso_Preconditioner_LocalAMG * amg, double * x, double * b) {  void Paso_Preconditioner_AMG_solve(Paso_SystemMatrix* A, Paso_Preconditioner_AMG * amg, double * x, double * b) {
379       const dim_t n = amg->n * amg->n_block;       const dim_t n = amg->n * amg->n_block;
380       double time0=0;       double time0=0;
381       const dim_t post_sweeps=amg->post_sweeps;       const dim_t post_sweeps=amg->post_sweeps;
# Line 261  void Paso_Preconditioner_LocalAMG_solve( Line 383  void Paso_Preconditioner_LocalAMG_solve(
383    
384       /* presmoothing */       /* presmoothing */
385       time0=Esys_timer();       time0=Esys_timer();
386       Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);       Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, pre_sweeps, FALSE);
387       time0=Esys_timer()-time0;       time0=Esys_timer()-time0;
388       if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);       if (SHOW_TIMING) printf("timing: level %d: Presmooting: %e\n",amg->level, time0);
389       /* end of presmoothing */       /* end of presmoothing */
# Line 269  void Paso_Preconditioner_LocalAMG_solve( Line 391  void Paso_Preconditioner_LocalAMG_solve(
391       if (amg->n_F < amg->n) { /* is there work on the coarse level? */       if (amg->n_F < amg->n) { /* is there work on the coarse level? */
392           time0=Esys_timer();           time0=Esys_timer();
393       Paso_Copy(n, amg->r, b);                            /*  r <- b */       Paso_Copy(n, amg->r, b);                            /*  r <- b */
394       Paso_SparseMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1.,A,x,1.,amg->r); /*r=r-Ax*/
395       Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->R,amg->r,0.,amg->b_C);  /* b_c = R*r  */       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->R,amg->r,0.,amg->b_C);  /* b_c = R*r  */
396           time0=Esys_timer()-time0;           time0=Esys_timer()-time0;
397       /* coarse level solve */       /* coarse level solve */
398       if ( amg->AMG_C == NULL) {       if ( amg->AMG_C == NULL) {
399          time0=Esys_timer();          time0=Esys_timer();
400          /*  A_C is the coarsest level */          /*  A_C is the coarsest level */
401    #ifdef FIXME
402          switch (amg->A_C->solver_package) {          switch (amg->A_C->solver_package) {
403             case (PASO_MKL):             case (PASO_MKL):
404            Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);            Paso_MKL(amg->A_C, amg->x_C,amg->b_C, amg->reordering, amg->refinements, SHOW_TIMING);
# Line 284  void Paso_Preconditioner_LocalAMG_solve( Line 407  void Paso_Preconditioner_LocalAMG_solve(
407            Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);            Paso_UMFPACK(amg->A_C, amg->x_C,amg->b_C, amg->refinements, SHOW_TIMING);
408            break;            break;
409             case (PASO_SMOOTHER):             case (PASO_SMOOTHER):
410            Paso_Preconditioner_LocalSmoother_solve(amg->A_C, amg->Smoother,amg->x_C,amg->b_C,pre_sweeps, FALSE);            Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
411            break;            break;
412          }          }
413    #endif
414            Paso_Preconditioner_Smoother_solve(amg->A_C, amg->A_C->solver_p,amg->x_C,amg->b_C,pre_sweeps+post_sweeps, FALSE);
415          if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);          if (SHOW_TIMING) printf("timing: level %d: DIRECT SOLVER: %e\n",amg->level,Esys_timer()-time0);
416       } else {       } else {
417          Paso_Preconditioner_LocalAMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C)     */          Paso_Preconditioner_AMG_solve(amg->A_C, amg->AMG_C,amg->x_C,amg->b_C); /* x_C=AMG(b_C)     */
418       }         }  
419       time0=time0+Esys_timer();       time0=time0+Esys_timer();
420       Paso_SparseMatrix_MatrixVector_CSR_OFFSET0_DIAG(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */           Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(1.,amg->P,amg->x_C,1.,x); /* x = x + P*x_c */    
421            
422           /*postsmoothing*/           /*postsmoothing*/
423                
424          /*solve Ax=b with initial guess x */          /*solve Ax=b with initial guess x */
425          time0=Esys_timer();          time0=Esys_timer();
426          Paso_Preconditioner_LocalSmoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);          Paso_Preconditioner_Smoother_solve(A, amg->Smoother, x, b, post_sweeps, TRUE);
427          time0=Esys_timer()-time0;          time0=Esys_timer()-time0;
428          if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);          if (SHOW_TIMING) printf("timing: level %d: Postsmoothing: %e\n",amg->level,time0);
429          /*end of postsmoothing*/          /*end of postsmoothing*/
# Line 315  void Paso_Preconditioner_LocalAMG_solve( Line 440  void Paso_Preconditioner_LocalAMG_solve(
440  in the sense that |A_{ij}| >= theta * max_k |A_{ik}|  in the sense that |A_{ij}| >= theta * max_k |A_{ik}|
441  */  */
442    
443  void Paso_Preconditioner_AMG_setStrongConnections(Paso_SparseMatrix* A,  void Paso_Preconditioner_AMG_setStrongConnections(Paso_SystemMatrix* A,
444                        dim_t *degree, index_t *S,                            dim_t *degree_S, index_t *offset_S, index_t *S,
445                        const double theta, const double tau)                        const double theta, const double tau)
446  {  {
    const dim_t n=A->numRows;  
    index_t iptr, i,j;  
    dim_t kdeg;  
    double max_offdiagonal, threshold, sum_row, main_row, fnorm;  
447    
448       const dim_t my_n=A->mainBlock->numRows;
449       const dim_t overlap_n=A->row_coupleBlock->numRows;
450      
451       index_t iptr, i;
452       double *threshold_p=NULL;
453    
454        #pragma omp parallel for private(i,iptr,max_offdiagonal, threshold,j, kdeg, sum_row, main_row, fnorm) schedule(static)  
455        for (i=0;i<n;++i) {     threshold_p = TMPMEMALLOC(2*my_n, double);
456                
457       max_offdiagonal = 0.;     #pragma omp parallel for private(i,iptr) schedule(static)
458       sum_row=0;     for (i=0;i<my_n;++i) {        
459       main_row=0;      
460         register double max_offdiagonal = 0.;
461         register double sum_row=0;
462         register double main_row=0;
463         register dim_t kdeg=0;
464             register const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
465            
466         /* collect information for row i: */
467       #pragma ivdep       #pragma ivdep
468       for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {       for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
469          j=A->pattern->index[iptr];          register index_t j=A->mainBlock->pattern->index[iptr];
470          fnorm=ABS(A->val[iptr]);          register double fnorm=ABS(A->mainBlock->val[iptr]);
471                    
472          if( j != i) {          if( j != i) {
473             max_offdiagonal = MAX(max_offdiagonal,fnorm);             max_offdiagonal = MAX(max_offdiagonal,fnorm);
# Line 342  void Paso_Preconditioner_AMG_setStrongCo Line 475  void Paso_Preconditioner_AMG_setStrongCo
475          } else {          } else {
476             main_row=fnorm;             main_row=fnorm;
477          }          }
478    
479       }       }
480       threshold = theta*max_offdiagonal;      
481       kdeg=0;       #pragma ivdep
482       if (tau*main_row < sum_row) { /* no diagonal domainance */       for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
483          #pragma ivdep          register double fnorm=ABS(A->col_coupleBlock->val[iptr]);
484          for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {          max_offdiagonal = MAX(max_offdiagonal,fnorm);
485             j=A->pattern->index[iptr];          sum_row+=fnorm;
            if(ABS(A->val[iptr])>threshold && i!=j) {  
           S[A->pattern->ptr[i]+kdeg] = j;  
           kdeg++;  
            }  
         }  
486       }       }
487       degree[i]=kdeg;  
488             /* inspect row i: */
489             {
490            const double threshold = theta*max_offdiagonal;
491                threshold_p[2*i+1]=threshold;
492            if (tau*main_row < sum_row) { /* no diagonal domainance */
493                   threshold_p[2*i]=1;
494               #pragma ivdep
495               for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
496                  register index_t j=A->mainBlock->pattern->index[iptr];
497                  if(ABS(A->mainBlock->val[iptr])>threshold && i!=j) {
498                 S[koffset+kdeg] = j;
499                 kdeg++;
500                  }
501               }
502               #pragma ivdep
503               for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
504                  register index_t j=A->col_coupleBlock->pattern->index[iptr];
505                  if(ABS(A->col_coupleBlock->val[iptr])>threshold) {
506                 S[koffset+kdeg] = j + my_n;
507                 kdeg++;
508                  }
509               }
510                } else {
511                   threshold_p[2*i]=-1;
512                }
513             }
514             offset_S[i]=koffset;
515         degree_S[i]=kdeg;
516        }        }
517          /* now we need to distribute the threshold and the diagonal dominance indicator */
518          if (A->mpi_info->size > 1) {
519    
520              const index_t koffset_0=A->mainBlock->pattern->ptr[my_n]+A->col_coupleBlock->pattern->ptr[my_n]
521                     -A->mainBlock->pattern->ptr[0]-A->col_coupleBlock->pattern->ptr[0];
522          
523              double *remote_threshold=NULL;
524              
525          Paso_Coupler* threshold_coupler=Paso_Coupler_alloc(A->row_coupler->connector  ,2);
526              Paso_Coupler_startCollect(threshold_coupler,threshold_p);
527              Paso_Coupler_finishCollect(threshold_coupler);
528              remote_threshold=threshold_coupler->recv_buffer;
529    
530              #pragma omp parallel for private(i,iptr) schedule(static)
531              for (i=0; i<overlap_n; i++) {
532            
533              const double threshold = remote_threshold[2*i+1];
534              register dim_t kdeg=0;
535                  register const index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i];
536                  if (remote_threshold[2*i]>0) {
537                 #pragma ivdep
538                 for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
539                  register index_t j=A->row_coupleBlock->pattern->index[iptr];
540                  if(ABS(A->row_coupleBlock->val[iptr])>threshold) {
541                 S[koffset+kdeg] = j ;
542                 kdeg++;
543                  }
544               }
545    
546                  }
547                  offset_S[i+my_n]=koffset;
548              degree_S[i+my_n]=kdeg;
549              }
550    
551              Paso_Coupler_free(threshold_coupler);
552         }
553         TMPMEMFREE(threshold_p);
554  }  }
555    
556  /* theta = threshold for strong connections */  /* theta = threshold for strong connections */
# Line 366  void Paso_Preconditioner_AMG_setStrongCo Line 559  void Paso_Preconditioner_AMG_setStrongCo
559    
560  in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F  in the sense that |A_{ij}|_F >= theta * max_k |A_{ik}|_F
561  */  */
562  void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SparseMatrix* A,  void Paso_Preconditioner_AMG_setStrongConnections_Block(Paso_SystemMatrix* A,
563                              dim_t *degree, index_t *S,                              dim_t *degree_S, index_t *offset_S, index_t *S,
564                              const double theta, const double tau)                              const double theta, const double tau)
565    
566  {  {
567     const dim_t n_block=A->row_block_size;     const dim_t n_block=A->block_size;
568     const dim_t n=A->numRows;     const dim_t my_n=A->mainBlock->numRows;
569     index_t iptr, i,j, bi;     const dim_t overlap_n=A->row_coupleBlock->numRows;
570     dim_t kdeg, max_deg;    
571     register double max_offdiagonal, threshold, fnorm, sum_row, main_row;     index_t iptr, i, bi;
572     double *rtmp;     double *threshold_p=NULL;
573        
574        
575        #pragma omp parallel private(i,iptr,max_offdiagonal, kdeg, threshold,j, max_deg, fnorm, sum_row, main_row, rtmp)     threshold_p = TMPMEMALLOC(2*my_n, double);
576        {    
577       max_deg=0;     #pragma omp parallel private(i,iptr, bi)
578       #pragma omp for schedule(static)     {
579       for (i=0;i<n;++i) max_deg=MAX(max_deg, A->pattern->ptr[i+1]-A->pattern->ptr[i]);    
580                dim_t max_deg=0;
581       rtmp=TMPMEMALLOC(max_deg, double);        double *rtmp=NULL;
582          
583       #pragma omp for schedule(static)        #pragma omp for schedule(static)
584       for (i=0;i<n;++i) {        for (i=0;i<my_n;++i) max_deg=MAX(max_deg, A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]
585                             +A->col_coupleBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i]);
586          max_offdiagonal = 0.;        
587          sum_row=0;        rtmp=TMPMEMALLOC(max_deg, double);
588          main_row=0;        
589          for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {        #pragma omp for schedule(static)
590             j=A->pattern->index[iptr];        for (i=0;i<my_n;++i) {        
591             fnorm=0;       register double max_offdiagonal = 0.;
592             #pragma ivdep       register double sum_row=0;
593             for(bi=0;bi<n_block*n_block;++bi) fnorm+=A->val[iptr*n_block*n_block+bi]*A->val[iptr*n_block*n_block+bi];       register double main_row=0;
594             fnorm=sqrt(fnorm);       register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];
595             rtmp[iptr-A->pattern->ptr[i]]=fnorm;       register dim_t kdeg=0;
596             if( j != i) {       register const index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
597            max_offdiagonal = MAX(max_offdiagonal,fnorm);      
598            sum_row+=fnorm;       /* collect information for row i: */
599             } else {       for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
600            main_row=fnorm;          register index_t j=A->mainBlock->pattern->index[iptr];
601             }          register double fnorm=0;
602            #pragma ivdep
603            for(bi=0;bi<n_block;++bi) {
604                register double  rtmp2= A->mainBlock->val[iptr*n_block+bi];
605               fnorm+=rtmp2*rtmp2;
606          }          }
607          threshold = theta*max_offdiagonal;          fnorm=sqrt(fnorm);
608            rtmp[iptr+rtmp_offset]=fnorm;
609            
610            if( j != i) {
611               max_offdiagonal = MAX(max_offdiagonal,fnorm);
612               sum_row+=fnorm;
613            } else {
614               main_row=fnorm;
615            }
616        
617         }
618          
619             rtmp_offset+=A->mainBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i];
620         for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
621            register double fnorm=0;
622            #pragma ivdep
623            for(bi=0;bi<n_block;++bi) {
624               register double rtmp2 = A->col_coupleBlock->val[iptr*n_block+bi];
625               fnorm+=rtmp2*rtmp2;
626            }
627            fnorm=sqrt(fnorm);
628            
629            rtmp[iptr+rtmp_offset]=fnorm;
630            max_offdiagonal = MAX(max_offdiagonal,fnorm);
631            sum_row+=fnorm;
632         }
633        
634                
635          kdeg=0;       /* inspect row i: */
636         {
637            const double threshold = theta*max_offdiagonal;
638            rtmp_offset=-A->mainBlock->pattern->ptr[i];
639            
640            threshold_p[2*i+1]=threshold;
641          if (tau*main_row < sum_row) { /* no diagonal domainance */          if (tau*main_row < sum_row) { /* no diagonal domainance */
642               threshold_p[2*i]=1;
643               rtmp_offset=-A->mainBlock->pattern->ptr[i];
644               #pragma ivdep
645               for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
646              register index_t j=A->mainBlock->pattern->index[iptr];
647              if(rtmp[iptr+rtmp_offset] > threshold && i!=j) {
648                 S[koffset+kdeg] = j;
649                 kdeg++;
650              }
651               }
652               rtmp_offset+=A->mainBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i];
653             #pragma ivdep             #pragma ivdep
654             for (iptr=A->pattern->ptr[i];iptr<A->pattern->ptr[i+1]; ++iptr) {             for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
655            j=A->pattern->index[iptr];            register index_t j=A->col_coupleBlock->pattern->index[iptr];
656            if(rtmp[iptr-A->pattern->ptr[i]] > threshold && i!=j) {            if( rtmp[iptr+rtmp_offset] >threshold) {
657               S[A->pattern->ptr[i]+kdeg] = j;               S[koffset+kdeg] = j + my_n;
658               kdeg++;               kdeg++;
659            }            }
660             }             }
661            } else {
662               threshold_p[2*i]=-1;
663            }
664         }
665         offset_S[i]=koffset;
666         degree_S[i]=kdeg;
667          }
668          TMPMEMFREE(rtmp);
669       }
670       /* now we need to distribute the threshold and the diagonal dominance indicator */
671       if (A->mpi_info->size > 1) {
672          
673          const index_t koffset_0=A->mainBlock->pattern->ptr[my_n]+A->col_coupleBlock->pattern->ptr[my_n]
674                                 -A->mainBlock->pattern->ptr[0]-A->col_coupleBlock->pattern->ptr[0];
675          
676          double *remote_threshold=NULL;
677          
678          Paso_Coupler* threshold_coupler=Paso_Coupler_alloc(A->row_coupler->connector  ,2);
679          Paso_Coupler_startCollect(threshold_coupler,threshold_p);
680          Paso_Coupler_finishCollect(threshold_coupler);
681          remote_threshold=threshold_coupler->recv_buffer;
682          
683          #pragma omp parallel for private(i,iptr) schedule(static)
684          for (i=0; i<overlap_n; i++) {
685        
686         const double threshold2 = remote_threshold[2*i+1]*remote_threshold[2*i+1];
687         register dim_t kdeg=0;
688         register const index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i];
689         if (remote_threshold[2*i]>0) {
690            #pragma ivdep
691            for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
692               register index_t j=A->row_coupleBlock->pattern->index[iptr];
693               register double fnorm2=0;
694               #pragma ivdepremote_threshold[2*i]
695               for(bi=0;bi<n_block;++bi) {
696              register double rtmp2 = A->row_coupleBlock->val[iptr*n_block+bi];
697              fnorm2+=rtmp2*rtmp2;
698               }
699              
700               if(fnorm2 > threshold2 ) {
701              S[koffset+kdeg] = j ;
702              kdeg++;
703               }
704          }          }
705          degree[i]=kdeg;          
706       }             }
707       TMPMEMFREE(rtmp);       offset_S[i+my_n]=koffset;
708        } /* end of parallel region */       degree_S[i+my_n]=kdeg;
709          }
710  }          Paso_Coupler_free(threshold_coupler);
711       }
712  /* the runge stueben coarsening algorithm: */     TMPMEMFREE(threshold_p);
713  void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset,  }
714                          const dim_t* degree, const index_t* S,  void Paso_Preconditioner_AMG_transposeStrongConnections(const dim_t n, const dim_t* degree_S, const index_t* offset_S, const index_t* S,
715                          index_t*split_marker, const bool_t usePanel)                              const dim_t nT, dim_t* degree_ST, index_t* offset_ST,index_t* ST)
716  {  {
717         index_t i, j;
718     index_t *lambda=NULL, *ST=NULL, *notInPanel=NULL, *panel=NULL, lambda_max, lambda_k;     dim_t p;
719     dim_t i,k, p, q, *degreeT=NULL, len_panel, len_panel_new;     dim_t len=0;
720     register index_t j, itmp;     #pragma omp parallel for private(i) schedule(static)
721         for (i=0; i<nT ;++i) {
722     if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */        degree_ST[i]=0;
     
    lambda=TMPMEMALLOC(n, index_t); Esys_checkPtr(lambda);  
    degreeT=TMPMEMALLOC(n, dim_t); Esys_checkPtr(degreeT);  
    ST=TMPMEMALLOC(offset[n], index_t);  Esys_checkPtr(ST);  
    if (usePanel) {  
       notInPanel=TMPMEMALLOC(n, bool_t); Esys_checkPtr(notInPanel);  
       panel=TMPMEMALLOC(n, index_t); Esys_checkPtr(panel);  
723     }     }
724       for (i=0; i<n ;++i) {
725          for (p=0; p<degree_S[i]; ++p) degree_ST[ S[offset_S[i]+p] ]++;
726       }
727       for (i=0; i<nT ;++i) {
728          offset_ST[i]=len;
729          len+=degree_ST[i];
730          degree_ST[i]=0;
731       }
732       for (i=0; i<n ;++i) {
733          for (p=0; p<degree_S[i]; ++p) {
734           j=S[offset_S[i]+p];
735           ST[offset_ST[j]+degree_ST[j]]=i;
736           degree_ST[j]++;
737          }
738       }
739    }
740    
741    void Paso_Preconditioner_AMG_CIJPCoarsening(const dim_t n, const dim_t my_n, index_t*split_marker,
742                            const dim_t* degree_S, const index_t* offset_S, const index_t* S,
743                            const dim_t* degree_ST, const index_t* offset_ST, const index_t* ST,
744                            Paso_Connector* col_connector, Paso_Distribution* col_dist)
745    {
746       dim_t i, numUndefined,   iter=0;
747      index_t iptr, jptr, kptr;
748      double *random=NULL, *w=NULL, *Status=NULL;
749      index_t * ST_flag=NULL;
750    
751      Paso_Coupler* w_coupler=Paso_Coupler_alloc(col_connector  ,1);
752        
753      w=TMPMEMALLOC(n, double);
754      Status=TMPMEMALLOC(n, double);
755      random = Paso_Distribution_createRandomVector(col_dist,1);
756      ST_flag=TMPMEMALLOC(offset_ST[n-1]+ degree_ST[n-1], index_t);
757        
758        #pragma omp parallel for private(i)
759     if (Esys_noError() ) {    for (i=0; i< my_n; ++i) {
760        /* initialize  split_marker and split_marker :*/        w[i]=degree_ST[i]+random[i];
761        /* those unknows which are not influenced go into F, the rest is available for F or C */        if (degree_ST[i] < 1) {
762        #pragma omp parallel for private(i) schedule(static)         Status[i]=-100; /* F point  */
763        for (i=0;i<n;++i) {        } else {
764       degreeT[i]=0;         Status[i]=1; /* status undefined */
      if (degree[i]>0) {  
         lambda[i]=0;  
         split_marker[i]=PASO_AMG_UNDECIDED;  
      } else {  
         split_marker[i]=PASO_AMG_IN_F;  
         lambda[i]=-1;  
      }  
765        }        }
766        /* create transpose :*/    }
767        for (i=0;i<n;++i) {    #pragma omp parallel for private(i, iptr)
768          for (p=0; p<degree[i]; ++p) {    for (i=0; i< n; ++i) {
769             j=S[offset[i]+p];        for( iptr =0 ; iptr < degree_ST[i]; ++iptr)  {
770             ST[offset[j]+degreeT[j]]=i;       ST_flag[offset_ST[i]+iptr]=1;
            degreeT[j]++;  
         }  
771        }        }
772        /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */    }  
773        #pragma omp parallel for private(i, j, itmp) schedule(static)  
774        for (i=0;i<n;++i) {    
775       if (split_marker[i]==PASO_AMG_UNDECIDED) {    numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );
776          itmp=lambda[i];    printf(" coarsening loop start: num of undefined rows = %d \n",numUndefined);
777          for (p=0; p<degreeT[i]; ++p) {  
778             j=ST[offset[i]+p];    
779             if (split_marker[j]==PASO_AMG_UNDECIDED) {    iter=0;
780            itmp++;    while (numUndefined > 0) {
781             } else {  /* at this point there are no C points */       Paso_Coupler_fillOverlap(n, w, w_coupler);
782            itmp+=2;       {
783        int p;
784        for (p=0; p<my_n; ++p) {
785           printf(" %d : %f %f \n",p, w[p], Status[p]);
786        }
787        for (p=my_n; p<n; ++p) {
788           printf(" %d : %f \n",p, w[p]);
789        }
790        
791        
792         }
793        
794          /* calculate the maximum value of naigbours following active strong connections:
795            w2[i]=MAX(w[k]) with k in ST[i] or S[i] and (i,k) conenction is still active  */      
796          #pragma omp parallel for private(i, iptr)
797          for (i=0; i<my_n; ++i) {
798         if (Status[i]>0) { /* status is still undefined */
799    
800            register bool_t inD=TRUE;
801            const double wi=w[i];
802    
803    
804            for( iptr =0 ; iptr < degree_S[i]; ++iptr) {
805    
806               const index_t k=S[offset_S[i]+iptr];
807               const index_t* start_p = &ST[offset_ST[k]];
808               const index_t* where_p=(index_t*)bsearch(&i, start_p, degree_ST[k], sizeof(index_t), Paso_comparIndex);
809    
810               if (ST_flag[(index_t)(where_p-start_p)]>0) {
811    printf("S: %d (%e) -> %d    (%e)\n",i, wi, k, w[k]);      
812              if (wi <= w[k] ) {
813                 inD=FALSE;
814                 break;
815              }
816             }             }
817            
818            }
819            
820            if (inD) {
821              for( iptr =0 ; iptr < degree_ST[i]; ++iptr) {
822                 const index_t k=ST[offset_ST[i]+iptr];
823                 if ( ST_flag[offset_ST[i]+iptr] > 0 ) {
824    printf("ST: %d (%e) -> %d   (%e)\n",i, wi, k, w[k]);      
825                
826                           if (wi <= w[k] ) {
827                   inD=FALSE;
828                   break;
829                }
830                 }
831              }
832            }    
833            if (inD) {
834               Status[i]=0.; /* is in D */
835    printf("%d is in D\n",i);
836          }          }
         lambda[i]=itmp;  
837       }       }
838        
839        }        }
       if (usePanel) {  
      #pragma omp parallel for private(i) schedule(static)  
      for (i=0;i<n;++i) notInPanel[i]=TRUE;  
       }  
       /* start search :*/  
       i=Paso_Util_arg_max(n,lambda);  
       while (lambda[i]>-1) { /* is there any undecided unknown? */  
   
      if (usePanel) {  
         len_panel=0;  
         do {  
            /* the unknown i is moved to C */  
            split_marker[i]=PASO_AMG_IN_C;  
            lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */  
             
            /* all undecided unknown strongly coupled to i are moved to F */  
            for (p=0; p<degreeT[i]; ++p) {  
           j=ST[offset[i]+p];  
             
           if (split_marker[j]==PASO_AMG_UNDECIDED) {  
               
              split_marker[j]=PASO_AMG_IN_F;  
              lambda[j]=-1;  
               
              for (q=0; q<degreeT[j]; ++q) {  
             k=ST[offset[j]+q];  
             if (split_marker[k]==PASO_AMG_UNDECIDED) {  
                lambda[k]++;  
                if (notInPanel[k]) {  
                   notInPanel[k]=FALSE;  
                   panel[len_panel]=k;  
                   len_panel++;  
                }  
840    
841              }    /* the unknown i is moved to C */        Paso_Coupler_fillOverlap(n, Status, w_coupler);
842              split_marker[i]=PASO_AMG_IN_C;  
843              lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */  
844               }       /*   remove connection to D points :
845                    
846               for each i in D:
847              for each j in S_i:
848                 w[j]--
849                 ST_tag[j,i]=-1
850              for each j in ST[i]:
851                 ST_tag[i,j]=-1
852                 for each k in ST[j]:
853                if k in ST[i]:
854                   w[j]--;
855                ST_tag[j,k]=-1
856                
857         */
858         /* w is updated  for local rows only */
859         {
860            #pragma omp parallel for private(i, jptr)
861            for (i=0; i< my_n; ++i) {
862    
863               for (jptr=0; jptr<degree_ST[i]; ++jptr) {
864              const index_t j=ST[offset_ST[i]+jptr];
865              if ( (Status[j] == 0.) && (ST_flag[offset_ST[i]+jptr]>0) ) {
866                 w[i]--;
867    printf("%d reduced by %d\n",i,j);
868                 ST_flag[offset_ST[i]+jptr]=-1;
869            }            }
870             }             }
871             for (p=0; p<degree[i]; ++p) {            
872            j=S[offset[i]+p];          }
873            if (split_marker[j]==PASO_AMG_UNDECIDED) {          #pragma omp parallel for private(i, jptr)
874               lambda[j]--;          for (i=my_n; i< n; ++i) {
875               if (notInPanel[j]) {             for (jptr=0; jptr<degree_ST[i]; ++jptr) {
876              notInPanel[j]=FALSE;            const index_t j = ST[offset_ST[i]+jptr];
877              panel[len_panel]=j;            if ( Status[j] == 0. ) ST_flag[offset_ST[i]+jptr]=-1;
             len_panel++;  
              }  
           }  
878             }             }
879            }
880    
881            
882            for (i=0; i< n; ++i) {
883               if ( Status[i] == 0. ) {
884    
885                         const index_t* start_p = &ST[offset_ST[i]];
886    
887             /* consolidate panel */               for (jptr=0; jptr<degree_ST[i]; ++jptr) {
888             /* remove lambda[q]=-1 */              const index_t j=ST[offset_ST[i]+jptr];
889             lambda_max=-1;  printf("check connection: %d %d\n",i,j);
890             i=-1;              ST_flag[offset_ST[i]+jptr]=-1;
891             len_panel_new=0;              for (kptr=0; kptr<degree_ST[j]; ++kptr) {
892             for (q=0; q<len_panel; q++) {                 const index_t k=ST[offset_ST[j]+kptr];
893               k=panel[q];  printf("check connection: %d of %d\n",k,j);
894               lambda_k=lambda[k];                 if (NULL != bsearch(&k, start_p, degree_ST[i], sizeof(index_t), Paso_comparIndex) ) { /* k in ST[i] ? */
895               if (split_marker[k]==PASO_AMG_UNDECIDED) {  printf("found!\n");
896              panel[len_panel_new]=k;                    if (ST_flag[offset_ST[j]+kptr] >0) {
897              len_panel_new++;                   if (j< my_n ) {
898                        w[j]--;
899              if (lambda_max == lambda_k) {  printf("%d reduced by %d and %d \n",j, i,k);
900                 if (k<i) i=k;                   }
901              } else if (lambda_max < lambda_k) {                   ST_flag[offset_ST[j]+kptr]=-1;
902                 lambda_max =lambda_k;                    }
903                 i=k;                 }
904              }              }
905               }               }
            }  
            len_panel=len_panel_new;  
         } while (len_panel>0);      
      } else {  
         /* the unknown i is moved to C */  
         split_marker[i]=PASO_AMG_IN_C;  
         lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */  
           
         /* all undecided unknown strongly coupled to i are moved to F */  
         for (p=0; p<degreeT[i]; ++p) {  
            j=ST[offset[i]+p];  
            if (split_marker[j]==PASO_AMG_UNDECIDED) {  
           
           split_marker[j]=PASO_AMG_IN_F;  
           lambda[j]=-1;  
           
           for (q=0; q<degreeT[j]; ++q) {  
              k=ST[offset[j]+q];  
              if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;  
906            }            }
   
            }  
907          }          }
908          for (p=0; p<degree[i]; ++p) {       }
909             j=S[offset[i]+p];       /* adjust status */
910             if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;       #pragma omp parallel for private(i)
911         for (i=0; i< my_n; ++i) {
912            if ( Status[i] == 0. ) {
913               Status[i] = -10;   /* this is now a C point */
914            } else if ( w[i]<1.) {
915               Status[i] = -100;   /* this is now a F point */  
916          }          }
           
917       }       }
918       i=Paso_Util_arg_max(n,lambda);      
919        }      
920     }       numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );
921     TMPMEMFREE(lambda);       iter++;
922     TMPMEMFREE(ST);       printf(" coarsening loop %d: num of undefined rows = %d \n",iter, numUndefined);
923     TMPMEMFREE(degreeT);    } /* end of while loop */
924     TMPMEMFREE(panel);  
925     TMPMEMFREE(notInPanel);  
926      /* map to output :*/
927      Paso_Coupler_fillOverlap(n, Status, w_coupler);
928      #pragma omp parallel for private(i)
929      for (i=0; i< n; ++i) {
930         if (Status[i] > -50.) {
931            split_marker[i]=PASO_AMG_IN_C;
932         } else {
933            split_marker[i]=PASO_AMG_IN_F;
934         }
935      }
936    
937      /* clean up : */
938      Paso_Coupler_free(w_coupler);
939      TMPMEMFREE(random);
940      TMPMEMFREE(w);
941      TMPMEMFREE(Status);
942      TMPMEMFREE(ST_flag);
943      
944      return;
945  }  }

Legend:
Removed from v.3352  
changed lines
  Added in v.3489

  ViewVC Help
Powered by ViewVC 1.1.26