/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Diff of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 3481 by gross, Mon Jan 31 07:06:42 2011 UTC revision 3482 by gross, Wed Mar 23 04:06:52 2011 UTC
# Line 91  Paso_Preconditioner_AMG* Paso_Preconditi Line 91  Paso_Preconditioner_AMG* Paso_Preconditi
91    Paso_Preconditioner_AMG* out=NULL;    Paso_Preconditioner_AMG* out=NULL;
92    bool_t verbose=options->verbose;    bool_t verbose=options->verbose;
93    
94    const dim_t my_n=Paso_SystemMatrix_getNumRows(A_p);    const dim_t my_n=A_p->mainBlock->numRows;
95    const dim_t overlap_n=Paso_SystemMatrix_getColOverlap(A_p);    const dim_t overlap_n=A_p->row_coupleBlock->numRows;
96      
97    const dim_t n = my_n + overlap_n;    const dim_t n = my_n + overlap_n;
98    
99    const dim_t n_block=A_p->row_block_size;    const dim_t n_block=A_p->row_block_size;
# Line 141  Paso_Preconditioner_AMG* Paso_Preconditi Line 142  Paso_Preconditioner_AMG* Paso_Preconditi
142         return NULL;         return NULL;
143    }  else {    }  else {
144       /* Start Coarsening : */       /* Start Coarsening : */
145       const dim_t len_S=A_p->mainBlock->pattern->len+A_p->col_coupleBlock->pattern->len;      
146       dim_t* degree_S=TMPMEMALLOC(my_n, dim_t);       /* this is the table for strong connections combining mainBlock, col_coupleBlock and row_coupleBlock */
147       index_t *offset_S=TMPMEMALLOC(my_n, index_t);       const dim_t len_S=A_p->mainBlock->pattern->len + A_p->col_coupleBlock->pattern->len + A_p->row_coupleBlock->pattern->len;
      index_t *S=TMPMEMALLOC(len_S, index_t);  
148    
149         dim_t* degree_S=TMPMEMALLOC(n, dim_t);
150         index_t *offset_S=TMPMEMALLOC(n, index_t);
151         index_t *S=TMPMEMALLOC(len_S, index_t);
152         dim_t* degree_ST=TMPMEMALLOC(n, dim_t);
153         index_t *offset_ST=TMPMEMALLOC(n, index_t);
154         index_t *ST=TMPMEMALLOC(len_S, index_t);
155        
156        
157       F_marker=TMPMEMALLOC(n,index_t);       F_marker=TMPMEMALLOC(n,index_t);
158       counter=TMPMEMALLOC(n,index_t);       counter=TMPMEMALLOC(n,index_t);
159    
160       if ( !( Esys_checkPtr(F_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree_S) || Esys_checkPtr(offset_S) || Esys_checkPtr(S) ) ) {       if ( !( Esys_checkPtr(F_marker) || Esys_checkPtr(counter) || Esys_checkPtr(degree_S) || Esys_checkPtr(offset_S) || Esys_checkPtr(S)
161        || Esys_checkPtr(degree_ST) || Esys_checkPtr(offset_ST) || Esys_checkPtr(ST) ) ) {
162      /*      /*
163             make sure that corresponding values in the row_coupleBlock and col_coupleBlock are identical             make sure that corresponding values in the row_coupleBlock and col_coupleBlock are identical
164      */      */
165      Paso_SystemMatrix_copyColCoupleBlock(A_p);          Paso_SystemMatrix_copyColCoupleBlock(A_p);
166            /* Paso_SystemMatrix_copyRemoteCoupleBlock(A_p, FALSE); */
167    
168      /*      /*
169            set splitting of unknows:            set splitting of unknows:
# Line 165  Paso_Preconditioner_AMG* Paso_Preconditi Line 175  Paso_Preconditioner_AMG* Paso_Preconditi
175       } else {       } else {
176             Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, offset_S, S, theta,tau);             Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, offset_S, S, theta,tau);
177       }       }
178      Esys_setError(SYSTEM_ERROR, "AMG:DONE.");       Paso_Preconditioner_AMG_transposeStrongConnections(n, degree_S, offset_S, S, n, degree_ST, offset_ST, ST);
179      return NULL;      
180  {  {
181     dim_t p;     dim_t p;
182     for (i=0; i< my_n; ++i) {     for (i=0; i<n; ++i) {
183           printf("%d: ",i);           printf("%d: ",i);
184          for (p=0; p<degree_S[i];++p) printf("%d ",S[offset_S[i]+p]);          for (p=0; p<degree_S[i];++p) printf("%d ",S[offset_S[i]+p]);
185          printf("\n");          printf("\n");
186     }     }
187  }  }
188         Paso_Preconditioner_AMG_CIJPCoarsening(n,my_n,F_marker,
189                                degree_S, offset_S, S, degree_ST, offset_ST, ST,
190                            A_p->col_coupler->connector,A_p->col_distribution);
191    Esys_setError(SYSTEM_ERROR, "AMG:DONE.");
192    return NULL;
193                
194    
195  /*MPI:  /*MPI:
# Line 288  Paso_Preconditioner_AMG* Paso_Preconditi Line 303  Paso_Preconditioner_AMG* Paso_Preconditi
303  /*MPI:  /*MPI:
304                 Atemp=Paso_SystemMatrix_MatrixMatrix(A_p,out->P);                 Atemp=Paso_SystemMatrix_MatrixMatrix(A_p,out->P);
305                 A_C=Paso_SystemMatrix_MatrixMatrix(out->R,Atemp);                 A_C=Paso_SystemMatrix_MatrixMatrix(out->R,Atemp);
306                                 Paso_Preconditioner_AMG_setStrongConnections
307                 Paso_SystemMatrix_free(Atemp);                 Paso_SystemMatrix_free(Atemp);
308  */  */
309    
# Line 345  Paso_Preconditioner_AMG* Paso_Preconditi Line 360  Paso_Preconditioner_AMG* Paso_Preconditi
360    TMPMEMFREE(degree_S);    TMPMEMFREE(degree_S);
361    TMPMEMFREE(offset_S);    TMPMEMFREE(offset_S);
362    TMPMEMFREE(S);    TMPMEMFREE(S);
363      TMPMEMFREE(degree_ST);
364      TMPMEMFREE(offset_ST);
365      TMPMEMFREE(ST);
366      
367    }    }
368    
369    if (Esys_noError()) {    if (Esys_noError()) {
# Line 425  void Paso_Preconditioner_AMG_setStrongCo Line 444  void Paso_Preconditioner_AMG_setStrongCo
444                            dim_t *degree_S, index_t *offset_S, index_t *S,                            dim_t *degree_S, index_t *offset_S, index_t *S,
445                        const double theta, const double tau)                        const double theta, const double tau)
446  {  {
447     const dim_t my_n=Paso_SystemMatrix_getNumRows(A);  
448       const dim_t my_n=A->mainBlock->numRows;
449       const dim_t overlap_n=A->row_coupleBlock->numRows;
450      
451     index_t iptr, i;     index_t iptr, i;
452       double *threshold_p=NULL;
453    
454    
455        #pragma omp parallel for private(i,iptr) schedule(static)     threshold_p = TMPMEMALLOC(2*my_n, double);
456        for (i=0;i<my_n;++i) {            
457       #pragma omp parallel for private(i,iptr) schedule(static)
458       for (i=0;i<my_n;++i) {        
459        
460       register double max_offdiagonal = 0.;       register double max_offdiagonal = 0.;
461       register double sum_row=0;       register double sum_row=0;
462       register double main_row=0;       register double main_row=0;
463       register dim_t kdeg=0;       register dim_t kdeg=0;
464           register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];           const register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
465            
466         /* collect information for row i: */
467       #pragma ivdep       #pragma ivdep
468       for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {       for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
469          register index_t j=A->mainBlock->pattern->index[iptr];          register index_t j=A->mainBlock->pattern->index[iptr];
# Line 449  void Paso_Preconditioner_AMG_setStrongCo Line 477  void Paso_Preconditioner_AMG_setStrongCo
477          }          }
478    
479       }       }
480        
481       #pragma ivdep       #pragma ivdep
482       for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {       for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
483          register double fnorm=ABS(A->col_coupleBlock->val[iptr]);          register double fnorm=ABS(A->col_coupleBlock->val[iptr]);
# Line 456  void Paso_Preconditioner_AMG_setStrongCo Line 485  void Paso_Preconditioner_AMG_setStrongCo
485          sum_row+=fnorm;          sum_row+=fnorm;
486       }       }
487    
488             /* inspect row i: */
489           {           {
490          const double threshold = theta*max_offdiagonal;          const double threshold = theta*max_offdiagonal;
491                threshold_p[2*i+1]=threshold;
492          if (tau*main_row < sum_row) { /* no diagonal domainance */          if (tau*main_row < sum_row) { /* no diagonal domainance */
493                   threshold_p[2*i]=1;
494             #pragma ivdep             #pragma ivdep
495             for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {             for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
496                register index_t j=A->mainBlock->pattern->index[iptr];                register index_t j=A->mainBlock->pattern->index[iptr];
# Line 475  void Paso_Preconditioner_AMG_setStrongCo Line 507  void Paso_Preconditioner_AMG_setStrongCo
507               kdeg++;               kdeg++;
508                }                }
509             }             }
510                } else {
511                   threshold_p[2*i]=-1;
512              }              }
513           }           }
514           offset_S[i]=koffset;           offset_S[i]=koffset;
515       degree_S[i]=kdeg;       degree_S[i]=kdeg;
516        }        }
517          /* now we need to distribute the threshold and the diagonal dominance indicator */
518          if (A->mpi_info->size > 1) {
519    
520              const index_t koffset_0=A->mainBlock->pattern->ptr[my_n]+A->col_coupleBlock->pattern->ptr[my_n]
521                     -A->mainBlock->pattern->ptr[0]-A->col_coupleBlock->pattern->ptr[0];
522          
523              double *remote_threshold=NULL;
524              
525          Paso_Coupler* threshold_coupler=Paso_Coupler_alloc(A->row_coupler->connector  ,2);
526              Paso_Coupler_startCollect(threshold_coupler,threshold_p);
527              Paso_Coupler_finishCollect(threshold_coupler);
528              remote_threshold=threshold_coupler->recv_buffer;
529    
530              #pragma omp parallel for private(i,iptr) schedule(static)
531              for (i=0; i<overlap_n; i++) {
532            
533              const double threshold = remote_threshold[2*i+1];
534              register dim_t kdeg=0;
535                  const register index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i];
536                  if (remote_threshold[2*i]>0) {
537                 #pragma ivdep
538                 for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
539                  register index_t j=A->row_coupleBlock->pattern->index[iptr];
540                  if(ABS(A->row_coupleBlock->val[iptr])>threshold) {
541                 S[koffset+kdeg] = j ;
542                 kdeg++;
543                  }
544               }
545    
546                  }
547                  offset_S[i+my_n]=koffset;
548              degree_S[i+my_n]=kdeg;
549              }
550    
551              Paso_Coupler_free(threshold_coupler);
552         }
553         TMPMEMFREE(threshold_p);
554  }  }
555    
556  /* theta = threshold for strong connections */  /* theta = threshold for strong connections */
# Line 493  void Paso_Preconditioner_AMG_setStrongCo Line 564  void Paso_Preconditioner_AMG_setStrongCo
564                              const double theta, const double tau)                              const double theta, const double tau)
565    
566  {  {
567     const dim_t my_n=Paso_SystemMatrix_getNumRows(A);     const dim_t n_block=A->block_size;
568       const dim_t my_n=A->mainBlock->numRows;
569       const dim_t overlap_n=A->row_coupleBlock->numRows;
570      
571     index_t iptr, i, bi;     index_t iptr, i, bi;
572     const dim_t n_block=A->row_block_size;     double *threshold_p=NULL;
573        
574        
575        #pragma omp parallel private(i,iptr, bi)     threshold_p = TMPMEMALLOC(2*my_n, double);
576        {    
577       dim_t max_deg=0;     #pragma omp parallel private(i,iptr, bi)
578       double *rtmp=TMPMEMALLOC(max_deg, double);     {
579      
580       #pragma omp for schedule(static)        dim_t max_deg=0;
581       for (i=0;i<my_n;++i) max_deg=MAX(max_deg, A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]        double *rtmp=NULL;
                                                 +A->col_coupleBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i]);  
         
         
      #pragma omp for schedule(static)  
      for (i=0;i<my_n;++i) {  
       
         register double max_offdiagonal = 0.;  
         register double sum_row=0;  
         register double main_row=0;  
             register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];  
         register dim_t kdeg=0;  
             register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];  
   
         for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {  
            register index_t j=A->mainBlock->pattern->index[iptr];  
            register double fnorm=0;  
            #pragma ivdep  
            for(bi=0;bi<n_block*n_block;++bi) {  
                      register double rtmp2 = A->mainBlock->val[iptr*n_block*n_block+bi];  
                      fnorm+=rtmp2*rtmp2;  
                }  
            fnorm=sqrt(fnorm);  
   
            rtmp[iptr+rtmp_offset]=fnorm;  
            if( j != i) {  
           max_offdiagonal = MAX(max_offdiagonal,fnorm);  
           sum_row+=fnorm;  
            } else {  
           main_row=fnorm;  
            }  
         }  
             rtmp_offset=A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]-A->col_coupleBlock->pattern->ptr[i];  
         for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {  
            register double fnorm=0;  
            #pragma ivdep  
            for(bi=0;bi<n_block*n_block;++bi) {  
                      register double rtmp2 = A->col_coupleBlock->val[iptr*n_block*n_block+bi];  
                      fnorm+=rtmp2*rtmp2;  
                }  
            fnorm=sqrt(fnorm);  
582    
583             rtmp[iptr+rtmp_offset]=fnorm;        #pragma omp for schedule(static)
584          for (i=0;i<my_n;++i) max_deg=MAX(max_deg, A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]
585                         +A->col_coupleBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i]);
586          
587          rtmp=TMPMEMALLOC(max_deg, double);
588          
589          #pragma omp for schedule(static)
590          for (i=0;i<my_n;++i) {        
591         register double max_offdiagonal = 0.;
592         register double sum_row=0;
593         register double main_row=0;
594         register index_t rtmp_offset=-A->mainBlock->pattern->ptr[i];
595         register dim_t kdeg=0;
596         const register index_t koffset=A->mainBlock->pattern->ptr[i]+A->col_coupleBlock->pattern->ptr[i];
597        
598         /* collect information for row i: */
599         for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
600            register index_t j=A->mainBlock->pattern->index[iptr];
601            register double fnorm=0;
602            #pragma ivdep
603            for(bi=0;bi<n_block;++bi) {
604                register double  rtmp2= A->mainBlock->val[iptr*n_block+bi];
605               fnorm+=rtmp2*rtmp2;
606            }
607            fnorm=sqrt(fnorm);
608            rtmp[iptr+rtmp_offset]=fnorm;
609            
610            if( j != i) {
611             max_offdiagonal = MAX(max_offdiagonal,fnorm);             max_offdiagonal = MAX(max_offdiagonal,fnorm);
612             sum_row+=fnorm;             sum_row+=fnorm;
613            } else {
614               main_row=fnorm;
615          }          }
616              {      
617              const double threshold = theta*max_offdiagonal;       }
618          
619              if (tau*main_row < sum_row) { /* no diagonal domainance */           rtmp_offset+=A->mainBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i];
620                     rtmp_offset=-A->mainBlock->pattern->ptr[i];       for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
621                 #pragma ivdep          register double fnorm=0;
622                 for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {          #pragma ivdep
623                register index_t j=A->mainBlock->pattern->index[iptr];          for(bi=0;bi<n_block;++bi) {
624                if(rtmp[iptr+rtmp_offset] > threshold && i!=j) {             register double rtmp2 = A->col_coupleBlock->val[iptr*n_block+bi];
625                   S[koffset+kdeg] = j;             fnorm+=rtmp2*rtmp2;
                  kdeg++;  
               }  
                }  
                    rtmp_offset=A->mainBlock->pattern->ptr[i+1]-A->mainBlock->pattern->ptr[i]-A->col_coupleBlock->pattern->ptr[i];  
                #pragma ivdep  
                for (iptr=A->col_coupleBlock->pattern->ptr[i]; iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {  
               register index_t j=A->col_coupleBlock->pattern->index[iptr];  
               if(rtmp[iptr+rtmp_offset] > threshold) {  
                  S[koffset+kdeg] = j + my_n;  
                  kdeg++;  
               }  
                }  
             }  
             }  
         degree_S[i]=kdeg;  
             offset_S[i]=koffset;  
      }        
      TMPMEMFREE(rtmp);  
       } /* end of parallel region */  
   
 }    
   
 #ifdef AAAAA  
 /* the runge stueben coarsening algorithm: */  
 void Paso_Preconditioner_AMG_RungeStuebenSearch(const dim_t n, const index_t* offset_S,  
                         const dim_t* degree_S, const index_t* S,  
                         index_t*split_marker, const bool_t usePanel)  
 {  
     
    index_t *lambda=NULL, *ST=NULL, *notInPanel=NULL, *panel=NULL, lambda_max, lambda_k;  
    dim_t i,k, p, q, *degree_ST=NULL, len_panel, len_panel_new;  
    register index_t j, itmp;  
     
    if (n<=0) return; /* make sure that the return of Paso_Util_arg_max is not pointing to nirvana */  
     
    lambda=TMPMEMALLOC(n, index_t); Esys_checkPtr(lambda);  
    degree_ST=TMPMEMALLOC(n, dim_t); Esys_checkPtr(degree_ST);  
    ST=TMPMEMALLOC(offset_S[n], index_t);  Esys_checkPtr(ST);  
    if (usePanel) {  
       notInPanel=TMPMEMALLOC(n, bool_t); Esys_checkPtr(notInPanel);  
       panel=TMPMEMALLOC(n, index_t); Esys_checkPtr(panel);  
    }  
     
     
     
    if (Esys_noError() ) {  
       /* initialize  split_marker and split_marker :*/  
       /* those unknows which are not influenced go into F, the rest is available for F or C */  
       #pragma omp parallel for private(i) schedule(static)  
       for (i=0;i<n;++i) {  
      degree_ST[i]=0;  
      if (degree_S[i]>0) {  
         lambda[i]=0;  
         split_marker[i]=PASO_AMG_UNDECIDED;  
      } else {  
         split_marker[i]=PASO_AMG_IN_F;  
         lambda[i]=-1;  
      }  
       }  
       /* create transpose :*/  
       for (i=0;i<n;++i) {  
         for (p=0; p<degree_S[i]; ++p) {  
            j=S[offset_S[i]+p];  
            ST[offset_S[j]+degree_ST[j]]=i;  
            degree_ST[j]++;  
         }  
       }  
       /* lambda[i] = |undecided k in ST[i]| + 2 * |F-unknown in ST[i]| */  
       #pragma omp parallel for private(i, j, itmp) schedule(static)  
       for (i=0;i<n;++i) {  
      if (split_marker[i]==PASO_AMG_UNDECIDED) {  
         itmp=lambda[i];  
         for (p=0; p<degree_ST[i]; ++p) {  
            j=ST[offset_S[i]+p];  
            if (split_marker[j]==PASO_AMG_UNDECIDED) {  
           itmp++;  
            } else {  /* at this point there are no C points */  
           itmp+=2;  
            }  
626          }          }
627          lambda[i]=itmp;          fnorm=sqrt(fnorm);
628            
629            rtmp[iptr+rtmp_offset]=fnorm;
630            max_offdiagonal = MAX(max_offdiagonal,fnorm);
631            sum_row+=fnorm;
632       }       }
633        }      
634        if (usePanel) {        
635       #pragma omp parallel for private(i) schedule(static)       /* inspect row i: */
636       for (i=0;i<n;++i) notInPanel[i]=TRUE;       {
637        }          const double threshold = theta*max_offdiagonal;
638        /* start search :*/          rtmp_offset=-A->mainBlock->pattern->ptr[i];
639        i=Paso_Util_arg_max(n,lambda);          
640        while (lambda[i]>-1) { /* is there any undecided unknown? */          threshold_p[2*i+1]=threshold;
641            if (tau*main_row < sum_row) { /* no diagonal domainance */
642       if (usePanel) {             threshold_p[2*i]=1;
643          len_panel=0;             rtmp_offset=-A->mainBlock->pattern->ptr[i];
644          do {             #pragma ivdep
645             /* the unknown i is moved to C */             for (iptr=A->mainBlock->pattern->ptr[i];iptr<A->mainBlock->pattern->ptr[i+1]; ++iptr) {
646             split_marker[i]=PASO_AMG_IN_C;            register index_t j=A->mainBlock->pattern->index[iptr];
647             lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */            if(rtmp[iptr+rtmp_offset] > threshold && i!=j) {
648                           S[koffset+kdeg] = j;
649             /* all undecided unknown strongly coupled to i are moved to F */               kdeg++;
            for (p=0; p<degree_ST[i]; ++p) {  
           j=ST[offset_S[i]+p];  
             
           if (split_marker[j]==PASO_AMG_UNDECIDED) {  
               
              split_marker[j]=PASO_AMG_IN_F;  
              lambda[j]=-1;  
               
              for (q=0; q<degree_ST[j]; ++q) {  
             k=ST[offset_S[j]+q];  
             if (split_marker[k]==PASO_AMG_UNDECIDED) {  
                lambda[k]++;  
                if (notInPanel[k]) {  
                   notInPanel[k]=FALSE;  
                   panel[len_panel]=k;  
                   len_panel++;  
                }  
   
             }    /* the unknown i is moved to C */  
             split_marker[i]=PASO_AMG_IN_C;  
             lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */  
              }  
               
650            }            }
651             }             }
652             for (p=0; p<degree_S[i]; ++p) {             rtmp_offset+=A->mainBlock->pattern->ptr[i+1]-A->col_coupleBlock->pattern->ptr[i];
653            j=S[offset_S[i]+p];             #pragma ivdep
654            if (split_marker[j]==PASO_AMG_UNDECIDED) {             for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
655               lambda[j]--;            register index_t j=A->col_coupleBlock->pattern->index[iptr];
656               if (notInPanel[j]) {            if( rtmp[iptr+rtmp_offset] >threshold) {
657              notInPanel[j]=FALSE;               S[koffset+kdeg] = j + my_n;
658              panel[len_panel]=j;               kdeg++;
             len_panel++;  
              }  
659            }            }
660             }             }
661            } else {
662             /* consolidate panel */             threshold_p[2*i]=-1;
663             /* remove lambda[q]=-1 */          }
664             lambda_max=-1;       }
665             i=-1;       offset_S[i]=koffset;
666             len_panel_new=0;       degree_S[i]=kdeg;
667             for (q=0; q<len_panel; q++) {        }
668               k=panel[q];        TMPMEMFREE(rtmp);
669               lambda_k=lambda[k];     }
670               if (split_marker[k]==PASO_AMG_UNDECIDED) {     /* now we need to distribute the threshold and the diagonal dominance indicator */
671              panel[len_panel_new]=k;     if (A->mpi_info->size > 1) {
672              len_panel_new++;        
673          const index_t koffset_0=A->mainBlock->pattern->ptr[my_n]+A->col_coupleBlock->pattern->ptr[my_n]
674              if (lambda_max == lambda_k) {                               -A->mainBlock->pattern->ptr[0]-A->col_coupleBlock->pattern->ptr[0];
675                 if (k<i) i=k;        
676              } else if (lambda_max < lambda_k) {        double *remote_threshold=NULL;
677                 lambda_max =lambda_k;        
678                 i=k;        Paso_Coupler* threshold_coupler=Paso_Coupler_alloc(A->row_coupler->connector  ,2);
679              }        Paso_Coupler_startCollect(threshold_coupler,threshold_p);
680               }        Paso_Coupler_finishCollect(threshold_coupler);
681          remote_threshold=threshold_coupler->recv_buffer;
682          
683          #pragma omp parallel for private(i,iptr) schedule(static)
684          for (i=0; i<overlap_n; i++) {
685        
686         const double threshold2 = remote_threshold[2*i+1]*remote_threshold[2*i+1];
687         register dim_t kdeg=0;
688         const register index_t koffset=koffset_0+A->row_coupleBlock->pattern->ptr[i];
689         if (remote_threshold[2*i]>0) {
690            #pragma ivdep
691            for (iptr=A->row_coupleBlock->pattern->ptr[i];iptr<A->row_coupleBlock->pattern->ptr[i+1]; ++iptr) {
692               register index_t j=A->row_coupleBlock->pattern->index[iptr];
693               register double fnorm2=0;
694               #pragma ivdepremote_threshold[2*i]
695               for(bi=0;bi<n_block;++bi) {
696              register double rtmp2 = A->row_coupleBlock->val[iptr*n_block+bi];
697              fnorm2+=rtmp2*rtmp2;
698             }             }
699             len_panel=len_panel_new;            
700          } while (len_panel>0);                 if(fnorm2 > threshold2 ) {
701       } else {            S[koffset+kdeg] = j ;
702          /* the unknown i is moved to C */            kdeg++;
         split_marker[i]=PASO_AMG_IN_C;  
         lambda[i]=-1;  /* lambda from unavailable unknowns is set to -1 */  
           
         /* all undecided unknown strongly coupled to i are moved to F */  
         for (p=0; p<degree_ST[i]; ++p) {  
            j=ST[offset_S[i]+p];  
            if (split_marker[j]==PASO_AMG_UNDECIDED) {  
           
           split_marker[j]=PASO_AMG_IN_F;  
           lambda[j]=-1;  
           
           for (q=0; q<degree_ST[j]; ++q) {  
              k=ST[offset_S[j]+q];  
              if (split_marker[k]==PASO_AMG_UNDECIDED) lambda[k]++;  
           }  
   
703             }             }
704          }          }
         for (p=0; p<degree_S[i]; ++p) {  
            j=S[offset_S[i]+p];  
            if(split_marker[j]==PASO_AMG_UNDECIDED) lambda[j]--;  
         }  
705                    
706       }       }
707       i=Paso_Util_arg_max(n,lambda);       offset_S[i+my_n]=koffset;
708         degree_S[i+my_n]=kdeg;
709        }        }
710                    Paso_Coupler_free(threshold_coupler);
711     }     }
712     TMPMEMFREE(lambda);     TMPMEMFREE(threshold_p);
    TMPMEMFREE(ST);  
    TMPMEMFREE(degree_ST);  
    TMPMEMFREE(panel);  
    TMPMEMFREE(notInPanel);  
713  }  }
714  /* ensures that two F nodes are connected via a C node :*/  void Paso_Preconditioner_AMG_transposeStrongConnections(const dim_t n, const dim_t* degree_S, const index_t* offset_S, const index_t* S,
715  void Paso_Preconditioner_AMG_enforceFFConnectivity(const dim_t n, const index_t* offset_S,                              const dim_t nT, dim_t* degree_ST, index_t* offset_ST,index_t* ST)
                         const dim_t* degree_S, const index_t* S,  
                         index_t*split_marker)  
716  {  {
717        dim_t i, p, q;     index_t i, j;
718       dim_t p;
719        /* now we make sure that two (strongly) connected F nodes are (strongly) connected via a C node. */     dim_t len=0;
720        for (i=0;i<n;++i) {     #pragma omp parallel private(i) schedule(static)
721              if ( (split_marker[i]==PASO_AMG_IN_F) && (degree_S[i]>0) ) {     for (i=0; i<nT ;++i) {
722             for (p=0; p<degree_S[i]; ++p) {        degree_ST[i]=0;
723                    register index_t j=S[offset_S[i]+p];     }
724                    if ( (split_marker[j]==PASO_AMG_IN_F)  && (degree_S[j]>0) )  {     for (i=0; i<n ;++i) {
725                        /* i and j are now two F nodes which are strongly connected */        for (p=0; p<degree_S[i]; ++p) degree_ST[ S[offset_S[i]+p] ]++;
726                        /* is there a C node they share ? */     }
727                        register index_t sharing=-1;     for (i=0; i<nT ;++i) {
728                    for (q=0; q<degree_S[i]; ++q) {        offset_ST[i]=len;
729                           index_t k=S[offset_S[i]+q];        len+=degree_ST[i];
730                           if (split_marker[k]==PASO_AMG_IN_C) {        degree_ST[i]=0;
731                              register index_t* where_k=(index_t*)bsearch(&k, &(S[offset_S[j]]), degree_S[j], sizeof(index_t), Paso_comparIndex);     }
732                              if (where_k != NULL) {     for (i=0; i<n ;++i) {
733                                 sharing=k;        for (p=0; p<degree_S[i]; ++p) {
734                                 break;         j=S[offset_S[i]+p];
735                              }         ST[offset_ST[j]+degree_ST[j]]=i;
736                           }         degree_ST[j]++;
737                        }        }
738                        if (sharing<0) {     }
                            if (i<j) {  
                               split_marker[j]=PASO_AMG_IN_C;  
                            } else {  
                               split_marker[i]=PASO_AMG_IN_C;  
                               break;  /* no point to look any further as i is now a C node */  
                            }  
                       }  
                   }  
                }  
             }  
        }  
739  }  }
 #endif  
740    
741  #ifdef DFG  void Paso_Preconditioner_AMG_CIJPCoarsening(const dim_t n, const dim_t my_n, index_t*split_marker,
742  void Paso_Preconditioner_AMG_CIJPCoarsening( )                          const dim_t* degree_S, const index_t* offset_S, const index_t* S,
743                            const dim_t* degree_ST, const index_t* offset_ST, const index_t* ST,
744                            Paso_Connector* col_connector, Paso_Distribution* col_dist)
745  {  {
746       dim_t i, numUndefined,   iter=0;
747      index_t iptr, jptr, kptr;
748      double *random=NULL, *w=NULL, *Status=NULL;
749      index_t * ST_flag=NULL;
750    
751      Paso_Coupler* w_coupler=Paso_Coupler_alloc(col_connector  ,1);
752      
753      w=TMPMEMALLOC(n, double);
754      Status=TMPMEMALLOC(n, double);
755      random = Paso_Distribution_createRandomVector(col_dist,1);
756      ST_flag=TMPMEMALLOC(offset_ST[n-1]+ degree_ST[n-1], index_t);
757      
758      #pragma omp parallel for private(i)
759      for (i=0; i< my_n; ++i) {
760          w[i]=degree_ST[i]+random[i];
761          if (degree_ST[i] < 1) {
762           Status[i]=-100; /* F point  */
763          } else {
764           Status[i]=1; /* status undefined */
765          }
766      }
767      #pragma omp parallel for private(i, iptr)
768      for (i=0; i< n; ++i) {
769          for( iptr =0 ; iptr < degree_ST[i]; ++iptr)  {
770         ST_flag[offset_ST[i]+iptr]=1;
771          }
772      }  
773    
774      
775      numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );
776      printf(" coarsening loop start: num of undefined rows = %d \n",numUndefined);
777    
778        
779    const dim_t my_n;    iter=0;
780    const dim_t overlap_n;    while (numUndefined > 0) {
781    const dim_t n= my_n + overlap_n;       Paso_Coupler_fillOverlap(n, w, w_coupler);
782         {
783        int p;
784        for (p=0; p<my_n; ++p) {
785           printf(" %d : %f %f \n",p, w[p], Status[p]);
786        }
787        for (p=my_n; p<n; ++p) {
788           printf(" %d : %f \n",p, w[p]);
789        }
790        
791        
792         }
793        
794          /* calculate the maximum value of naigbours following active strong connections:
795            w2[i]=MAX(w[k]) with k in ST[i] or S[i] and (i,k) conenction is still active  */      
796          #pragma omp parallel for private(i, iptr)
797          for (i=0; i<my_n; ++i) {
798         if (Status[i]>0) { /* status is still undefined */
799    
800            register bool_t inD=TRUE;
801            const double wi=w[i];
802    
803        /* initialize  split_marker and split_marker :*/  
804        /* those unknows which are not influenced go into F, the rest is available for F or C */          for( iptr =0 ; iptr < degree_S[i]; ++iptr) {
805        #pragma omp parallel for private(i) schedule(static)  
806        for (i=0;i<n;++i) {             const index_t k=S[offset_S[i]+iptr];
807       degree_ST[i]=0;             const index_t* start_p = &ST[offset_ST[k]];
808       if (degree_S[i]>0) {             const index_t* where_p=(index_t*)bsearch(&i, start_p, degree_ST[k], sizeof(index_t), Paso_comparIndex);
809          lambda[i]=0;  
810          split_marker[i]=PASO_AMG_UNDECIDED;             if (ST_flag[(index_t)(where_p-start_p)]>0) {
811       } else {  printf("S: %d (%e) -> %d    (%e)\n",i, wi, k, w[k]);      
812          split_marker[i]=PASO_AMG_IN_F;            if (wi <= w[k] ) {
813          lambda[i]=-1;               inD=FALSE;
814       }               break;
815              }
816               }
817            
818            }
819            
820            if (inD) {
821              for( iptr =0 ; iptr < degree_ST[i]; ++iptr) {
822                 const index_t k=ST[offset_ST[i]+iptr];
823                 if ( ST_flag[offset_ST[i]+iptr] > 0 ) {
824    printf("ST: %d (%e) -> %d   (%e)\n",i, wi, k, w[k]);      
825                
826                           if (wi <= w[k] ) {
827                   inD=FALSE;
828                   break;
829                }
830                 }
831              }
832            }    
833            if (inD) {
834               Status[i]=0.; /* is in D */
835    printf("%d is in D\n",i);
836            }
837         }
838        
839        }        }
840    
841     /* set local lambda + overlap */        Paso_Coupler_fillOverlap(n, Status, w_coupler);
    #pragma omp parallel for private(i)  
    for (i=0; i<n ++i) {  
        w[i]=degree_ST[i];  
    }  
    for (i=0; i<my_n; i++) {  
       w2[i]=random;  
    }  
842    
843    
844     /* add noise to w */       /*   remove connection to D points :
845     Paso_Coupler_add(n, w, 1., w2, col_coupler);      
846               for each i in D:
847              for each j in S_i:
848                 w[j]--
849                 ST_tag[j,i]=-1
850              for each j in ST[i]:
851                 ST_tag[i,j]=-1
852                 for each k in ST[j]:
853                if k in ST[i]:
854                   w[j]--;
855                ST_tag[j,k]=-1
856                
857         */
858         /* w is updated  for local rows only */
859         {
860            #pragma omp parallel for private(j, jptr)
861            for (i=0; i< my_n; ++i) {
862    
863               for (jptr=0; jptr<degree_ST[i]; ++jptr) {
864              const index_t j=ST[offset_ST[i]+jptr];
865              if ( (Status[j] == 0.) && (ST_flag[offset_ST[i]+jptr]>0) ) {
866                 w[i]--;
867    printf("%d reduced by %d\n",i,j);
868                 ST_flag[offset_ST[i]+jptr]=-1;
869              }
870               }
871              
872            }
873            #pragma omp parallel for private(j)
874            for (i=my_n; i< n; ++i) {
875               for (jptr=0; jptr<degree_ST[i]; ++jptr) {
876              const index_t j = ST[offset_ST[i]+jptr];
877              if ( Status[j] == 0. ) ST_flag[offset_ST[i]+jptr]=-1;
878               }
879            }
880    
881     /*  */          
882     global_n_C=0;          for (i=0; i< n; ++i) {
883     global_n_F=..;             if ( Status[i] == 0. ) {
     
    while (global_n_C + global_n_F < global_n) {  
884    
885                               const index_t* start_p = &ST[offset_ST[i]];
       is_in_D[i]=FALSE;  
       /*  test  local connectivit*/  
       /* w2[i] = max(w[k] | k in S_i or k in S^T_i */  
       #pragma omp parallel for private(i)  
       for (i=0; i<n; ++i) w2[i]=0;  
               
       for (i=0; i<my_n; ++i) {  
          for( iPtr =0 ; iPtr < degree_S[i]; ++iPtr) {  
              k=S[offset_S[i]+iPtr];  
              w2[i]=MAX(w2[i],w[k]);  
              w2[k]=MAX(w2[k],w[i]);  
          }  
       }  
       /* adjust overlaps by MAX */  
       Paso_Coupler_max(n, w2, col_coupler);  
886    
887        /* points with w[i]>w2[i] become C nodes */               for (jptr=0; jptr<degree_ST[i]; ++jptr) {
888     }              const index_t j=ST[offset_ST[i]+jptr];
889      printf("check connection: %d %d\n",i,j);
890                ST_flag[offset_ST[i]+jptr]=-1;
891                for (kptr=0; kptr<degree_ST[j]; ++kptr) {
892                   const index_t k=ST[offset_ST[j]+kptr];
893    printf("check connection: %d of %d\n",k,j);
894                   if (NULL != bsearch(&k, start_p, degree_ST[i], sizeof(index_t), Paso_comparIndex) ) { /* k in ST[i] ? */
895    printf("found!\n");
896                      if (ST_flag[offset_ST[j]+kptr] >0) {
897                     if (j< my_n ) {
898                        w[j]--;
899    printf("%d reduced by %d and %d \n",j, i,k);
900                     }
901                     ST_flag[offset_ST[j]+kptr]=-1;
902                      }
903                   }
904                }
905                 }
906              }
907            }
908         }
909         /* adjust status */
910         #pragma omp parallel for private(i)
911         for (i=0; i< my_n; ++i) {
912            if ( Status[i] == 0. ) {
913               Status[i] = -10;   /* this is now a C point */
914            } else if ( w[i]<1.) {
915               Status[i] = -100;   /* this is now a F point */  
916            }
917         }
918        
919        
920         numUndefined = Paso_Distribution_numPositives(Status, col_dist, 1 );
921         iter++;
922         printf(" coarsening loop %d: num of undefined rows = %d \n",iter, numUndefined);
923      } /* end of while loop */
924    
925    
926      /* map to output :*/
927      Paso_Coupler_fillOverlap(n, Status, w_coupler);
928      #pragma omp parallel for private(i)
929      for (i=0; i< n; ++i) {
930         if (Status[i] > -50.) {
931            split_marker[i]=PASO_AMG_IN_C;
932         } else {
933            split_marker[i]=PASO_AMG_IN_F;
934         }
935      }
936    
937      /* clean up : */
938      Paso_Coupler_free(w_coupler);
939      TMPMEMFREE(random);
940      TMPMEMFREE(w);
941      TMPMEMFREE(Status);
942      TMPMEMFREE(ST_flag);
943      
944      return;
945  }  }
 #endif  

Legend:
Removed from v.3481  
changed lines
  Added in v.3482

  ViewVC Help
Powered by ViewVC 1.1.26