/[escript]/trunk/paso/src/AMG.c
ViewVC logotype

Diff of /trunk/paso/src/AMG.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 3827 by lgao, Tue Feb 14 11:42:08 2012 UTC revision 3828 by lgao, Wed Feb 15 03:27:58 2012 UTC
# Line 91  dim_t Paso_Preconditioner_AMG_getNumCoar Line 91  dim_t Paso_Preconditioner_AMG_getNumCoar
91  Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {  Paso_Preconditioner_AMG* Paso_Preconditioner_AMG_alloc(Paso_SystemMatrix *A_p,dim_t level,Paso_Options* options) {
92    
93    Paso_Preconditioner_AMG* out=NULL;    Paso_Preconditioner_AMG* out=NULL;
94    Paso_SystemMatrix *A_C;    Paso_SystemMatrix *A_C=NULL;
95    bool_t verbose=options->verbose;    bool_t verbose=options->verbose;
96    
97    const dim_t my_n=A_p->mainBlock->numRows;    const dim_t my_n=A_p->mainBlock->numRows;
# Line 100  Paso_Preconditioner_AMG* Paso_Preconditi Line 100  Paso_Preconditioner_AMG* Paso_Preconditi
100    const dim_t n = my_n + overlap_n;    const dim_t n = my_n + overlap_n;
101    
102    const dim_t n_block=A_p->row_block_size;    const dim_t n_block=A_p->row_block_size;
 //  const dim_t n_block=A_p->block_size;  
103    index_t* F_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F;    index_t* F_marker=NULL, *counter=NULL, *mask_C=NULL, *rows_in_F;
104    dim_t i, n_F, n_C, F_flag, *F_set=NULL;    dim_t i, n_F, n_C, F_flag, *F_set=NULL;
105    double time0=0;    double time0=0;
# Line 108  Paso_Preconditioner_AMG* Paso_Preconditi Line 107  Paso_Preconditioner_AMG* Paso_Preconditi
107    const double tau = options->diagonal_dominance_threshold;    const double tau = options->diagonal_dominance_threshold;
108    const double sparsity=Paso_SystemMatrix_getSparsity(A_p);    const double sparsity=Paso_SystemMatrix_getSparsity(A_p);
109    const dim_t total_n=Paso_SystemMatrix_getGlobalTotalNumRows(A_p);    const dim_t total_n=Paso_SystemMatrix_getGlobalTotalNumRows(A_p);
   const dim_t global_n=Paso_SystemMatrix_getGlobalNumRows(A_p);  
110    
111    
112    /*    /*
# Line 116  Paso_Preconditioner_AMG* Paso_Preconditi Line 114  Paso_Preconditioner_AMG* Paso_Preconditi
114                
115    */    */
116    if ( (sparsity >= options->min_coarse_sparsity) ||    if ( (sparsity >= options->min_coarse_sparsity) ||
117  //       (total_n <= options->min_coarse_matrix_size) ||         (total_n <= options->min_coarse_matrix_size) ||
        (global_n <= options->min_coarse_matrix_size) ||  
118         (level > options->level_max) ) {         (level > options->level_max) ) {
119    
120          if (verbose) {          if (verbose) {
# Line 182  Paso_Preconditioner_AMG* Paso_Preconditi Line 179  Paso_Preconditioner_AMG* Paso_Preconditi
179             Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, offset_S, S, theta,tau);             Paso_Preconditioner_AMG_setStrongConnections(A_p, degree_S, offset_S, S, theta,tau);
180       }       }
181       Paso_Preconditioner_AMG_transposeStrongConnections(n, degree_S, offset_S, S, n, degree_ST, offset_ST, ST);       Paso_Preconditioner_AMG_transposeStrongConnections(n, degree_S, offset_S, S, n, degree_ST, offset_ST, ST);
182  //   Paso_SystemMatrix_extendedRowsForST(A_p, degree_ST, offset_ST, ST);  /*   Paso_SystemMatrix_extendedRowsForST(A_p, degree_ST, offset_ST, ST);
183     */
184    
185       Paso_Preconditioner_AMG_CIJPCoarsening(n,my_n,F_marker,       Paso_Preconditioner_AMG_CIJPCoarsening(n,my_n,F_marker,
186                              degree_S, offset_S, S, degree_ST, offset_ST, ST,                              degree_S, offset_S, S, degree_ST, offset_ST, ST,
# Line 210  Paso_Preconditioner_AMG* Paso_Preconditi Line 208  Paso_Preconditioner_AMG* Paso_Preconditi
208              /* collect n_F values on all processes, a direct solver should              /* collect n_F values on all processes, a direct solver should
209                  be used if any n_F value is 0 */                  be used if any n_F value is 0 */
210              F_set = TMPMEMALLOC(A_p->mpi_info->size, dim_t);              F_set = TMPMEMALLOC(A_p->mpi_info->size, dim_t);
211            #ifdef ESYS_MPI
212              MPI_Allgather(&n_F, 1, MPI_INT, F_set, 1, MPI_INT, A_p->mpi_info->comm);              MPI_Allgather(&n_F, 1, MPI_INT, F_set, 1, MPI_INT, A_p->mpi_info->comm);
213            #endif
214              F_flag = 1;              F_flag = 1;
215              for (i=0; i<A_p->mpi_info->size; i++) {              for (i=0; i<A_p->mpi_info->size; i++) {
216                  if (F_set[i] == 0) {                  if (F_set[i] == 0) {
# Line 220  Paso_Preconditioner_AMG* Paso_Preconditi Line 220  Paso_Preconditioner_AMG* Paso_Preconditi
220              }              }
221              TMPMEMFREE(F_set);              TMPMEMFREE(F_set);
222                    
223  //          if ( n_F == 0 ) {  /*  is a nasty case. a direct solver should be used, return NULL */  /*          if ( n_F == 0 ) {  is a nasty case. a direct solver should be used, return NULL */
224              if (F_flag == 0) {              if (F_flag == 0) {
225             out = NULL;             out = NULL;
226          } else {          } else {
# Line 233  Paso_Preconditioner_AMG* Paso_Preconditi Line 233  Paso_Preconditioner_AMG* Paso_Preconditi
233            out->A_C = NULL;            out->A_C = NULL;
234            out->P = NULL;              out->P = NULL;  
235            out->R = NULL;                      out->R = NULL;          
236  //        out->post_sweeps = options->post_sweeps;            out->post_sweeps = options->post_sweeps;
237  //        out->pre_sweeps  = options->pre_sweeps;            out->pre_sweeps  = options->pre_sweeps;
           out->post_sweeps = 2;  
           out->pre_sweeps = 2;  
238            out->r = NULL;            out->r = NULL;
239            out->x_C = NULL;            out->x_C = NULL;
240            out->b_C = NULL;            out->b_C = NULL;
# Line 362  void Paso_Preconditioner_AMG_solve(Paso_ Line 360  void Paso_Preconditioner_AMG_solve(Paso_
360       double time0=0;       double time0=0;
361       const dim_t post_sweeps=amg->post_sweeps;       const dim_t post_sweeps=amg->post_sweeps;
362       const dim_t pre_sweeps=amg->pre_sweeps;       const dim_t pre_sweeps=amg->pre_sweeps;
      int rank = A->mpi_info->rank;  
363    
364       /* presmoothing */       /* presmoothing */
365       time0=Esys_timer();       time0=Esys_timer();
# Line 425  void Paso_Preconditioner_AMG_setStrongCo Line 422  void Paso_Preconditioner_AMG_setStrongCo
422        
423     index_t iptr, i;     index_t iptr, i;
424     double *threshold_p=NULL;     double *threshold_p=NULL;
    index_t rank=A->mpi_info->rank;  
425    
426     threshold_p = TMPMEMALLOC(2*my_n, double);     threshold_p = TMPMEMALLOC(2*my_n, double);
427        
# Line 475  void Paso_Preconditioner_AMG_setStrongCo Line 471  void Paso_Preconditioner_AMG_setStrongCo
471               kdeg++;               kdeg++;
472                }                }
473             }             }
474  //         #pragma ivdep             #pragma ivdep
475             for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {             for (iptr=A->col_coupleBlock->pattern->ptr[i];iptr<A->col_coupleBlock->pattern->ptr[i+1]; ++iptr) {
476                register index_t j=A->col_coupleBlock->pattern->index[iptr];                register index_t j=A->col_coupleBlock->pattern->index[iptr];
477                if(ABS(A->col_coupleBlock->val[iptr])>threshold) {                if(ABS(A->col_coupleBlock->val[iptr])>threshold) {
# Line 751  void Paso_Preconditioner_AMG_CIJPCoarsen Line 747  void Paso_Preconditioner_AMG_CIJPCoarsen
747    index_t iptr, jptr, kptr;    index_t iptr, jptr, kptr;
748    double *random=NULL, *w=NULL, *Status=NULL;    double *random=NULL, *w=NULL, *Status=NULL;
749    index_t * ST_flag=NULL;    index_t * ST_flag=NULL;
   index_t rank=col_connector->mpi_info->rank;  
750    
751    Paso_Coupler* w_coupler=Paso_Coupler_alloc(col_connector  ,1);    Paso_Coupler* w_coupler=Paso_Coupler_alloc(col_connector  ,1);
752        
# Line 994  Paso_SparseMatrix* Paso_Preconditioner_A Line 989  Paso_SparseMatrix* Paso_Preconditioner_A
989      for (i=1; i<size; i++) {      for (i=1; i<size; i++) {
990        remote_n = A->row_distribution->first_component[i+1] -        remote_n = A->row_distribution->first_component[i+1] -
991           A->row_distribution->first_component[i];           A->row_distribution->first_component[i];
992          #ifdef ESYS_MPI
993        MPI_Irecv(&(ptr_global[iptr]), remote_n, MPI_INT, i,        MPI_Irecv(&(ptr_global[iptr]), remote_n, MPI_INT, i,
994              A->mpi_info->msg_tag_counter+i,              A->mpi_info->msg_tag_counter+i,
995              A->mpi_info->comm,              A->mpi_info->comm,
996              &mpi_requests[i]);              &mpi_requests[i]);
997          #endif
998        temp_n[i] = remote_n;        temp_n[i] = remote_n;
999        iptr += remote_n;        iptr += remote_n;
1000      }      }
1001        #ifdef ESYS_MPI
1002      MPI_Waitall(size-1, &(mpi_requests[1]), mpi_stati);      MPI_Waitall(size-1, &(mpi_requests[1]), mpi_stati);
1003        #endif
1004      A->mpi_info->msg_tag_counter += size;      A->mpi_info->msg_tag_counter += size;
1005    
1006      /* Then, prepare to receive idx and val from other ranks */      /* Then, prepare to receive idx and val from other ranks */
# Line 1021  Paso_SparseMatrix* Paso_Preconditioner_A Line 1020  Paso_SparseMatrix* Paso_Preconditioner_A
1020      offset = n+1;      offset = n+1;
1021      for (i=1; i<size; i++) {      for (i=1; i<size; i++) {
1022        len = temp_len[i];        len = temp_len[i];
1023          #ifdef ESYS_MPI
1024        MPI_Irecv(&(idx_global[iptr]), len, MPI_INT, i,        MPI_Irecv(&(idx_global[iptr]), len, MPI_INT, i,
1025              A->mpi_info->msg_tag_counter+i,              A->mpi_info->msg_tag_counter+i,
1026              A->mpi_info->comm,              A->mpi_info->comm,
1027              &mpi_requests[i]);              &mpi_requests[i]);
1028          #endif
1029        remote_n = temp_n[i];        remote_n = temp_n[i];
1030        for (j=0; j<remote_n; j++) {        for (j=0; j<remote_n; j++) {
1031      ptr_global[j+offset] = ptr_global[j+offset] + iptr;      ptr_global[j+offset] = ptr_global[j+offset] + iptr;
# Line 1036  Paso_SparseMatrix* Paso_Preconditioner_A Line 1037  Paso_SparseMatrix* Paso_Preconditioner_A
1037      MEMFREE(idx);      MEMFREE(idx);
1038      row_block_size = A->mainBlock->row_block_size;      row_block_size = A->mainBlock->row_block_size;
1039      col_block_size = A->mainBlock->col_block_size;      col_block_size = A->mainBlock->col_block_size;
1040        #ifdef ESYS_MPI
1041      MPI_Waitall(size-1, &(mpi_requests[1]), mpi_stati);      MPI_Waitall(size-1, &(mpi_requests[1]), mpi_stati);
1042        #endif
1043      A->mpi_info->msg_tag_counter += size;      A->mpi_info->msg_tag_counter += size;
1044      TMPMEMFREE(temp_n);      TMPMEMFREE(temp_n);
1045    
# Line 1051  Paso_SparseMatrix* Paso_Preconditioner_A Line 1054  Paso_SparseMatrix* Paso_Preconditioner_A
1054      iptr = temp_len[0] * block_size;      iptr = temp_len[0] * block_size;
1055      for (i=1; i<size; i++) {      for (i=1; i<size; i++) {
1056        len = temp_len[i];        len = temp_len[i];
1057          #ifdef ESYS_MPI
1058        MPI_Irecv(&(out->val[iptr]), len * block_size, MPI_DOUBLE, i,        MPI_Irecv(&(out->val[iptr]), len * block_size, MPI_DOUBLE, i,
1059                          A->mpi_info->msg_tag_counter+i,                          A->mpi_info->msg_tag_counter+i,
1060                          A->mpi_info->comm,                          A->mpi_info->comm,
1061                          &mpi_requests[i]);                          &mpi_requests[i]);
1062          #endif
1063        iptr += (len * block_size);        iptr += (len * block_size);
1064      }      }
1065      memcpy(out->val, val, temp_len[0] * sizeof(double) * block_size);      memcpy(out->val, val, temp_len[0] * sizeof(double) * block_size);
1066      MEMFREE(val);      MEMFREE(val);
1067        #ifdef ESYS_MPI
1068      MPI_Waitall(size-1, &(mpi_requests[1]), mpi_stati);      MPI_Waitall(size-1, &(mpi_requests[1]), mpi_stati);
1069        #endif
1070      A->mpi_info->msg_tag_counter += size;      A->mpi_info->msg_tag_counter += size;
1071      TMPMEMFREE(temp_len);      TMPMEMFREE(temp_len);
1072    } else { /* it's not rank 0 */    } else { /* it's not rank 0 */
1073    
1074      /* First, send out the local ptr */      /* First, send out the local ptr */
1075      tag = A->mpi_info->msg_tag_counter+rank;      tag = A->mpi_info->msg_tag_counter+rank;
1076        #ifdef ESYS_MPI
1077      MPI_Issend(&(ptr[1]), n, MPI_INT, 0, tag, A->mpi_info->comm,      MPI_Issend(&(ptr[1]), n, MPI_INT, 0, tag, A->mpi_info->comm,
1078              &mpi_requests[0]);              &mpi_requests[0]);
1079        #endif
1080    
1081      /* Next, send out the local idx */      /* Next, send out the local idx */
1082      len = ptr[n];      len = ptr[n];
1083      tag += size;      tag += size;
1084        #ifdef ESYS_MPI
1085      MPI_Issend(idx, len, MPI_INT, 0, tag, A->mpi_info->comm,      MPI_Issend(idx, len, MPI_INT, 0, tag, A->mpi_info->comm,
1086              &mpi_requests[1]);              &mpi_requests[1]);
1087        #endif
1088    
1089      /* At last, send out the local val */      /* At last, send out the local val */
1090      len *= block_size;      len *= block_size;
1091      tag += size;      tag += size;
1092        #ifdef ESYS_MPI
1093      MPI_Issend(val, len, MPI_DOUBLE, 0, tag, A->mpi_info->comm,      MPI_Issend(val, len, MPI_DOUBLE, 0, tag, A->mpi_info->comm,
1094              &mpi_requests[2]);              &mpi_requests[2]);
1095    
1096      MPI_Waitall(3, mpi_requests, mpi_stati);      MPI_Waitall(3, mpi_requests, mpi_stati);
1097        #endif
1098      A->mpi_info->msg_tag_counter = tag + size - rank;      A->mpi_info->msg_tag_counter = tag + size - rank;
1099      MEMFREE(ptr);      MEMFREE(ptr);
1100      MEMFREE(idx);      MEMFREE(idx);
# Line 1125  void Paso_Preconditioner_AMG_mergeSolve( Line 1138  void Paso_Preconditioner_AMG_mergeSolve(
1138      offset[i] = p*n_block;      offset[i] = p*n_block;
1139    }    }
1140    count = counts[rank];    count = counts[rank];
1141      #ifdef ESYS_MPI
1142    MPI_Gatherv(amg->b_C, count, MPI_DOUBLE, b, counts, offset, MPI_DOUBLE, 0, A->mpi_info->comm);    MPI_Gatherv(amg->b_C, count, MPI_DOUBLE, b, counts, offset, MPI_DOUBLE, 0, A->mpi_info->comm);
1143    MPI_Gatherv(amg->x_C, count, MPI_DOUBLE, x, counts, offset, MPI_DOUBLE, 0, A->mpi_info->comm);    MPI_Gatherv(amg->x_C, count, MPI_DOUBLE, x, counts, offset, MPI_DOUBLE, 0, A->mpi_info->comm);
1144      #endif
1145    
1146    if (rank == 0) {    if (rank == 0) {
1147      /* solve locally */      /* solve locally */
# Line 1152  void Paso_Preconditioner_AMG_mergeSolve( Line 1167  void Paso_Preconditioner_AMG_mergeSolve(
1167      #endif      #endif
1168    }    }
1169    
1170      #ifdef ESYS_MPI
1171    /* now we need to distribute the solution to all ranks */    /* now we need to distribute the solution to all ranks */
1172    MPI_Scatterv(x, counts, offset, MPI_DOUBLE, amg->x_C, count, MPI_DOUBLE, 0, A->mpi_info->comm);    MPI_Scatterv(x, counts, offset, MPI_DOUBLE, amg->x_C, count, MPI_DOUBLE, 0, A->mpi_info->comm);
1173      #endif
1174    
1175    TMPMEMFREE(x);    TMPMEMFREE(x);
1176    TMPMEMFREE(b);    TMPMEMFREE(b);

Legend:
Removed from v.3827  
changed lines
  Added in v.3828

  ViewVC Help
Powered by ViewVC 1.1.26