/[escript]/trunk-mpi-branch/paso/src/PCG.c

Diff of /trunk-mpi-branch/paso/src/PCG.c


revision 1086 by ksteube, Fri Mar 30 01:03:54 2007 UTC -> revision 1087 by gross, Thu Apr 12 10:01:47 2007 UTC
# Line 68 (v.1086) / Line 68 (v.1087)  err_t Paso_Solver_PCG(
      double * x,
      dim_t *iter,
      double * tolerance,
+     double * buffer0, double * buffer1,
      Paso_Performance* pp) {
# Line 79 (v.1086) / Line 80 (v.1087)  err_t Paso_Solver_PCG(
    dim_t n = A->myNumCols * A-> col_block_size;
    double *resid = tolerance, *rs=NULL, *p=NULL, *v=NULL, *x2=NULL ;
    double tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,sum_1,sum_2,sum_3,sum_4,sum_5,tol;
-   double norm_of_residual,norm_of_residual_global;
+   double norm_of_residual,norm_of_residual_global, loc_sum[2], sum[2];
    register double r_tmp,d,rs_tmp,x2_tmp,x_tmp;

  /*                                                                 */
# Line 140 (v.1086) / Line 141 (v.1087)  err_t Paso_Solver_PCG(
             /* tau=v*r    */
             #pragma omp for private(i0) reduction(+:sum_1) schedule(static)
             for (i0=0;i0<n;i0++) sum_1+=v[i0]*r[i0]; /* Limit to local values of v[] and r[] */
- #ifdef PASO_MPI
-        /* In case we have many MPI processes, each of which may have several OMP threads:
-           OMP master participates in an MPI reduction to get global sum_1 */
-            #pragma omp master
-        {
-          int loc_sum_1 = sum_1;
-          MPI_Allreduce(&loc_sum_1, &sum_1, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
-        }
- #endif
+            #ifdef PASO_MPI
+             /* In case we have many MPI processes, each of which may have several OMP threads:
+                OMP master participates in an MPI reduction to get global sum_1 */
+                 #pragma omp master
+             {
+               loc_sum[0] = sum_1;
+               MPI_Allreduce(loc_sum, &sum_1, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
+             }
+            #endif
             tau_old=tau;
             tau=sum_1;
             /* p=v+beta*p */
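
The block above is the pattern this revision applies throughout the solver: each OpenMP thread-level reduction is followed by a rank-level MPI_Allreduce issued from the master thread only, and the scratch variable handed to MPI is now the double array loc_sum, matching MPI_DOUBLE, where the old code passed the address of an int temporary. A minimal standalone sketch of that hybrid pattern (not the Paso routine itself; the function name and the explicit barrier are illustrative assumptions):

    #include <mpi.h>

    /* Hedged sketch of the hybrid OpenMP/MPI dot product used for tau = v.r.
     * Assumes MPI was initialised with at least MPI_THREAD_FUNNELED, since
     * only the master thread calls into MPI. */
    double dot_global(const double *v, const double *r, int n, MPI_Comm comm)
    {
        double sum = 0.0;     /* shared by the threads of this rank */
        double loc_sum[1];    /* MPI scratch: double, to match MPI_DOUBLE */
        int i;

        #pragma omp parallel private(i)
        {
            /* local (per-rank) part of the dot product */
            #pragma omp for reduction(+:sum) schedule(static)
            for (i = 0; i < n; i++) sum += v[i] * r[i];

            /* one MPI call per rank, issued by the master thread */
            #pragma omp master
            {
                loc_sum[0] = sum;
                MPI_Allreduce(loc_sum, &sum, 1, MPI_DOUBLE, MPI_SUM, comm);
            }
            /* make the globally reduced value visible to all threads */
            #pragma omp barrier
        }
        return sum;
    }
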
# Line 163 (v.1086) / Line 164 (v.1087)  err_t Paso_Solver_PCG(
             /* v=A*p */
             Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
             Performance_startMonitor(pp,PERFORMANCE_MVM);
-        Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, p,ZERO,v);
+        Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, p,ZERO,v, buffer0, buffer1);
             Performance_stopMonitor(pp,PERFORMANCE_MVM);
             Performance_startMonitor(pp,PERFORMANCE_SOLVER);
             /* delta=p*v */
             #pragma omp for private(i0) reduction(+:sum_2) schedule(static)
             for (i0=0;i0<n;i0++) sum_2+=v[i0]*p[i0];
- #ifdef PASO_MPI
-            #pragma omp master
-        {
-          int loc_sum_2 = sum_2;
-          MPI_Allreduce(&loc_sum_2, &sum_2, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
-        }
- #endif
+            #ifdef PASO_MPI
+                #pragma omp master
+            {
+              loc_sum[0] = sum_2;
+              MPI_Allreduce(loc_sum, &sum_2, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
+            }
+            #endif
             delta=sum_2;

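
The matrix-vector product now takes the two work arrays introduced in the new signature. A plausible reading, and it is only an assumption here, is that buffer0 and buffer1 are preallocated scratch/communication arrays for the distributed multiply, so the iteration loop never allocates. The sketch below illustrates that idea with invented names (DistCSR, exchange_halo, n_remote); it is not Paso's Paso_SystemMatrix_MatrixVector_CSR_OFFSET0:

    /* Hypothetical sketch: distributed y = A*x with caller-owned scratch buffers. */
    typedef struct {
        int     n_local;      /* rows owned by this rank                       */
        int     n_remote;     /* coupling columns owned by other ranks         */
        int    *ptr, *index;  /* CSR structure; indices >= n_local are remote  */
        double *val;
    } DistCSR;

    /* Stand-in for the halo exchange: a real implementation would send the owned
     * x entries that neighbours need and receive theirs into recv_buf. Omitted;
     * the point is only that the buffers are supplied by the caller. */
    static void exchange_halo(const DistCSR *A, const double *x,
                              double *send_buf, double *recv_buf)
    {
        (void)A; (void)x; (void)send_buf; (void)recv_buf;
    }

    void dist_matvec(const DistCSR *A, const double *x, double *y,
                     double *buffer0, double *buffer1)
    {
        int i, k;
        exchange_halo(A, x, buffer0, buffer1);  /* buffer1 ends up holding remote x entries */

        #pragma omp parallel for private(i,k) schedule(static)
        for (i = 0; i < A->n_local; i++) {
            double s = 0.0;
            for (k = A->ptr[i]; k < A->ptr[i+1]; k++) {
                int j = A->index[k];
                s += A->val[k] * (j < A->n_local ? x[j] : buffer1[j - A->n_local]);
            }
            y[i] = s;
        }
    }
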
# Line 190 (v.1086) / Line 191 (v.1087)  err_t Paso_Solver_PCG(
                      sum_3+=d*d;
                      sum_4+=d*rs[i0];
                }
- #ifdef PASO_MPI
-                #pragma omp master
-            {
-              int loc_sum_3 = sum_3;
-              MPI_Allreduce(&loc_sum_3, &sum_3, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
-              int loc_sum_4 = sum_4;
-              MPI_Allreduce(&loc_sum_4, &sum_4, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
-            }
- #endif
+                #ifdef PASO_MPI
+                    #pragma omp master
+                {
+                  loc_sum[0] = sum_3;
+                  loc_sum[1] = sum_4;
+                  MPI_Allreduce(loc_sum, sum, 2, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
+                  sum_3=sum[0];
+                  sum_4=sum[1];
+                }
+                #endif
                 gamma_1= ( (ABS(sum_3)<= ZERO) ? 0 : -sum_4/sum_3) ;
                 gamma_2= ONE-gamma_1;
                 #pragma omp for private(i0,x2_tmp,x_tmp,rs_tmp) schedule(static)
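
The only structural change in this hunk is that the two separate MPI_Allreduce calls for sum_3 and sum_4 are fused into a single call of count 2, halving the number of collectives (and their latency) per iteration. A minimal sketch of the same packing, with illustrative names:

    #include <mpi.h>

    /* Reduce two partial sums with one collective by packing them into a
     * length-2 array, as the new code does for sum_3/sum_4. Meant to be called
     * by a single thread per rank (e.g. inside #pragma omp master). */
    void allreduce_pair(double *a, double *b, MPI_Comm comm)
    {
        double loc[2], glob[2];
        loc[0] = *a;
        loc[1] = *b;
        MPI_Allreduce(loc, glob, 2, MPI_DOUBLE, MPI_SUM, comm);
        *a = glob[0];
        *b = glob[1];
    }
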
# Line 209 (v.1086) / Line 211 (v.1087)  err_t Paso_Solver_PCG(
                 }
                 #pragma omp for private(i0) reduction(+:sum_5) schedule(static)
                 for (i0=0;i0<n;++i0) sum_5+=rs[i0]*rs[i0];
- #ifdef PASO_MPI
-            #pragma omp master
-        {
-          int loc_sum_5 = sum_5;
-          MPI_Allreduce(&loc_sum_5, &sum_5, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
-        }
- #endif
+                 #ifdef PASO_MPI
+                    #pragma omp master
+                {
+                   loc_sum[0] = sum_5;
+                   MPI_Allreduce(loc_sum, &sum_5, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
+                }
+                 #endif
                 norm_of_residual=sqrt(sum_5);
                 convergeFlag = norm_of_residual <= tol;
                 maxIterFlag = num_iter == maxit;
# Line 245 (v.1086) / Line 247 (v.1087)  err_t Paso_Solver_PCG(
   /*     End of PCG */
   return status;
 }
-
- /*
-  * $Log$
-  * Revision 1.2  2005/09/15 03:44:40  jgs
-  * Merge of development branch dev-02 back to main trunk on 2005-09-15
-  *
-  * Revision 1.1.2.1  2005/09/05 06:29:49  gross
-  * These files have been extracted from finley to define a stand alone libray for iterative
-  * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
-  * has not been tested yet.
-  *
-  *
-  */

Legend: lines prefixed "-" were removed from v.1086, lines prefixed "+" were added in v.1087.
