/[escript]/branches/doubleplusgood/paso/src/PCG.cpp

Diff of /branches/doubleplusgood/paso/src/PCG.cpp


revision 1811 by ksteube, Thu Sep 25 23:11:13 2008 UTC → revision 3259 by jfenwick, Mon Oct 11 01:48:14 2010 UTC
@@ -1,7 +1,7 @@
 
 /*******************************************************
 *
-* Copyright (c) 2003-2008 by University of Queensland
+* Copyright (c) 2003-2010 by University of Queensland
 * Earth Systems Science Computational Center (ESSCC)
 * http://www.uq.edu.au/esscc
 *
@@ -22,7 +22,7 @@
 #include <omp.h>
 #endif
 
-#ifdef PASO_MPI
+#ifdef ESYS_MPI
 #include <mpi.h>
 #endif
 
@@ -77,7 +77,10 @@ err_t Paso_Solver_PCG(
     Paso_Performance* pp) {
 
   /* Local variables */
-  dim_t num_iter=0,maxit,num_iter_global, chunk_size=-1, len,rest, np, ipp;
+  dim_t num_iter=0,maxit,num_iter_global, len,rest, np, ipp;
+  #ifdef USE_DYNAMIC_SCHEDULING
+    dim_t chunk_size=-1;
+  #endif
   register double ss,ss1;
   dim_t i0, istart, iend;
   bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
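This hunk moves the chunk_size declaration inside the USE_DYNAMIC_SCHEDULING guard, so the variable only exists in builds that chunk the vector loops by hand. A minimal sketch of the pattern that guard implies is below; the function name, chunking formula and arguments are illustrative assumptions, not Paso code.

/* Hedged sketch: a vector update chunked the way the guarded chunk_size suggests.
   All names here are illustrative; only the #ifdef/#pragma structure mirrors the diff. */
#include <algorithm>

void axpy_chunked(double a, const double* p, double* x, long n)
{
#ifdef USE_DYNAMIC_SCHEDULING
    /* chunk_size exists only when dynamic scheduling is compiled in */
    const long chunk_size = std::max(n / 100, 1L);
    const long n_chunks   = (n + chunk_size - 1) / chunk_size;
    #pragma omp parallel for schedule(dynamic, 1)
    for (long ipp = 0; ipp < n_chunks; ++ipp) {
        const long istart = ipp * chunk_size;
        const long iend   = std::min(istart + chunk_size, n);
        for (long i = istart; i < iend; ++i) x[i] += a * p[i];
    }
#else
    #pragma omp parallel for schedule(static)
    for (long i = 0; i < n; ++i) x[i] += a * p[i];
#endif
}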
@@ -85,10 +88,10 @@ err_t Paso_Solver_PCG(
   dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
   double *resid = tolerance, *rs=NULL, *p=NULL, *v=NULL, *x2=NULL ;
   double tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,sum_1,sum_2,sum_3,sum_4,sum_5,tol;
-#ifdef PASO_MPI
+#ifdef ESYS_MPI
   double loc_sum[2], sum[2];
 #endif
-  double norm_of_residual,norm_of_residual_global;
+  double norm_of_residual=0,norm_of_residual_global;
   register double d;
 
   /* Should not be any executable code before this ifdef */
@@ -177,7 +180,7 @@ err_t Paso_Solver_PCG(
 
            Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
            Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
-           Paso_Solver_solvePreconditioner(A,v,r);
+         Paso_SystemMatrix_solvePreconditioner(A,v,r);
            Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
            Performance_startMonitor(pp,PERFORMANCE_SOLVER);
 
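The renamed call applies the preconditioner attached to the system matrix: it fills v with an approximation of M^{-1} r. For orientation only, here is a sketch of the simplest such step, a Jacobi (diagonal) sweep; the function and its diag argument are assumptions for illustration, not the escript/paso API.

/* Hedged sketch: what "solve the preconditioner" means in the simplest (Jacobi) case,
   i.e. v = D^{-1} r with D = diag(A). Illustrative only; not Paso's implementation. */
void apply_jacobi_preconditioner(long n, const double* diag,
                                 const double* r, double* v)
{
    #pragma omp parallel for
    for (long i = 0; i < n; ++i)
        v[i] = r[i] / diag[i];   /* one diagonal scaling per row */
}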
@@ -208,7 +211,7 @@ err_t Paso_Solver_PCG(
                      sum_1+=ss;
                   }
            }
-           #ifdef PASO_MPI
+           #ifdef ESYS_MPI
             /* In case we have many MPI processes, each of which may have several OMP threads:
                OMP master participates in an MPI reduction to get global sum_1 */
             loc_sum[0] = sum_1;
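The comment retained in this hunk describes the hybrid reduction used for every inner product in the solver: OpenMP folds the thread-local partial sums on each rank, then one MPI_Allreduce combines the per-rank results. A self-contained sketch of that pattern, assuming MPI_COMM_WORLD instead of the A->mpi_info->comm communicator the real code uses:

/* Hedged sketch of the OMP-then-MPI reduction the hunk's comment describes.
   Illustrative only; the real code reduces over A->mpi_info->comm. */
#ifdef ESYS_MPI
#include <mpi.h>
#endif

double global_dot(const double* r, const double* v, long n_local)
{
    double sum_1 = 0.0;
    #pragma omp parallel for reduction(+:sum_1)
    for (long i = 0; i < n_local; ++i)
        sum_1 += r[i] * v[i];                  /* OpenMP folds the thread partials */

#ifdef ESYS_MPI
    double loc_sum = sum_1;                    /* each rank's local contribution */
    MPI_Allreduce(&loc_sum, &sum_1, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
#endif
    return sum_1;                              /* global <r,v> on every rank */
}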
@@ -247,7 +250,7 @@ err_t Paso_Solver_PCG(
            /* v=A*p */
            Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
            Performance_startMonitor(pp,PERFORMANCE_MVM);
-       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, p,ZERO,v);
+       Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, p,PASO_ZERO,v);
            Performance_stopMonitor(pp,PERFORMANCE_MVM);
            Performance_startMonitor(pp,PERFORMANCE_SOLVER);
 
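Only the scaling constants change here; the call still computes v = PASO_ONE*A*p + PASO_ZERO*v over the locally held rows of a 0-offset CSR matrix. A rough sketch of that kernel shape follows; the field names (ptr, index, val) are assumptions and need not match Paso's sparse-matrix layout.

/* Hedged sketch of a CSR matrix-vector product of the form v = alpha*A*p + beta*v,
   the operation Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, p, PASO_ZERO, v)
   performs. Field names are assumptions for illustration. */
void csr_mv(double alpha, long n_rows, const long* ptr, const long* index,
            const double* val, const double* p, double beta, double* v)
{
    #pragma omp parallel for
    for (long i = 0; i < n_rows; ++i) {
        double row_sum = 0.0;
        for (long k = ptr[i]; k < ptr[i + 1]; ++k)   /* row i, 0-based offsets */
            row_sum += val[k] * p[index[k]];
        v[i] = alpha * row_sum + beta * v[i];        /* beta = PASO_ZERO overwrites v */
    }
}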
@@ -279,7 +282,7 @@ err_t Paso_Solver_PCG(
                              sum_2+=ss;
                   }
            }
-           #ifdef PASO_MPI
+           #ifdef ESYS_MPI
           loc_sum[0] = sum_2;
           MPI_Allreduce(loc_sum, &sum_2, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
            #endif
@@ -323,7 +326,7 @@ err_t Paso_Solver_PCG(
                      sum_4+=ss1;
                   }
                }
-               #ifdef PASO_MPI
+               #ifdef ESYS_MPI
                loc_sum[0] = sum_3;
                loc_sum[1] = sum_4;
                MPI_Allreduce(loc_sum, sum, 2, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
@@ -333,8 +336,8 @@ err_t Paso_Solver_PCG(
             sum_5 = 0;
                 #pragma omp parallel private(i0, istart, iend, ipp, ss, gamma_1,gamma_2)
                 {
-                  gamma_1= ( (ABS(sum_3)<= ZERO) ? 0 : -sum_4/sum_3) ;
-                  gamma_2= ONE-gamma_1;
+                  gamma_1= ( (ABS(sum_3)<= PASO_ZERO) ? 0 : -sum_4/sum_3) ;
+                  gamma_2= PASO_ONE-gamma_1;
                   ss=0;
                   #ifdef USE_DYNAMIC_SCHEDULING
                       #pragma omp for schedule(dynamic, 1)
@@ -364,13 +367,13 @@ err_t Paso_Solver_PCG(
                       sum_5+=ss;
                   }
                 }
-                #ifdef PASO_MPI
+                #ifdef ESYS_MPI
                loc_sum[0] = sum_5;
                MPI_Allreduce(loc_sum, &sum_5, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
                 #endif
                 norm_of_residual=sqrt(sum_5);
                 convergeFlag = norm_of_residual <= tol;
-                maxIterFlag = num_iter == maxit;
+                maxIterFlag = num_iter > maxit;
                 breakFlag = (ABS(tau) <= TOLERANCE_FOR_SCALARS);
            }
     }
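The final change replaces the exact equality test on the iteration counter with a greater-than test, so the loop stops for any count beyond maxit rather than only when it hits maxit exactly. A compact restatement of the three stopping conditions in this hunk, written as a free function with assumed parameter names:

/* Hedged sketch restating the stopping logic of the hunk above; not Paso source. */
#include <cmath>

bool pcg_should_stop(double sum_5, double tol, double tau,
                     int num_iter, int maxit, double tol_for_scalars)
{
    const double norm_of_residual = std::sqrt(sum_5);            /* ||r|| from the reduced sum */
    const bool convergeFlag = norm_of_residual <= tol;           /* converged */
    const bool maxIterFlag  = num_iter > maxit;                  /* was "== maxit" in r1811 */
    const bool breakFlag    = std::fabs(tau) <= tol_for_scalars; /* scalar breakdown guard */
    return convergeFlag || maxIterFlag || breakFlag;
}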
