/[escript]/branches/doubleplusgood/paso/src/PCG.cpp
ViewVC logotype

Diff of /branches/doubleplusgood/paso/src/PCG.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1811 by ksteube, Thu Sep 25 23:11:13 2008 UTC revision 3981 by jfenwick, Fri Sep 21 02:47:54 2012 UTC
# Line 1  Line 1 
1    
2  /*******************************************************  /*****************************************************************************
3  *  *
4  * Copyright (c) 2003-2008 by University of Queensland  * Copyright (c) 2003-2012 by University of Queensland
5  * Earth Systems Science Computational Center (ESSCC)  * http://www.uq.edu.au
 * http://www.uq.edu.au/esscc  
6  *  *
7  * Primary Business: Queensland, Australia  * Primary Business: Queensland, Australia
8  * Licensed under the Open Software License version 3.0  * Licensed under the Open Software License version 3.0
9  * http://www.opensource.org/licenses/osl-3.0.php  * http://www.opensource.org/licenses/osl-3.0.php
10  *  *
11  *******************************************************/  * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12    * Development since 2012 by School of Earth Sciences
13    *
14    *****************************************************************************/
15    
16    
17  /* PCG iterations */  /* PCG iterations */
# Line 22  Line 24 
24  #include <omp.h>  #include <omp.h>
25  #endif  #endif
26    
27  #ifdef PASO_MPI  #ifdef ESYS_MPI
28  #include <mpi.h>  #include <mpi.h>
29  #endif  #endif
30    
# Line 32  Line 34 
34  *  =======  *  =======
35  *  *
36  *  PCG solves the linear system A*x = b using the  *  PCG solves the linear system A*x = b using the
37  *  preconditioned conjugate gradient method plus a smoother  *  preconditioned conjugate gradient method plus a smoother.
38  *  A has to be symmetric.  *  A has to be symmetric.
39  *  *
40  *  Convergence test: norm( b - A*x )< TOL.  *  Convergence test: norm( b - A*x )< TOL.
# Line 42  Line 44 
44  *  =========  *  =========
45  *  *
46  *  r       (input) DOUBLE PRECISION array, dimension N.  *  r       (input) DOUBLE PRECISION array, dimension N.
47  *          On entry, residual of inital guess x  *          On entry, residual of initial guess x.
48  *  *
49  *  x       (input/output) DOUBLE PRECISION array, dimension N.  *  x       (input/output) DOUBLE PRECISION array, dimension N.
50  *          On input, the initial guess.  *          On input, the initial guess.
# Line 54  Line 56 
56  *  INFO    (output) INT  *  INFO    (output) INT
57  *  *
58  *          = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.  *          = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
59  *          = SOLVEr_MAXITER_REACHED  *          = SOLVER_MAXITER_REACHED
60  *          = SOLVER_INPUT_ERROR Illegal parameter:  *          = SOLVER_INPUT_ERROR Illegal parameter:
61  *          = SOLVEr_BREAKDOWN: If parameters rHO or OMEGA become smaller  *          = SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller
62  *          = SOLVER_MEMORY_ERROR : If parameters rHO or OMEGA become smaller  *          = SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller
63  *  *
64  *  ==============================================================  *  ==============================================================
65  */  */
# Line 77  err_t Paso_Solver_PCG( Line 79  err_t Paso_Solver_PCG(
79      Paso_Performance* pp) {      Paso_Performance* pp) {
80    
81    /* Local variables */    /* Local variables */
82    dim_t num_iter=0,maxit,num_iter_global, chunk_size=-1, len,rest, np, ipp;    dim_t num_iter=0,maxit,num_iter_global, len,rest, np, ipp;
83    #ifdef USE_DYNAMIC_SCHEDULING
84      dim_t chunk_size=-1;
85    #endif
86    register double ss,ss1;    register double ss,ss1;
87    dim_t i0, istart, iend;    dim_t i0, istart, iend;
88    bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;    bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
# Line 85  err_t Paso_Solver_PCG( Line 90  err_t Paso_Solver_PCG(
90    dim_t n = Paso_SystemMatrix_getTotalNumRows(A);    dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
91    double *resid = tolerance, *rs=NULL, *p=NULL, *v=NULL, *x2=NULL ;    double *resid = tolerance, *rs=NULL, *p=NULL, *v=NULL, *x2=NULL ;
92    double tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,sum_1,sum_2,sum_3,sum_4,sum_5,tol;    double tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,sum_1,sum_2,sum_3,sum_4,sum_5,tol;
93  #ifdef PASO_MPI  #ifdef ESYS_MPI
94    double loc_sum[2], sum[2];    double loc_sum[2], sum[2];
95  #endif  #endif
96    double norm_of_residual,norm_of_residual_global;    double norm_of_residual=0,norm_of_residual_global;
97    register double d;    register double d;
98    
99    /* Should not be any executable code before this ifdef */    /* There should not be any executable code before this ifdef */
100    
101  #ifdef USE_DYNAMIC_SCHEDULING  #ifdef USE_DYNAMIC_SCHEDULING
102    
# Line 162  err_t Paso_Solver_PCG( Line 167  err_t Paso_Solver_PCG(
167      num_iter=0;      num_iter=0;
168    
169      /* PGH */      /* PGH */
170      /* without this we get a use of an unititialised var below */      /* without this we get a use of an uninitialised var below */
171      tau = 0;      tau = 0;
172    
173      /* start of iteration */      /* start of iteration */
# Line 173  err_t Paso_Solver_PCG( Line 178  err_t Paso_Solver_PCG(
178             /* The next lines were commented out before I got here */             /* The next lines were commented out before I got here */
179             /* v=prec(r)  */             /* v=prec(r)  */
180             /* tau=v*r; */             /* tau=v*r; */
181             /* leading to the use of an unititialised var below */             /* leading to the use of an uninitialised var below */
182    
183             Performance_stopMonitor(pp,PERFORMANCE_SOLVER);             Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
184             Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);             Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
185             Paso_Solver_solvePreconditioner(A,v,r);         Paso_SystemMatrix_solvePreconditioner(A,v,r);
186             Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);             Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
187             Performance_startMonitor(pp,PERFORMANCE_SOLVER);             Performance_startMonitor(pp,PERFORMANCE_SOLVER);
188    
# Line 208  err_t Paso_Solver_PCG( Line 213  err_t Paso_Solver_PCG(
213                       sum_1+=ss;                       sum_1+=ss;
214                    }                    }
215             }             }
216             #ifdef PASO_MPI             #ifdef ESYS_MPI
217              /* In case we have many MPI processes, each of which may have several OMP threads:              /* In case we have many MPI processes, each of which may have several OMP threads:
218                 OMP master participates in an MPI reduction to get global sum_1 */                 OMP master participates in an MPI reduction to get global sum_1 */
219              loc_sum[0] = sum_1;              loc_sum[0] = sum_1;
# Line 247  err_t Paso_Solver_PCG( Line 252  err_t Paso_Solver_PCG(
252             /* v=A*p */             /* v=A*p */
253             Performance_stopMonitor(pp,PERFORMANCE_SOLVER);             Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
254             Performance_startMonitor(pp,PERFORMANCE_MVM);             Performance_startMonitor(pp,PERFORMANCE_MVM);
255         Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, p,ZERO,v);         Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, p,PASO_ZERO,v);
256             Performance_stopMonitor(pp,PERFORMANCE_MVM);             Performance_stopMonitor(pp,PERFORMANCE_MVM);
257             Performance_startMonitor(pp,PERFORMANCE_SOLVER);             Performance_startMonitor(pp,PERFORMANCE_SOLVER);
258    
# Line 279  err_t Paso_Solver_PCG( Line 284  err_t Paso_Solver_PCG(
284                               sum_2+=ss;                               sum_2+=ss;
285                    }                    }
286             }             }
287             #ifdef PASO_MPI             #ifdef ESYS_MPI
288            loc_sum[0] = sum_2;            loc_sum[0] = sum_2;
289            MPI_Allreduce(loc_sum, &sum_2, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);            MPI_Allreduce(loc_sum, &sum_2, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
290             #endif             #endif
# Line 323  err_t Paso_Solver_PCG( Line 328  err_t Paso_Solver_PCG(
328                       sum_4+=ss1;                       sum_4+=ss1;
329                    }                    }
330                 }                 }
331                 #ifdef PASO_MPI                 #ifdef ESYS_MPI
332                 loc_sum[0] = sum_3;                 loc_sum[0] = sum_3;
333                 loc_sum[1] = sum_4;                 loc_sum[1] = sum_4;
334                 MPI_Allreduce(loc_sum, sum, 2, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);                 MPI_Allreduce(loc_sum, sum, 2, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
# Line 333  err_t Paso_Solver_PCG( Line 338  err_t Paso_Solver_PCG(
338              sum_5 = 0;              sum_5 = 0;
339                  #pragma omp parallel private(i0, istart, iend, ipp, ss, gamma_1,gamma_2)                  #pragma omp parallel private(i0, istart, iend, ipp, ss, gamma_1,gamma_2)
340                  {                  {
341                    gamma_1= ( (ABS(sum_3)<= ZERO) ? 0 : -sum_4/sum_3) ;                    gamma_1= ( (ABS(sum_3)<= PASO_ZERO) ? 0 : -sum_4/sum_3) ;
342                    gamma_2= ONE-gamma_1;                    gamma_2= PASO_ONE-gamma_1;
343                    ss=0;                    ss=0;
344                    #ifdef USE_DYNAMIC_SCHEDULING                    #ifdef USE_DYNAMIC_SCHEDULING
345                        #pragma omp for schedule(dynamic, 1)                        #pragma omp for schedule(dynamic, 1)
# Line 364  err_t Paso_Solver_PCG( Line 369  err_t Paso_Solver_PCG(
369                        sum_5+=ss;                        sum_5+=ss;
370                    }                    }
371                  }                  }
372                  #ifdef PASO_MPI                  #ifdef ESYS_MPI
373                 loc_sum[0] = sum_5;                 loc_sum[0] = sum_5;
374                 MPI_Allreduce(loc_sum, &sum_5, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);                 MPI_Allreduce(loc_sum, &sum_5, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
375                  #endif                  #endif
376                  norm_of_residual=sqrt(sum_5);                  norm_of_residual=sqrt(sum_5);
377                  convergeFlag = norm_of_residual <= tol;                  convergeFlag = norm_of_residual <= tol;
378                  maxIterFlag = num_iter == maxit;                  maxIterFlag = num_iter > maxit;
379                  breakFlag = (ABS(tau) <= TOLERANCE_FOR_SCALARS);                  breakFlag = (ABS(tau) <= TOLERANCE_FOR_SCALARS);
380             }             }
381      }      }
# Line 393  err_t Paso_Solver_PCG( Line 398  err_t Paso_Solver_PCG(
398    /*     End of PCG */    /*     End of PCG */
399    return status;    return status;
400  }  }
401    

Legend:
Removed from v.1811  
changed lines
  Added in v.3981

  ViewVC Help
Powered by ViewVC 1.1.26