/[escript]/branches/arrexp_2137_win/paso/src/PCG.c

Diff of /branches/arrexp_2137_win/paso/src/PCG.c


From: trunk/paso/src/Solvers/PCG.c, revision 415 by gross, Wed Jan 4 05:37:33 2006 UTC
To:   temp/paso/src/PCG.c, revision 1387 by trankine, Fri Jan 11 07:45:26 2008 UTC
@@ -1 +1 @@
 
 /* $Id$ */
 
+/*******************************************************
+ *
+ *           Copyright 2003-2007 by ACceSS MNRF
+ *       Copyright 2007 by University of Queensland
+ *
+ *                http://esscc.uq.edu.au
+ *        Primary Business: Queensland, Australia
+ *  Licensed under the Open Software License version 3.0
+ *     http://www.opensource.org/licenses/osl-3.0.php
+ *
+ *******************************************************/
+
 /* PCG iterations */
 
 #include "SystemMatrix.h"
 #include "Paso.h"
 #include "Solver.h"
-/* #include <math.h> */
 #ifdef _OPENMP
 #include <omp.h>
 #endif
 
+#ifdef PASO_MPI
+#include <mpi.h>
+#endif
+
 /*
 *
 *  Purpose
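
(The Purpose comment is cut off by the hunk boundary here. For orientation, the loop below implements standard preconditioned CG with residual smoothing; reading the quantities off the code, with preconditioner M and matrix A, one sweep computes

    \begin{aligned}
    v &= M^{-1} r, & \tau &= v^\top r, & \beta &= \tau/\tau_{\text{old}},\\
    p &= v + \beta p, & v &= A p, & \delta &= p^\top v,\\
    \alpha &= \tau/\delta, & r &\leftarrow r - \alpha v, & x_2 &\leftarrow x_2 + \alpha p,
    \end{aligned}

followed by the smoothing update of rs and x derived after the last hunk. This summary is inferred from the code, not quoted from the truncated comment.)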
@@ -51 +68 @@ err_t Paso_Solver_PCG(
     double * r,
     double * x,
     dim_t *iter,
-    double * tolerance) {
+    double * tolerance,
+    Paso_Performance* pp) {
 
 
   /* Local variables */
@@ -59 +77 @@ err_t Paso_Solver_PCG(
   dim_t i0;
   bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
   err_t status = SOLVER_NO_ERROR;
-  dim_t n = A->num_cols * A-> col_block_size;
+  dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
   double *resid = tolerance, *rs=NULL, *p=NULL, *v=NULL, *x2=NULL ;
   double tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,sum_1,sum_2,sum_3,sum_4,sum_5,tol;
-  double d,norm_of_residual,norm_of_residual_global;
+  double norm_of_residual,norm_of_residual_global, loc_sum[2], sum[2];
+  register double r_tmp,d,rs_tmp,x2_tmp,x_tmp;
 
 /*                                                                 */
 /*-----------------------------------------------------------------*/
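
(The new revision replaces the open-coded size expression with an accessor, which also has to make sense in the distributed PASO_MPI build. Its definition is not part of this diff; the following is a hypothetical sketch of what it plausibly computes, with the mainBlock and row_block_size field names being assumptions rather than taken from this page:

    /* Hypothetical sketch only: the real accessor lives elsewhere in Paso
       and may differ.  It plays the role the old expression
       A->num_cols * A->col_block_size played, i.e. the number of
       rank-local rows times the block size. */
    dim_t Paso_SystemMatrix_getTotalNumRows(Paso_SystemMatrix* A) {
       return A->mainBlock->numRows * A->row_block_size;  /* assumed fields */
    }
)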
@@ -88 +107 @@ err_t Paso_Solver_PCG(
     #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
                                        private(tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,norm_of_residual,num_iter)
     {
+       Performance_startMonitor(pp,PERFORMANCE_SOLVER);
        /* initialize data */
        #pragma omp for private(i0) schedule(static)
        for (i0=0;i0<n;i0++) {
           rs[i0]=r[i0];
           x2[i0]=x[i0];
+       }
+       #pragma omp for private(i0) schedule(static)
+       for (i0=0;i0<n;i0++) {
           p[i0]=0;
           v[i0]=0;
        }
@@ -110 +133 @@ err_t Paso_Solver_PCG(
            sum_5 = 0;
            }
            /* v=prec(r)  */
+           Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
+           Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
            Paso_Solver_solvePreconditioner(A,v,r);
+           Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
+           Performance_startMonitor(pp,PERFORMANCE_SOLVER);
            /* tau=v*r    */
            #pragma omp for private(i0) reduction(+:sum_1) schedule(static)
-           for (i0=0;i0<n;i0++) sum_1+=v[i0]*r[i0];
+           for (i0=0;i0<n;i0++) sum_1+=v[i0]*r[i0]; /* Limit to local values of v[] and r[] */
+           #ifdef PASO_MPI
+            /* In case we have many MPI processes, each of which may have several OMP threads:
+               OMP master participates in an MPI reduction to get global sum_1 */
+            #pragma omp master
+            {
+              loc_sum[0] = sum_1;
+              MPI_Allreduce(loc_sum, &sum_1, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
+            }
+           #endif
            tau_old=tau;
            tau=sum_1;
            /* p=v+beta*p */
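
(The block just added above is the hybrid reduction pattern this revision applies to every inner product: an OpenMP reduction produces the rank-local partial sum, then a single thread per rank, under omp master, folds the partials together with MPI_Allreduce. A minimal self-contained sketch of the same idea; the helper name hybrid_dot is illustrative and not part of Paso:

    #include <mpi.h>

    /* Global dot product of two distributed vectors: each rank holds n
       local entries; threads reduce locally, ranks reduce globally. */
    static double hybrid_dot(const double *v, const double *r, int n, MPI_Comm comm)
    {
        double local = 0.0, global = 0.0;
        int i;
        /* thread-level reduction over the rank-local entries */
        #pragma omp parallel for private(i) reduction(+:local) schedule(static)
        for (i = 0; i < n; i++) local += v[i] * r[i];
        /* process-level reduction, called once per rank outside the
           parallel region, so no "omp master" guard is needed here */
        MPI_Allreduce(&local, &global, 1, MPI_DOUBLE, MPI_SUM, comm);
        return global;
    }

One difference is worth noting: the sketch reduces outside the parallel region, while the code above performs the Allreduce on the master thread inside it. Since omp master implies no barrier, the in-loop variant depends on a later synchronization point before the other threads read the reduced value.)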
@@ -126 +162 @@ err_t Paso_Solver_PCG(
                for (i0=0;i0<n;i0++) p[i0]=v[i0]+beta*p[i0];
            }
            /* v=A*p */
+           Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
+           Performance_startMonitor(pp,PERFORMANCE_MVM);
+           Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, p,ZERO,v);
        Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, p,ZERO,v);
+           Performance_stopMonitor(pp,PERFORMANCE_MVM);
+           Performance_startMonitor(pp,PERFORMANCE_SOLVER);
            /* delta=p*v */
            #pragma omp for private(i0) reduction(+:sum_2) schedule(static)
            for (i0=0;i0<n;i0++) sum_2+=v[i0]*p[i0];
+           #ifdef PASO_MPI
+               #pragma omp master
+           {
+             loc_sum[0] = sum_2;
+             MPI_Allreduce(loc_sum, &sum_2, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
+           }
+           #endif
            delta=sum_2;
 
 
            if (! (breakFlag = (ABS(delta) <= TOLERANCE_FOR_SCALARS))) {
                alpha=tau/delta;
                /* smoother */
+               #pragma omp for private(i0) schedule(static)
+               for (i0=0;i0<n;i0++) r[i0]-=alpha*v[i0];
                #pragma omp for private(i0,d) reduction(+:sum_3,sum_4) schedule(static)
                for (i0=0;i0<n;i0++) {
-                     r[i0]-=alpha*v[i0];
                      d=r[i0]-rs[i0];
-                     sum_3=sum_3+d*d;
-                     sum_4=sum_4+d*rs[i0];
+                     sum_3+=d*d;
+                     sum_4+=d*rs[i0];
                }
+               #ifdef PASO_MPI
+                   #pragma omp master
+               {
+                 loc_sum[0] = sum_3;
+                 loc_sum[1] = sum_4;
+                 MPI_Allreduce(loc_sum, sum, 2, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
+                 sum_3=sum[0];
+                 sum_4=sum[1];
+               }
+               #endif
                gamma_1= ( (ABS(sum_3)<= ZERO) ? 0 : -sum_4/sum_3) ;
                gamma_2= ONE-gamma_1;
-               #pragma omp for private(i0) reduction(+:sum_5) schedule(static)
-               for (i0=0;i0<n;i0++) {
+               #pragma omp for private(i0,x2_tmp,x_tmp,rs_tmp) schedule(static)
+               for (i0=0;i0<n;++i0) {
+                   rs[i0]=gamma_2*rs[i0]+gamma_1*r[i0];
                    x2[i0]+=alpha*p[i0];
                    x[i0]=gamma_2*x[i0]+gamma_1*x2[i0];
-                   rs[i0]=gamma_2*rs[i0]+gamma_1*r[i0];
-                   sum_5+=rs[i0]*rs[i0];
-               }
-               norm_of_residual=sqrt(sum_5);
-               convergeFlag = norm_of_residual <= tol;
-               maxIterFlag = num_iter == maxit;
-               breakFlag = (ABS(tau) <= TOLERANCE_FOR_SCALARS);
-           }
+               }
+               #pragma omp for private(i0) reduction(+:sum_5) schedule(static)
+               for (i0=0;i0<n;++i0) sum_5+=rs[i0]*rs[i0];
+               #ifdef PASO_MPI
+                  #pragma omp master
+               {
+                  loc_sum[0] = sum_5;
+                  MPI_Allreduce(loc_sum, &sum_5, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
+               }
+               #endif
+               norm_of_residual=sqrt(sum_5);
+               convergeFlag = norm_of_residual <= tol;
+               maxIterFlag = num_iter == maxit;
+               breakFlag = (ABS(tau) <= TOLERANCE_FOR_SCALARS);
+           }
        }
        /* end of iteration */
        #pragma omp master
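
(The gamma updates introduced above implement the residual-smoothing step. Writing d = r - rs for the difference accumulated into sum_3 = d·d and sum_4 = d·rs, the smoothed residual rs + \gamma_1 d has minimal 2-norm when

    \gamma_1 \;=\; -\frac{d^\top r_s}{d^\top d} \;=\; -\frac{\mathtt{sum\_4}}{\mathtt{sum\_3}}, \qquad \gamma_2 \;=\; 1 - \gamma_1,

since rs + \gamma_1 d = \gamma_2 rs + \gamma_1 r. The iterate is combined with the same weights, x \leftarrow \gamma_2 x + \gamma_1 x_2, and sum_5 = rs·rs then supplies the smoothed residual norm tested for convergence.)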
@@ -168 +235 @@ err_t Paso_Solver_PCG(
            } else if (breakFlag) {
                status = SOLVER_BREAKDOWN;
            }
        }
+       Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
     }  /* end of parallel region */
     TMPMEMFREE(rs);
     TMPMEMFREE(x2);
@@ -180 +248 @@ err_t Paso_Solver_PCG(
   /*     End of PCG */
   return status;
 }
-
-/*
- * $Log$
- * Revision 1.2  2005/09/15 03:44:40  jgs
- * Merge of development branch dev-02 back to main trunk on 2005-09-15
- *
- * Revision 1.1.2.1  2005/09/05 06:29:49  gross
- * These files have been extracted from finley to define a stand alone libray for iterative
- * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
- * has not been tested yet.
- *
- *
- */
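
(For orientation, a sketch of how the revised entry point might be driven. The leading Paso_SystemMatrix* argument is inferred from the function body, since the signature hunk starts after the first parameter, and the in/out roles of iter and tolerance follow from maxit and resid in the local-variable block; treat those details as assumptions:

    /* Sketch: solve A x = b by PCG.  r holds the initial residual on
       entry and the final residual on return; *tolerance carries the
       target in and, via resid, the achieved norm out. */
    dim_t iter = 1000;           /* max iterations in, count used out (assumed) */
    double tol = 1.e-8;          /* tolerance in, residual norm out */
    Paso_Performance pp;         /* assumed initialized elsewhere */
    err_t status = Paso_Solver_PCG(A, r, x, &iter, &tol, &pp);
    if (status == SOLVER_NO_ERROR) {
        /* x now holds the accepted smoothed iterate */
    }
)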
