
Contents of /trunk/paso/src/PCG.c



Revision 1384 - Fri Jan 11 02:29:38 2008 UTC by phornby
Original Path: temp_trunk_copy/paso/src/PCG.c
File MIME type: text/plain
File size: 8620 byte(s)
Log message: Make a temp copy of the trunk before checking in the windows changes



/* $Id$ */

/*******************************************************
 *
 * Copyright 2003-2007 by ACceSS MNRF
 * Copyright 2007 by University of Queensland
 *
 * http://esscc.uq.edu.au
 * Primary Business: Queensland, Australia
 * Licensed under the Open Software License version 3.0
 * http://www.opensource.org/licenses/osl-3.0.php
 *
 *******************************************************/

/* PCG iterations */

#include "SystemMatrix.h"
#include "Paso.h"
#include "Solver.h"

#ifdef _OPENMP
#include <omp.h>
#endif

#ifdef PASO_MPI
#include <mpi.h>
#endif

/*
 *
 * Purpose
 * =======
 *
 * PCG solves the linear system A*x = b using the
 * preconditioned conjugate gradient method plus a smoother.
 * A has to be symmetric.
 *
 * Convergence test: norm( b - A*x ) < TOL.
 *
 * Arguments
 * =========
 *
 * r (input/output) DOUBLE PRECISION array, dimension N.
 * On entry, the residual of the initial guess x; overwritten
 * during the iteration.
 *
 * x (input/output) DOUBLE PRECISION array, dimension N.
 * On input, the initial guess.
 *
 * ITER (input/output) INT
 * On input, the maximum number of iterations to be performed.
 * On output, the actual number of iterations performed.
 *
 * INFO (output) INT
 *
 * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
 * = SOLVER_MAXITER_REACHED: The maximum number of iterations was reached without convergence.
 * = SOLVER_INPUT_ERROR: Illegal parameter.
 * = SOLVER_BREAKDOWN: The scalars TAU or DELTA became too small to continue.
 * = SOLVER_MEMORY_ERROR: Failed to allocate workspace.
 *
 * ==============================================================
 */
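
/*
 * Illustrative usage sketch (not part of the original file; A, r, x and
 * pp are assumed to be set up by the surrounding Paso code, with r
 * holding the residual b - A*x of the initial guess x on entry):
 *
 *    dim_t iter = 1000;   // maximum iterations in, actual count out
 *    double tol = 1.e-8;  // tolerance in, achieved residual norm out
 *    err_t info = Paso_Solver_PCG(A, r, x, &iter, &tol, pp);
 *    if (info == SOLVER_NO_ERROR) {
 *        // x now holds the iterated approximate solution
 *    }
 */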

err_t Paso_Solver_PCG(
    Paso_SystemMatrix * A,
    double * r,
    double * x,
    dim_t *iter,
    double * tolerance,
    Paso_Performance* pp) {

    /* Local variables */
    dim_t num_iter=0,maxit,num_iter_global;
    dim_t i0;
    bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
    err_t status = SOLVER_NO_ERROR;
    dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
    double *resid = tolerance, *rs=NULL, *p=NULL, *v=NULL, *x2=NULL;
    double tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,sum_1,sum_2,sum_3,sum_4,sum_5,tol;
    double norm_of_residual,norm_of_residual_global, loc_sum[2], sum[2];
    register double r_tmp,d,rs_tmp,x2_tmp,x_tmp;

    /*-----------------------------------------------------------------*/
    /* Start of Calculation :                                           */
    /*-----------------------------------------------------------------*/
    rs=TMPMEMALLOC(n,double);
    p=TMPMEMALLOC(n,double);
    v=TMPMEMALLOC(n,double);
    x2=TMPMEMALLOC(n,double);

    /* Test the input parameters. */
    if (n < 0) {
        status = SOLVER_INPUT_ERROR;
    } else if (rs==NULL || p==NULL || v==NULL || x2==NULL) {
        status = SOLVER_MEMORY_ERROR;
    } else {
        maxit = *iter;
        tol = *resid;
        #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
                             private(tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,norm_of_residual,num_iter)
        {
            Performance_startMonitor(pp,PERFORMANCE_SOLVER);
            /* initialize data */
            #pragma omp for private(i0) schedule(static)
            for (i0=0;i0<n;i0++) {
                rs[i0]=r[i0];
                x2[i0]=x[i0];
            }
            #pragma omp for private(i0) schedule(static)
            for (i0=0;i0<n;i0++) {
                p[i0]=0;
                v[i0]=0;
            }
            num_iter=0;
            /* start of iteration */
            while (!(convergeFlag || maxIterFlag || breakFlag)) {
                ++(num_iter);
                #pragma omp barrier
                #pragma omp master
                {
                    sum_1 = 0;
                    sum_2 = 0;
                    sum_3 = 0;
                    sum_4 = 0;
                    sum_5 = 0;
                }
                /* v=prec(r) */
                Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
                Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
                Paso_Solver_solvePreconditioner(A,v,r);
                Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
                Performance_startMonitor(pp,PERFORMANCE_SOLVER);
                /* tau=v*r */
                #pragma omp for private(i0) reduction(+:sum_1) schedule(static)
                for (i0=0;i0<n;i0++) sum_1+=v[i0]*r[i0]; /* limited to the local values of v[] and r[] */
                #ifdef PASO_MPI
                /* With several MPI processes, each of which may run several OMP threads,
                   the OMP master participates in an MPI reduction to get the global sum_1 */
                #pragma omp master
                {
                    loc_sum[0] = sum_1;
                    MPI_Allreduce(loc_sum, &sum_1, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
                }
                #endif
                tau_old=tau;
                tau=sum_1;
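                /* tau = <v,r> = <prec(r),r> is the preconditioned residual
                   inner product; on the first pass tau_old receives an
                   uninitialized value, which is harmless because the
                   num_iter==1 branch below never forms beta=tau/tau_old */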
                /* p=v+beta*p */
                if (num_iter==1) {
                    #pragma omp for private(i0) schedule(static)
                    for (i0=0;i0<n;i0++) p[i0]=v[i0];
                } else {
                    beta=tau/tau_old;
                    #pragma omp for private(i0) schedule(static)
                    for (i0=0;i0<n;i0++) p[i0]=v[i0]+beta*p[i0];
                }
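                /* beta=tau/tau_old is the standard preconditioned-CG update;
                   it keeps successive search directions A-conjugate */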
                /* v=A*p */
                Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
                Performance_startMonitor(pp,PERFORMANCE_MVM);
                Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, p,ZERO,v);
                Performance_stopMonitor(pp,PERFORMANCE_MVM);
                Performance_startMonitor(pp,PERFORMANCE_SOLVER);
                /* delta=p*v */
                #pragma omp for private(i0) reduction(+:sum_2) schedule(static)
                for (i0=0;i0<n;i0++) sum_2+=v[i0]*p[i0];
                #ifdef PASO_MPI
                #pragma omp master
                {
                    loc_sum[0] = sum_2;
                    MPI_Allreduce(loc_sum, &sum_2, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
                }
                #endif
                delta=sum_2;
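                /* delta = <p,A*p> is the curvature along the search direction;
                   a vanishing delta signals breakdown and is tested against
                   TOLERANCE_FOR_SCALARS below */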

                if (! (breakFlag = (ABS(delta) <= TOLERANCE_FOR_SCALARS))) {
                    alpha=tau/delta;
                    /* smoother */
                    #pragma omp for private(i0) schedule(static)
                    for (i0=0;i0<n;i0++) r[i0]-=alpha*v[i0];
                    #pragma omp for private(i0,d) reduction(+:sum_3,sum_4) schedule(static)
                    for (i0=0;i0<n;i0++) {
                        d=r[i0]-rs[i0];
                        sum_3+=d*d;
                        sum_4+=d*rs[i0];
                    }
                    #ifdef PASO_MPI
                    #pragma omp master
                    {
                        loc_sum[0] = sum_3;
                        loc_sum[1] = sum_4;
                        MPI_Allreduce(loc_sum, sum, 2, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
                        sum_3=sum[0];
                        sum_4=sum[1];
                    }
                    #endif
                    gamma_1= ( (ABS(sum_3)<= ZERO) ? 0 : -sum_4/sum_3) ;
                    gamma_2= ONE-gamma_1;
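                    /* with d=r-rs, gamma_1=-(d.rs)/(d.d) minimizes the norm of
                       the smoothed residual rs+gamma_1*d = gamma_2*rs+gamma_1*r,
                       i.e. a least-squares blend of the old and new residuals */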
                    #pragma omp for private(i0,x2_tmp,x_tmp,rs_tmp) schedule(static)
                    for (i0=0;i0<n;++i0) {
                        rs[i0]=gamma_2*rs[i0]+gamma_1*r[i0];
                        x2[i0]+=alpha*p[i0];
                        x[i0]=gamma_2*x[i0]+gamma_1*x2[i0];
                    }
                    #pragma omp for private(i0) reduction(+:sum_5) schedule(static)
                    for (i0=0;i0<n;++i0) sum_5+=rs[i0]*rs[i0];
                    #ifdef PASO_MPI
                    #pragma omp master
                    {
                        loc_sum[0] = sum_5;
                        MPI_Allreduce(loc_sum, &sum_5, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
                    }
                    #endif
                    norm_of_residual=sqrt(sum_5);
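                    /* convergence is tested on the norm of the smoothed
                       residual rs, not on r itself; x is blended with the
                       same weights so it remains consistent with rs */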
                    convergeFlag = norm_of_residual <= tol;
                    maxIterFlag = num_iter == maxit;
                    breakFlag = (ABS(tau) <= TOLERANCE_FOR_SCALARS);
                }
            }
            /* end of iteration */
            #pragma omp master
            {
                num_iter_global=num_iter;
                norm_of_residual_global=norm_of_residual;
                if (maxIterFlag) {
                    status = SOLVER_MAXITER_REACHED;
                } else if (breakFlag) {
                    status = SOLVER_BREAKDOWN;
                }
            }
            Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
        } /* end of parallel region */
        TMPMEMFREE(rs);
        TMPMEMFREE(x2);
        TMPMEMFREE(v);
        TMPMEMFREE(p);
        *iter=num_iter_global;
        *resid=norm_of_residual_global;
    }
    /* End of PCG */
    return status;
}

Properties

Name            Value
svn:eol-style   native
svn:keywords    Author Date Id Revision
