
Contents of /branches/windows_from_1456_trunk_1490_merged_in/paso/src/PCG.c



Revision 1504
Mon Apr 14 12:56:54 2008 UTC by trankine
File MIME type: text/plain
File size: 8622 byte(s)
Initialise tau before its first use to silence the compiler.
/* $Id$ */

/*******************************************************
 *
 *  Copyright 2003-2007 by ACceSS MNRF
 *  Copyright 2007 by University of Queensland
 *
 *  http://esscc.uq.edu.au
 *  Primary Business: Queensland, Australia
 *  Licensed under the Open Software License version 3.0
 *  http://www.opensource.org/licenses/osl-3.0.php
 *
 *******************************************************/

/* PCG iterations */

#include "SystemMatrix.h"
#include "Paso.h"
#include "Solver.h"

#ifdef _OPENMP
#include <omp.h>
#endif

#ifdef PASO_MPI
#include <mpi.h>
#endif

/*
*
*  Purpose
*  =======
*
*  PCG solves the linear system A*x = b using the
*  preconditioned conjugate gradient method plus a smoother.
*  A has to be symmetric.
*
*  Convergence test: norm( b - A*x ) < TOL.
*
*  Arguments
*  =========
*
*  r       (input) DOUBLE PRECISION array, dimension N.
*          On entry, residual of the initial guess x.
*
*  x       (input/output) DOUBLE PRECISION array, dimension N.
*          On input, the initial guess.
*
*  ITER    (input/output) INT
*          On input, the maximum number of iterations to be performed.
*          On output, the actual number of iterations performed.
*
*  INFO    (output) INT
*
*          = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
*          = SOLVER_MAXITER_REACHED: The iteration limit was reached without convergence.
*          = SOLVER_INPUT_ERROR: Illegal parameter (e.g. a negative dimension).
*          = SOLVER_BREAKDOWN: A scalar quantity (tau or delta) became too small to continue.
*          = SOLVER_MEMORY_ERROR: The work arrays could not be allocated.
*
*  ==============================================================
*/
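
/*
 * Illustrative usage sketch: it assumes the caller has already assembled a
 * symmetric Paso_SystemMatrix A, an initial guess x together with its
 * residual r = b - A*x, and a Paso_Performance handle pp (that setup is
 * not shown here and depends on the application):
 *
 *     dim_t  iter = 1000;     on input: iteration limit, on output: iterations used
 *     double tol  = 1.e-8;    on input: tolerance, on output: achieved residual norm
 *     err_t  err  = Paso_Solver_PCG(A, r, x, &iter, &tol, pp);
 *     if (err != SOLVER_NO_ERROR) { handle e.g. SOLVER_MAXITER_REACHED or SOLVER_BREAKDOWN }
 */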

err_t Paso_Solver_PCG(
    Paso_SystemMatrix * A,
    double * r,
    double * x,
    dim_t *iter,
    double * tolerance,
    Paso_Performance* pp) {

   /* Local variables */
   dim_t num_iter=0,maxit,num_iter_global;
   dim_t i0;
   bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
   err_t status = SOLVER_NO_ERROR;
   dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
   double *resid = tolerance, *rs=NULL, *p=NULL, *v=NULL, *x2=NULL ;
   double tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,sum_1,sum_2,sum_3,sum_4,sum_5,tol;
   #ifdef PASO_MPI
   double loc_sum[2], sum[2];
   #endif
   double norm_of_residual,norm_of_residual_global;
   register double d;

   /*                                                                 */
   /*-----------------------------------------------------------------*/
   /*                                                                 */
   /*   Start of Calculation :                                        */
   /*   ---------------------                                         */
   /*                                                                 */
   /*                                                                 */
   rs=TMPMEMALLOC(n,double);
   p=TMPMEMALLOC(n,double);
   v=TMPMEMALLOC(n,double);
   x2=TMPMEMALLOC(n,double);

   /* Test the input parameters. */

   if (n < 0) {
      status = SOLVER_INPUT_ERROR;
   } else if (rs==NULL || p==NULL || v==NULL || x2==NULL) {
      status = SOLVER_MEMORY_ERROR;
   } else {
      maxit = *iter;
      tol = *resid;
      #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
             private(tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,norm_of_residual,num_iter)
      {
         Performance_startMonitor(pp,PERFORMANCE_SOLVER);
         /* initialize data */
         #pragma omp for private(i0) schedule(static)
         for (i0=0;i0<n;i0++) {
            rs[i0]=r[i0];
            x2[i0]=x[i0];
         }
         #pragma omp for private(i0) schedule(static)
         for (i0=0;i0<n;i0++) {
            p[i0]=0;
            v[i0]=0;
         }
         num_iter=0;
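         /* tau is initialised before the loop so that the assignment
            tau_old=tau on the first pass reads a defined value (this also
            silences a compiler warning, see the revision log); tau_old is
            only used from the second iteration onwards */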
         tau = 0;
         /* start of iteration */
         while (!(convergeFlag || maxIterFlag || breakFlag)) {
            ++(num_iter);
            #pragma omp barrier
            #pragma omp master
            {
               sum_1 = 0;
               sum_2 = 0;
               sum_3 = 0;
               sum_4 = 0;
               sum_5 = 0;
            }
            /* v=prec(r) */
            Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
            Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
            Paso_Solver_solvePreconditioner(A,v,r);
            Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
            Performance_startMonitor(pp,PERFORMANCE_SOLVER);
            /* tau=v*r */
            #pragma omp for private(i0) reduction(+:sum_1) schedule(static)
            for (i0=0;i0<n;i0++) sum_1+=v[i0]*r[i0]; /* Limit to local values of v[] and r[] */
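            /* sum_1 is the inner product of the preconditioned residual v
               with the residual r, i.e. the quantity usually written as
               rho_k in textbook PCG */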
            #ifdef PASO_MPI
            /* In case we have many MPI processes, each of which may have several OMP threads:
               OMP master participates in an MPI reduction to get global sum_1 */
            #pragma omp master
            {
               loc_sum[0] = sum_1;
               MPI_Allreduce(loc_sum, &sum_1, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
            }
            #endif
            tau_old=tau;
            tau=sum_1;
            /* p=v+beta*p */
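            /* beta = tau/tau_old is the usual PCG direction-update
               coefficient; on the first iteration there is no previous
               direction, so p is simply set to v */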
            if (num_iter==1) {
               #pragma omp for private(i0) schedule(static)
               for (i0=0;i0<n;i0++) p[i0]=v[i0];
            } else {
               beta=tau/tau_old;
               #pragma omp for private(i0) schedule(static)
               for (i0=0;i0<n;i0++) p[i0]=v[i0]+beta*p[i0];
            }
            /* v=A*p */
            Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
            Performance_startMonitor(pp,PERFORMANCE_MVM);
            Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, p,ZERO,v);
            Performance_stopMonitor(pp,PERFORMANCE_MVM);
            Performance_startMonitor(pp,PERFORMANCE_SOLVER);
            /* delta=p*v */
            #pragma omp for private(i0) reduction(+:sum_2) schedule(static)
            for (i0=0;i0<n;i0++) sum_2+=v[i0]*p[i0];
            #ifdef PASO_MPI
            #pragma omp master
            {
               loc_sum[0] = sum_2;
               MPI_Allreduce(loc_sum, &sum_2, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
            }
            #endif
            delta=sum_2;

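            /* breakdown check: delta = p.A*p; if it is (numerically) zero the
               new search direction carries no information and alpha would not
               be well defined, so the iteration has to stop */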
            if (! (breakFlag = (ABS(delta) <= TOLERANCE_FOR_SCALARS))) {
               alpha=tau/delta;
               /* smoother */
               #pragma omp for private(i0) schedule(static)
               for (i0=0;i0<n;i0++) r[i0]-=alpha*v[i0];
               #pragma omp for private(i0,d) reduction(+:sum_3,sum_4) schedule(static)
               for (i0=0;i0<n;i0++) {
                  d=r[i0]-rs[i0];
                  sum_3+=d*d;
                  sum_4+=d*rs[i0];
               }
               #ifdef PASO_MPI
               #pragma omp master
               {
                  loc_sum[0] = sum_3;
                  loc_sum[1] = sum_4;
                  MPI_Allreduce(loc_sum, sum, 2, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
                  sum_3=sum[0];
                  sum_4=sum[1];
               }
               #endif
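               /* smoothing step: with d = r - rs, sum_3 = d.d and sum_4 = d.rs,
                  so gamma_1 = -sum_4/sum_3 minimises || rs + gamma*(r - rs) ||_2;
                  rs and x are then blended with the same weights below */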
               gamma_1= ( (ABS(sum_3)<= ZERO) ? 0 : -sum_4/sum_3) ;
               gamma_2= ONE-gamma_1;
               #pragma omp for private(i0) schedule(static)
               for (i0=0;i0<n;++i0) {
                  rs[i0]=gamma_2*rs[i0]+gamma_1*r[i0];
                  x2[i0]+=alpha*p[i0];
                  x[i0]=gamma_2*x[i0]+gamma_1*x2[i0];
               }
               #pragma omp for private(i0) reduction(+:sum_5) schedule(static)
               for (i0=0;i0<n;++i0) sum_5+=rs[i0]*rs[i0];
               #ifdef PASO_MPI
               #pragma omp master
               {
                  loc_sum[0] = sum_5;
                  MPI_Allreduce(loc_sum, &sum_5, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
               }
               #endif
               norm_of_residual=sqrt(sum_5);
               convergeFlag = norm_of_residual <= tol;
               maxIterFlag = num_iter == maxit;
               breakFlag = (ABS(tau) <= TOLERANCE_FOR_SCALARS);
            }
         }
         /* end of iteration */
         #pragma omp master
         {
            num_iter_global=num_iter;
            norm_of_residual_global=norm_of_residual;
            if (maxIterFlag) {
               status = SOLVER_MAXITER_REACHED;
            } else if (breakFlag) {
               status = SOLVER_BREAKDOWN;
            }
         }
         Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
      }  /* end of parallel region */
      TMPMEMFREE(rs);
      TMPMEMFREE(x2);
      TMPMEMFREE(v);
      TMPMEMFREE(p);
      *iter=num_iter_global;
      *resid=norm_of_residual_global;
   }
   /* End of PCG */
   return status;
}

Properties

Name            Value
svn:eol-style   native
svn:keywords    Author Date Id Revision
