/[escript]/branches/arrexp_2137_win/paso/src/PCG.c
ViewVC logotype

Contents of /branches/arrexp_2137_win/paso/src/PCG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 584 - (show annotations)
Thu Mar 9 23:03:38 2006 UTC (13 years, 3 months ago) by gross
Original Path: trunk/paso/src/Solvers/PCG.c
File MIME type: text/plain
File size: 7259 byte(s)
eigenvalues: compiles and passes tests on altix now
1 /* $Id$ */
2
3
4 /* PCG iterations */
5
6 #include "SystemMatrix.h"
7 #include "Paso.h"
8 #include "Solver.h"
9 /* #include <math.h> */
10 #ifdef _OPENMP
11 #include <omp.h>
12 #endif
13
14 /*
15 *
16 * Purpose
17 * =======
18 *
19 * PCG solves the linear system A*x = b using the
20 * preconditioned conjugate gradient method plus a smoother
21 * A has to be symmetric.
22 *
23 * Convergence test: norm( b - A*x )< TOL.
24 * For other measures, see the above reference.
25 *
26 * Arguments
27 * =========
28 *
29 * r (input) DOUBLE PRECISION array, dimension N.
30 * On entry, residual of inital guess x
31 *
32 * x (input/output) DOUBLE PRECISION array, dimension N.
33 * On input, the initial guess.
34 *
35 * ITER (input/output) INT
36 * On input, the maximum iterations to be performed.
37 * On output, actual number of iterations performed.
38 *
39 * INFO (output) INT
40 *
41 * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
42 * = SOLVEr_MAXITER_REACHED
43 * = SOLVER_INPUT_ERROR Illegal parameter:
44 * = SOLVEr_BREAKDOWN: If parameters rHO or OMEGA become smaller
45 * = SOLVER_MEMORY_ERROR : If parameters rHO or OMEGA become smaller
46 *
47 * ==============================================================
48 */
49
50 err_t Paso_Solver_PCG(
51 Paso_SystemMatrix * A,
52 double * r,
53 double * x,
54 dim_t *iter,
55 double * tolerance,
56 Paso_Performance* pp) {
57
58
59 /* Local variables */
60 dim_t num_iter=0,maxit,num_iter_global;
61 dim_t i0;
62 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
63 err_t status = SOLVER_NO_ERROR;
64 dim_t n = A->num_cols * A-> col_block_size;
65 double *resid = tolerance, *rs=NULL, *p=NULL, *v=NULL, *x2=NULL ;
66 double tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,sum_1,sum_2,sum_3,sum_4,sum_5,tol;
67 double norm_of_residual,norm_of_residual_global;
68 register double r_tmp,d,rs_tmp,x2_tmp,x_tmp;
69
70 /* */
71 /*-----------------------------------------------------------------*/
72 /* */
73 /* Start of Calculation : */
74 /* --------------------- */
75 /* */
76 /* */
77 rs=TMPMEMALLOC(n,double);
78 p=TMPMEMALLOC(n,double);
79 v=TMPMEMALLOC(n,double);
80 x2=TMPMEMALLOC(n,double);
81
82 /* Test the input parameters. */
83
84 if (n < 0) {
85 status = SOLVER_INPUT_ERROR;
86 } else if (rs==NULL || p==NULL || v==NULL || x2==NULL) {
87 status = SOLVER_MEMORY_ERROR;
88 } else {
89 maxit = *iter;
90 tol = *resid;
91 #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
92 private(tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,norm_of_residual,num_iter)
93 {
94 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
95 /* initialize data */
96 #pragma omp for private(i0) schedule(static)
97 for (i0=0;i0<n;i0++) {
98 rs[i0]=r[i0];
99 x2[i0]=x[i0];
100 }
101 #pragma omp for private(i0) schedule(static)
102 for (i0=0;i0<n;i0++) {
103 p[i0]=0;
104 v[i0]=0;
105 }
106 num_iter=0;
107 /* start of iteration */
108 while (!(convergeFlag || maxIterFlag || breakFlag)) {
109 ++(num_iter);
110 #pragma omp barrier
111 #pragma omp master
112 {
113 sum_1 = 0;
114 sum_2 = 0;
115 sum_3 = 0;
116 sum_4 = 0;
117 sum_5 = 0;
118 }
119 /* v=prec(r) */
120 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
121 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
122 Paso_Solver_solvePreconditioner(A,v,r);
123 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
124 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
125 /* tau=v*r */
126 #pragma omp for private(i0) reduction(+:sum_1) schedule(static)
127 for (i0=0;i0<n;i0++) sum_1+=v[i0]*r[i0];
128 tau_old=tau;
129 tau=sum_1;
130 /* p=v+beta*p */
131 if (num_iter==1) {
132 #pragma omp for private(i0) schedule(static)
133 for (i0=0;i0<n;i0++) p[i0]=v[i0];
134 } else {
135 beta=tau/tau_old;
136 #pragma omp for private(i0) schedule(static)
137 for (i0=0;i0<n;i0++) p[i0]=v[i0]+beta*p[i0];
138 }
139 /* v=A*p */
140 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
141 Performance_startMonitor(pp,PERFORMANCE_MVM);
142 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, p,ZERO,v);
143 Performance_stopMonitor(pp,PERFORMANCE_MVM);
144 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
145 /* delta=p*v */
146 #pragma omp for private(i0) reduction(+:sum_2) schedule(static)
147 for (i0=0;i0<n;i0++) sum_2+=v[i0]*p[i0];
148 delta=sum_2;
149
150
151 if (! (breakFlag = (ABS(delta) <= TOLERANCE_FOR_SCALARS))) {
152 alpha=tau/delta;
153 /* smoother */
154 #pragma omp for private(i0) schedule(static)
155 for (i0=0;i0<n;i0++) r[i0]-=alpha*v[i0];
156 #pragma omp for private(i0,d) reduction(+:sum_3,sum_4) schedule(static)
157 for (i0=0;i0<n;i0++) {
158 d=r[i0]-rs[i0];
159 sum_3+=d*d;
160 sum_4+=d*rs[i0];
161 }
162 gamma_1= ( (ABS(sum_3)<= ZERO) ? 0 : -sum_4/sum_3) ;
163 gamma_2= ONE-gamma_1;
164 #pragma omp for private(i0,x2_tmp,x_tmp,rs_tmp) schedule(static)
165 for (i0=0;i0<n;++i0) {
166 rs[i0]=gamma_2*rs[i0]+gamma_1*r[i0];
167 x2[i0]+=alpha*p[i0];
168 x[i0]=gamma_2*x[i0]+gamma_1*x2[i0];
169 }
170 #pragma omp for private(i0) reduction(+:sum_5) schedule(static)
171 for (i0=0;i0<n;++i0) sum_5+=rs[i0]*rs[i0];
172 norm_of_residual=sqrt(sum_5);
173 convergeFlag = norm_of_residual <= tol;
174 maxIterFlag = num_iter == maxit;
175 breakFlag = (ABS(tau) <= TOLERANCE_FOR_SCALARS);
176 }
177 }
178 /* end of iteration */
179 #pragma omp master
180 {
181 num_iter_global=num_iter;
182 norm_of_residual_global=norm_of_residual;
183 if (maxIterFlag) {
184 status = SOLVER_MAXITER_REACHED;
185 } else if (breakFlag) {
186 status = SOLVER_BREAKDOWN;
187 }
188 }
189 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
190 } /* end of parallel region */
191 TMPMEMFREE(rs);
192 TMPMEMFREE(x2);
193 TMPMEMFREE(v);
194 TMPMEMFREE(p);
195 *iter=num_iter_global;
196 *resid=norm_of_residual_global;
197 }
198 /* End of PCG */
199 return status;
200 }
201
202 /*
203 * $Log$
204 * Revision 1.2 2005/09/15 03:44:40 jgs
205 * Merge of development branch dev-02 back to main trunk on 2005-09-15
206 *
207 * Revision 1.1.2.1 2005/09/05 06:29:49 gross
208 * These files have been extracted from finley to define a stand alone libray for iterative
209 * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
210 * has not been tested yet.
211 *
212 *
213 */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26