/[escript]/trunk/paso/src/Solvers/PCG.c
ViewVC logotype

Contents of /trunk/paso/src/Solvers/PCG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 495 - (show annotations)
Mon Feb 6 06:32:06 2006 UTC (13 years, 4 months ago) by gross
File MIME type: text/plain
File size: 6638 byte(s)
performance monitoring added. complies without PAPI.
1 /* $Id$ */
2
3
4 /* PCG iterations */
5
6 #include "SystemMatrix.h"
7 #include "Paso.h"
8 #include "Solver.h"
9 /* #include <math.h> */
10 #ifdef _OPENMP
11 #include <omp.h>
12 #endif
13
14 /*
15 *
16 * Purpose
17 * =======
18 *
19 * PCG solves the linear system A*x = b using the
20 * preconditioned conjugate gradient method plus a smoother
21 * A has to be symmetric.
22 *
23 * Convergence test: norm( b - A*x )< TOL.
24 * For other measures, see the above reference.
25 *
26 * Arguments
27 * =========
28 *
29 * r (input) DOUBLE PRECISION array, dimension N.
30 * On entry, residual of inital guess x
31 *
32 * x (input/output) DOUBLE PRECISION array, dimension N.
33 * On input, the initial guess.
34 *
35 * ITER (input/output) INT
36 * On input, the maximum iterations to be performed.
37 * On output, actual number of iterations performed.
38 *
39 * INFO (output) INT
40 *
41 * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
42 * = SOLVEr_MAXITER_REACHED
43 * = SOLVER_INPUT_ERROR Illegal parameter:
44 * = SOLVEr_BREAKDOWN: If parameters rHO or OMEGA become smaller
45 * = SOLVER_MEMORY_ERROR : If parameters rHO or OMEGA become smaller
46 *
47 * ==============================================================
48 */
49
50 err_t Paso_Solver_PCG(
51 Paso_SystemMatrix * A,
52 double * r,
53 double * x,
54 dim_t *iter,
55 double * tolerance) {
56
57
58 /* Local variables */
59 dim_t num_iter=0,maxit,num_iter_global;
60 dim_t i0;
61 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
62 err_t status = SOLVER_NO_ERROR;
63 dim_t n = A->num_cols * A-> col_block_size;
64 double *resid = tolerance, *rs=NULL, *p=NULL, *v=NULL, *x2=NULL ;
65 double tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,sum_1,sum_2,sum_3,sum_4,sum_5,tol;
66 double norm_of_residual,norm_of_residual_global;
67 register double r_tmp,d,rs_tmp,x2_tmp,x_tmp;
68
69 /* */
70 /*-----------------------------------------------------------------*/
71 /* */
72 /* Start of Calculation : */
73 /* --------------------- */
74 /* */
75 /* */
76 rs=TMPMEMALLOC(n,double);
77 p=TMPMEMALLOC(n,double);
78 v=TMPMEMALLOC(n,double);
79 x2=TMPMEMALLOC(n,double);
80
81 /* Test the input parameters. */
82
83 if (n < 0) {
84 status = SOLVER_INPUT_ERROR;
85 } else if (rs==NULL || p==NULL || v==NULL || x2==NULL) {
86 status = SOLVER_MEMORY_ERROR;
87 } else {
88 maxit = *iter;
89 tol = *resid;
90 #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
91 private(tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,norm_of_residual,num_iter)
92 {
93 /* initialize data */
94 #pragma omp for private(i0) schedule(static)
95 for (i0=0;i0<n;i0++) {
96 rs[i0]=r[i0];
97 x2[i0]=x[i0];
98 }
99 #pragma omp for private(i0) schedule(static)
100 for (i0=0;i0<n;i0++) {
101 p[i0]=0;
102 v[i0]=0;
103 }
104 num_iter=0;
105 /* start of iteration */
106 while (!(convergeFlag || maxIterFlag || breakFlag)) {
107 ++(num_iter);
108 #pragma omp barrier
109 #pragma omp master
110 {
111 sum_1 = 0;
112 sum_2 = 0;
113 sum_3 = 0;
114 sum_4 = 0;
115 sum_5 = 0;
116 }
117 /* v=prec(r) */
118 Paso_Solver_solvePreconditioner(A,v,r);
119 /* tau=v*r */
120 #pragma omp for private(i0) reduction(+:sum_1) schedule(static)
121 for (i0=0;i0<n;i0++) sum_1+=v[i0]*r[i0];
122 tau_old=tau;
123 tau=sum_1;
124 /* p=v+beta*p */
125 if (num_iter==1) {
126 #pragma omp for private(i0) schedule(static)
127 for (i0=0;i0<n;i0++) p[i0]=v[i0];
128 } else {
129 beta=tau/tau_old;
130 #pragma omp for private(i0) schedule(static)
131 for (i0=0;i0<n;i0++) p[i0]=v[i0]+beta*p[i0];
132 }
133 /* v=A*p */
134 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, p,ZERO,v);
135 /* delta=p*v */
136 #pragma omp for private(i0) reduction(+:sum_2) schedule(static)
137 for (i0=0;i0<n;i0++) sum_2+=v[i0]*p[i0];
138 delta=sum_2;
139
140
141 if (! (breakFlag = (ABS(delta) <= TOLERANCE_FOR_SCALARS))) {
142 alpha=tau/delta;
143 /* smoother */
144 #pragma omp for private(i0) schedule(static)
145 for (i0=0;i0<n;i0++) r[i0]-=alpha*v[i0];
146 #pragma omp for private(i0,d) reduction(+:sum_3,sum_4) schedule(static)
147 for (i0=0;i0<n;i0++) {
148 d=r[i0]-rs[i0];
149 sum_3+=d*d;
150 sum_4+=d*rs[i0];
151 }
152 gamma_1= ( (ABS(sum_3)<= ZERO) ? 0 : -sum_4/sum_3) ;
153 gamma_2= ONE-gamma_1;
154 #pragma omp for private(i0,x2_tmp,x_tmp,rs_tmp) schedule(static)
155 for (i0=0;i0<n;++i0) {
156 rs[i0]=gamma_2*rs[i0]+gamma_1*r[i0];
157 x2[i0]+=alpha*p[i0];
158 x[i0]=gamma_2*x[i0]+gamma_1*x2[i0];
159 }
160 #pragma omp for private(i0) reduction(+:sum_5) schedule(static)
161 for (i0=0;i0<n;++i0) sum_5+=rs[i0]*rs[i0];
162 norm_of_residual=sqrt(sum_5);
163 convergeFlag = norm_of_residual <= tol;
164 maxIterFlag = num_iter == maxit;
165 breakFlag = (ABS(tau) <= TOLERANCE_FOR_SCALARS);
166 }
167 }
168 /* end of iteration */
169 #pragma omp master
170 {
171 num_iter_global=num_iter;
172 norm_of_residual_global=norm_of_residual;
173 if (maxIterFlag) {
174 status = SOLVER_MAXITER_REACHED;
175 } else if (breakFlag) {
176 status = SOLVER_BREAKDOWN;
177 }
178 }
179 } /* end of parallel region */
180 TMPMEMFREE(rs);
181 TMPMEMFREE(x2);
182 TMPMEMFREE(v);
183 TMPMEMFREE(p);
184 *iter=num_iter_global;
185 *resid=norm_of_residual_global;
186 }
187 /* End of PCG */
188 return status;
189 }
190
191 /*
192 * $Log$
193 * Revision 1.2 2005/09/15 03:44:40 jgs
194 * Merge of development branch dev-02 back to main trunk on 2005-09-15
195 *
196 * Revision 1.1.2.1 2005/09/05 06:29:49 gross
197 * These files have been extracted from finley to define a stand alone libray for iterative
198 * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
199 * has not been tested yet.
200 *
201 *
202 */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26