/[escript]/trunk/paso/src/PCG.c
ViewVC logotype

Contents of /trunk/paso/src/PCG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 757 - (show annotations)
Mon Jun 26 13:12:56 2006 UTC (13 years, 3 months ago) by woo409
File MIME type: text/plain
File size: 7881 byte(s)
+ Merge of intelc_win32 branch (revision 741:755) with trunk. Tested on iVEC altix (run_tests and py_tests all pass)

1 /* $Id$ */
2
3
4 /*
5 ********************************************************************************
6 * Copyright 2006 by ACcESS MNRF *
7 * *
8 * http://www.access.edu.au *
9 * Primary Business: Queensland, Australia *
10 * Licensed under the Open Software License version 3.0 *
11 * http://www.opensource.org/licenses/osl-3.0.php *
12 ********************************************************************************
13 */
14
15 /* PCG iterations */
16
17 #include "SystemMatrix.h"
18 #include "Paso.h"
19 #include "Solver.h"
20
21 #ifdef _OPENMP
22 #include <omp.h>
23 #endif
24
25 /*
26 *
27 * Purpose
28 * =======
29 *
30 * PCG solves the linear system A*x = b using the
31 * preconditioned conjugate gradient method plus a smoother
32 * A has to be symmetric.
33 *
34 * Convergence test: norm( b - A*x )< TOL.
35 * For other measures, see the above reference.
36 *
37 * Arguments
38 * =========
39 *
40 * r (input) DOUBLE PRECISION array, dimension N.
41 * On entry, residual of inital guess x
42 *
43 * x (input/output) DOUBLE PRECISION array, dimension N.
44 * On input, the initial guess.
45 *
46 * ITER (input/output) INT
47 * On input, the maximum iterations to be performed.
48 * On output, actual number of iterations performed.
49 *
50 * INFO (output) INT
51 *
52 * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
53 * = SOLVEr_MAXITER_REACHED
54 * = SOLVER_INPUT_ERROR Illegal parameter:
55 * = SOLVEr_BREAKDOWN: If parameters rHO or OMEGA become smaller
56 * = SOLVER_MEMORY_ERROR : If parameters rHO or OMEGA become smaller
57 *
58 * ==============================================================
59 */
60
61 err_t Paso_Solver_PCG(
62 Paso_SystemMatrix * A,
63 double * r,
64 double * x,
65 dim_t *iter,
66 double * tolerance,
67 Paso_Performance* pp) {
68
69
70 /* Local variables */
71 dim_t num_iter=0,maxit,num_iter_global;
72 dim_t i0;
73 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
74 err_t status = SOLVER_NO_ERROR;
75 dim_t n = A->num_cols * A-> col_block_size;
76 double *resid = tolerance, *rs=NULL, *p=NULL, *v=NULL, *x2=NULL ;
77 double tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,sum_1,sum_2,sum_3,sum_4,sum_5,tol;
78 double norm_of_residual,norm_of_residual_global;
79 register double r_tmp,d,rs_tmp,x2_tmp,x_tmp;
80
81 /* */
82 /*-----------------------------------------------------------------*/
83 /* */
84 /* Start of Calculation : */
85 /* --------------------- */
86 /* */
87 /* */
88 rs=TMPMEMALLOC(n,double);
89 p=TMPMEMALLOC(n,double);
90 v=TMPMEMALLOC(n,double);
91 x2=TMPMEMALLOC(n,double);
92
93 /* Test the input parameters. */
94
95 if (n < 0) {
96 status = SOLVER_INPUT_ERROR;
97 } else if (rs==NULL || p==NULL || v==NULL || x2==NULL) {
98 status = SOLVER_MEMORY_ERROR;
99 } else {
100 maxit = *iter;
101 tol = *resid;
102 #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
103 private(tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,norm_of_residual,num_iter)
104 {
105 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
106 /* initialize data */
107 #pragma omp for private(i0) schedule(static)
108 for (i0=0;i0<n;i0++) {
109 rs[i0]=r[i0];
110 x2[i0]=x[i0];
111 }
112 #pragma omp for private(i0) schedule(static)
113 for (i0=0;i0<n;i0++) {
114 p[i0]=0;
115 v[i0]=0;
116 }
117 num_iter=0;
118 /* start of iteration */
119 while (!(convergeFlag || maxIterFlag || breakFlag)) {
120 ++(num_iter);
121 #pragma omp barrier
122 #pragma omp master
123 {
124 sum_1 = 0;
125 sum_2 = 0;
126 sum_3 = 0;
127 sum_4 = 0;
128 sum_5 = 0;
129 }
130 /* v=prec(r) */
131 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
132 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
133 Paso_Solver_solvePreconditioner(A,v,r);
134 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
135 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
136 /* tau=v*r */
137 #pragma omp for private(i0) reduction(+:sum_1) schedule(static)
138 for (i0=0;i0<n;i0++) sum_1+=v[i0]*r[i0];
139 tau_old=tau;
140 tau=sum_1;
141 /* p=v+beta*p */
142 if (num_iter==1) {
143 #pragma omp for private(i0) schedule(static)
144 for (i0=0;i0<n;i0++) p[i0]=v[i0];
145 } else {
146 beta=tau/tau_old;
147 #pragma omp for private(i0) schedule(static)
148 for (i0=0;i0<n;i0++) p[i0]=v[i0]+beta*p[i0];
149 }
150 /* v=A*p */
151 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
152 Performance_startMonitor(pp,PERFORMANCE_MVM);
153 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, p,ZERO,v);
154 Performance_stopMonitor(pp,PERFORMANCE_MVM);
155 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
156 /* delta=p*v */
157 #pragma omp for private(i0) reduction(+:sum_2) schedule(static)
158 for (i0=0;i0<n;i0++) sum_2+=v[i0]*p[i0];
159 delta=sum_2;
160
161
162 if (! (breakFlag = (ABS(delta) <= TOLERANCE_FOR_SCALARS))) {
163 alpha=tau/delta;
164 /* smoother */
165 #pragma omp for private(i0) schedule(static)
166 for (i0=0;i0<n;i0++) r[i0]-=alpha*v[i0];
167 #pragma omp for private(i0,d) reduction(+:sum_3,sum_4) schedule(static)
168 for (i0=0;i0<n;i0++) {
169 d=r[i0]-rs[i0];
170 sum_3+=d*d;
171 sum_4+=d*rs[i0];
172 }
173 gamma_1= ( (ABS(sum_3)<= ZERO) ? 0 : -sum_4/sum_3) ;
174 gamma_2= ONE-gamma_1;
175 #pragma omp for private(i0,x2_tmp,x_tmp,rs_tmp) schedule(static)
176 for (i0=0;i0<n;++i0) {
177 rs[i0]=gamma_2*rs[i0]+gamma_1*r[i0];
178 x2[i0]+=alpha*p[i0];
179 x[i0]=gamma_2*x[i0]+gamma_1*x2[i0];
180 }
181 #pragma omp for private(i0) reduction(+:sum_5) schedule(static)
182 for (i0=0;i0<n;++i0) sum_5+=rs[i0]*rs[i0];
183 norm_of_residual=sqrt(sum_5);
184 convergeFlag = norm_of_residual <= tol;
185 maxIterFlag = num_iter == maxit;
186 breakFlag = (ABS(tau) <= TOLERANCE_FOR_SCALARS);
187 }
188 }
189 /* end of iteration */
190 #pragma omp master
191 {
192 num_iter_global=num_iter;
193 norm_of_residual_global=norm_of_residual;
194 if (maxIterFlag) {
195 status = SOLVER_MAXITER_REACHED;
196 } else if (breakFlag) {
197 status = SOLVER_BREAKDOWN;
198 }
199 }
200 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
201 } /* end of parallel region */
202 TMPMEMFREE(rs);
203 TMPMEMFREE(x2);
204 TMPMEMFREE(v);
205 TMPMEMFREE(p);
206 *iter=num_iter_global;
207 *resid=norm_of_residual_global;
208 }
209 /* End of PCG */
210 return status;
211 }
212
213 /*
214 * $Log$
215 * Revision 1.2 2005/09/15 03:44:40 jgs
216 * Merge of development branch dev-02 back to main trunk on 2005-09-15
217 *
218 * Revision 1.1.2.1 2005/09/05 06:29:49 gross
219 * These files have been extracted from finley to define a stand alone libray for iterative
220 * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
221 * has not been tested yet.
222 *
223 *
224 */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26