1 |
/* $Id$ */ |
2 |
|
3 |
|
4 |
/* |
5 |
******************************************************************************** |
6 |
* Copyright 2006 by ACcESS MNRF * |
7 |
* * |
8 |
* http://www.access.edu.au * |
9 |
* Primary Business: Queensland, Australia * |
10 |
* Licensed under the Open Software License version 3.0 * |
11 |
* http://www.opensource.org/licenses/osl-3.0.php * |
12 |
******************************************************************************** |
13 |
*/ |
14 |
|
15 |
/* PCG iterations */ |
16 |
|
17 |
#include "../SystemMatrix.h" |
18 |
#include "../Paso.h" |
19 |
#include "Solver.h" |
20 |
/* #include <math.h> */ |
21 |
#ifdef _OPENMP |
22 |
#include <omp.h> |
23 |
#endif |
24 |
|
25 |
/* |
26 |
* |
27 |
* Purpose |
28 |
* ======= |
29 |
* |
30 |
* PCG solves the linear system A*x = b using the |
31 |
* preconditioned conjugate gradient method plus a smoother |
32 |
* A has to be symmetric. |
33 |
* |
34 |
* Convergence test: norm( b - A*x )< TOL. |
35 |
* For other measures, see the above reference. |
36 |
* |
37 |
* Arguments |
38 |
* ========= |
39 |
* |
40 |
* r (input) DOUBLE PRECISION array, dimension N. |
41 |
* On entry, residual of inital guess x |
42 |
* |
43 |
* x (input/output) DOUBLE PRECISION array, dimension N. |
44 |
* On input, the initial guess. |
45 |
* |
46 |
* ITER (input/output) INT |
47 |
* On input, the maximum iterations to be performed. |
48 |
* On output, actual number of iterations performed. |
49 |
* |
50 |
* INFO (output) INT |
51 |
* |
52 |
* = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned. |
53 |
* = SOLVEr_MAXITER_REACHED |
54 |
* = SOLVER_INPUT_ERROR Illegal parameter: |
55 |
* = SOLVEr_BREAKDOWN: If parameters rHO or OMEGA become smaller |
56 |
* = SOLVER_MEMORY_ERROR : If parameters rHO or OMEGA become smaller |
57 |
* |
58 |
* ============================================================== |
59 |
*/ |
60 |
|
61 |
err_t Paso_Solver_PCG( |
62 |
Paso_SystemMatrix * A, |
63 |
double * r, |
64 |
double * x, |
65 |
dim_t *iter, |
66 |
double * tolerance, |
67 |
Paso_Performance* pp) { |
68 |
|
69 |
|
70 |
/* Local variables */ |
71 |
dim_t num_iter=0,maxit,num_iter_global; |
72 |
dim_t i0; |
73 |
bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE; |
74 |
err_t status = SOLVER_NO_ERROR; |
75 |
dim_t n = A->num_cols * A-> col_block_size; |
76 |
double *resid = tolerance, *rs=NULL, *p=NULL, *v=NULL, *x2=NULL ; |
77 |
double tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,sum_1,sum_2,sum_3,sum_4,sum_5,tol; |
78 |
double norm_of_residual,norm_of_residual_global; |
79 |
register double r_tmp,d,rs_tmp,x2_tmp,x_tmp; |
80 |
|
81 |
/* */ |
82 |
/*-----------------------------------------------------------------*/ |
83 |
/* */ |
84 |
/* Start of Calculation : */ |
85 |
/* --------------------- */ |
86 |
/* */ |
87 |
/* */ |
88 |
rs=TMPMEMALLOC(n,double); |
89 |
p=TMPMEMALLOC(n,double); |
90 |
v=TMPMEMALLOC(n,double); |
91 |
x2=TMPMEMALLOC(n,double); |
92 |
|
93 |
/* Test the input parameters. */ |
94 |
|
95 |
if (n < 0) { |
96 |
status = SOLVER_INPUT_ERROR; |
97 |
} else if (rs==NULL || p==NULL || v==NULL || x2==NULL) { |
98 |
status = SOLVER_MEMORY_ERROR; |
99 |
} else { |
100 |
maxit = *iter; |
101 |
tol = *resid; |
102 |
#pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \ |
103 |
private(tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,norm_of_residual,num_iter) |
104 |
{ |
105 |
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
106 |
/* initialize data */ |
107 |
#pragma omp for private(i0) schedule(static) |
108 |
for (i0=0;i0<n;i0++) { |
109 |
rs[i0]=r[i0]; |
110 |
x2[i0]=x[i0]; |
111 |
} |
112 |
#pragma omp for private(i0) schedule(static) |
113 |
for (i0=0;i0<n;i0++) { |
114 |
p[i0]=0; |
115 |
v[i0]=0; |
116 |
} |
117 |
num_iter=0; |
118 |
/* start of iteration */ |
119 |
while (!(convergeFlag || maxIterFlag || breakFlag)) { |
120 |
++(num_iter); |
121 |
#pragma omp barrier |
122 |
#pragma omp master |
123 |
{ |
124 |
sum_1 = 0; |
125 |
sum_2 = 0; |
126 |
sum_3 = 0; |
127 |
sum_4 = 0; |
128 |
sum_5 = 0; |
129 |
} |
130 |
/* v=prec(r) */ |
131 |
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
132 |
Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER); |
133 |
Paso_Solver_solvePreconditioner(A,v,r); |
134 |
Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER); |
135 |
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
136 |
/* tau=v*r */ |
137 |
#pragma omp for private(i0) reduction(+:sum_1) schedule(static) |
138 |
for (i0=0;i0<n;i0++) sum_1+=v[i0]*r[i0]; |
139 |
tau_old=tau; |
140 |
tau=sum_1; |
141 |
/* p=v+beta*p */ |
142 |
if (num_iter==1) { |
143 |
#pragma omp for private(i0) schedule(static) |
144 |
for (i0=0;i0<n;i0++) p[i0]=v[i0]; |
145 |
} else { |
146 |
beta=tau/tau_old; |
147 |
#pragma omp for private(i0) schedule(static) |
148 |
for (i0=0;i0<n;i0++) p[i0]=v[i0]+beta*p[i0]; |
149 |
} |
150 |
/* v=A*p */ |
151 |
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
152 |
Performance_startMonitor(pp,PERFORMANCE_MVM); |
153 |
Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, p,ZERO,v); |
154 |
Performance_stopMonitor(pp,PERFORMANCE_MVM); |
155 |
Performance_startMonitor(pp,PERFORMANCE_SOLVER); |
156 |
/* delta=p*v */ |
157 |
#pragma omp for private(i0) reduction(+:sum_2) schedule(static) |
158 |
for (i0=0;i0<n;i0++) sum_2+=v[i0]*p[i0]; |
159 |
delta=sum_2; |
160 |
|
161 |
|
162 |
if (! (breakFlag = (ABS(delta) <= TOLERANCE_FOR_SCALARS))) { |
163 |
alpha=tau/delta; |
164 |
/* smoother */ |
165 |
#pragma omp for private(i0) schedule(static) |
166 |
for (i0=0;i0<n;i0++) r[i0]-=alpha*v[i0]; |
167 |
#pragma omp for private(i0,d) reduction(+:sum_3,sum_4) schedule(static) |
168 |
for (i0=0;i0<n;i0++) { |
169 |
d=r[i0]-rs[i0]; |
170 |
sum_3+=d*d; |
171 |
sum_4+=d*rs[i0]; |
172 |
} |
173 |
gamma_1= ( (ABS(sum_3)<= ZERO) ? 0 : -sum_4/sum_3) ; |
174 |
gamma_2= ONE-gamma_1; |
175 |
#pragma omp for private(i0,x2_tmp,x_tmp,rs_tmp) schedule(static) |
176 |
for (i0=0;i0<n;++i0) { |
177 |
rs[i0]=gamma_2*rs[i0]+gamma_1*r[i0]; |
178 |
x2[i0]+=alpha*p[i0]; |
179 |
x[i0]=gamma_2*x[i0]+gamma_1*x2[i0]; |
180 |
} |
181 |
#pragma omp for private(i0) reduction(+:sum_5) schedule(static) |
182 |
for (i0=0;i0<n;++i0) sum_5+=rs[i0]*rs[i0]; |
183 |
norm_of_residual=sqrt(sum_5); |
184 |
convergeFlag = norm_of_residual <= tol; |
185 |
maxIterFlag = num_iter == maxit; |
186 |
breakFlag = (ABS(tau) <= TOLERANCE_FOR_SCALARS); |
187 |
} |
188 |
} |
189 |
/* end of iteration */ |
190 |
#pragma omp master |
191 |
{ |
192 |
num_iter_global=num_iter; |
193 |
norm_of_residual_global=norm_of_residual; |
194 |
if (maxIterFlag) { |
195 |
status = SOLVER_MAXITER_REACHED; |
196 |
} else if (breakFlag) { |
197 |
status = SOLVER_BREAKDOWN; |
198 |
} |
199 |
} |
200 |
Performance_stopMonitor(pp,PERFORMANCE_SOLVER); |
201 |
} /* end of parallel region */ |
202 |
TMPMEMFREE(rs); |
203 |
TMPMEMFREE(x2); |
204 |
TMPMEMFREE(v); |
205 |
TMPMEMFREE(p); |
206 |
*iter=num_iter_global; |
207 |
*resid=norm_of_residual_global; |
208 |
} |
209 |
/* End of PCG */ |
210 |
return status; |
211 |
} |
212 |
|
213 |
/* |
214 |
* $Log$ |
215 |
* Revision 1.2 2005/09/15 03:44:40 jgs |
216 |
* Merge of development branch dev-02 back to main trunk on 2005-09-15 |
217 |
* |
218 |
* Revision 1.1.2.1 2005/09/05 06:29:49 gross |
219 |
* These files have been extracted from finley to define a stand alone libray for iterative |
220 |
* linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but |
221 |
* has not been tested yet. |
222 |
* |
223 |
* |
224 |
*/ |