/[escript]/branches/doubleplusgood/paso/src/PCG.cpp
ViewVC logotype

Contents of /branches/doubleplusgood/paso/src/PCG.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 682 - (show annotations)
Mon Mar 27 02:43:09 2006 UTC (13 years, 10 months ago) by robwdcock
Original Path: trunk/paso/src/Solvers/PCG.c
File MIME type: text/plain
File size: 7910 byte(s)
+ NEW BUILD SYSTEM

This commit contains the new build system with cross-platform support.
Most things work are before though you can have more control.

ENVIRONMENT settings have changed:
+ You no longer require LD_LIBRARY_PATH or PYTHONPATH to point to the
esysroot for building and testing performed via scons
+ ACcESS altix users: It is recommended you change your modules to load
the latest intel compiler and other libraries required by boost to match
the setup in svn (you can override). The correct modules are as follows

module load intel_cc.9.0.026
export
MODULEPATH=${MODULEPATH}:/data/raid2/toolspp4/modulefiles/gcc-3.3.6
module load boost/1.33.0/python-2.4.1
module load python/2.4.1
module load numarray/1.3.3


1 /* $Id$ */
2
3
4 /*
5 ********************************************************************************
6 * Copyright 2006 by ACcESS MNRF *
7 * *
8 * http://www.access.edu.au *
9 * Primary Business: Queensland, Australia *
10 * Licensed under the Open Software License version 3.0 *
11 * http://www.opensource.org/licenses/osl-3.0.php *
12 ********************************************************************************
13 */
14
15 /* PCG iterations */
16
17 #include "../SystemMatrix.h"
18 #include "../Paso.h"
19 #include "Solver.h"
20 /* #include <math.h> */
21 #ifdef _OPENMP
22 #include <omp.h>
23 #endif
24
25 /*
26 *
27 * Purpose
28 * =======
29 *
30 * PCG solves the linear system A*x = b using the
31 * preconditioned conjugate gradient method plus a smoother
32 * A has to be symmetric.
33 *
34 * Convergence test: norm( b - A*x )< TOL.
35 * For other measures, see the above reference.
36 *
37 * Arguments
38 * =========
39 *
40 * r (input) DOUBLE PRECISION array, dimension N.
41 * On entry, residual of inital guess x
42 *
43 * x (input/output) DOUBLE PRECISION array, dimension N.
44 * On input, the initial guess.
45 *
46 * ITER (input/output) INT
47 * On input, the maximum iterations to be performed.
48 * On output, actual number of iterations performed.
49 *
50 * INFO (output) INT
51 *
52 * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
53 * = SOLVEr_MAXITER_REACHED
54 * = SOLVER_INPUT_ERROR Illegal parameter:
55 * = SOLVEr_BREAKDOWN: If parameters rHO or OMEGA become smaller
56 * = SOLVER_MEMORY_ERROR : If parameters rHO or OMEGA become smaller
57 *
58 * ==============================================================
59 */
60
61 err_t Paso_Solver_PCG(
62 Paso_SystemMatrix * A,
63 double * r,
64 double * x,
65 dim_t *iter,
66 double * tolerance,
67 Paso_Performance* pp) {
68
69
70 /* Local variables */
71 dim_t num_iter=0,maxit,num_iter_global;
72 dim_t i0;
73 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
74 err_t status = SOLVER_NO_ERROR;
75 dim_t n = A->num_cols * A-> col_block_size;
76 double *resid = tolerance, *rs=NULL, *p=NULL, *v=NULL, *x2=NULL ;
77 double tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,sum_1,sum_2,sum_3,sum_4,sum_5,tol;
78 double norm_of_residual,norm_of_residual_global;
79 register double r_tmp,d,rs_tmp,x2_tmp,x_tmp;
80
81 /* */
82 /*-----------------------------------------------------------------*/
83 /* */
84 /* Start of Calculation : */
85 /* --------------------- */
86 /* */
87 /* */
88 rs=TMPMEMALLOC(n,double);
89 p=TMPMEMALLOC(n,double);
90 v=TMPMEMALLOC(n,double);
91 x2=TMPMEMALLOC(n,double);
92
93 /* Test the input parameters. */
94
95 if (n < 0) {
96 status = SOLVER_INPUT_ERROR;
97 } else if (rs==NULL || p==NULL || v==NULL || x2==NULL) {
98 status = SOLVER_MEMORY_ERROR;
99 } else {
100 maxit = *iter;
101 tol = *resid;
102 #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
103 private(tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,norm_of_residual,num_iter)
104 {
105 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
106 /* initialize data */
107 #pragma omp for private(i0) schedule(static)
108 for (i0=0;i0<n;i0++) {
109 rs[i0]=r[i0];
110 x2[i0]=x[i0];
111 }
112 #pragma omp for private(i0) schedule(static)
113 for (i0=0;i0<n;i0++) {
114 p[i0]=0;
115 v[i0]=0;
116 }
117 num_iter=0;
118 /* start of iteration */
119 while (!(convergeFlag || maxIterFlag || breakFlag)) {
120 ++(num_iter);
121 #pragma omp barrier
122 #pragma omp master
123 {
124 sum_1 = 0;
125 sum_2 = 0;
126 sum_3 = 0;
127 sum_4 = 0;
128 sum_5 = 0;
129 }
130 /* v=prec(r) */
131 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
132 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
133 Paso_Solver_solvePreconditioner(A,v,r);
134 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
135 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
136 /* tau=v*r */
137 #pragma omp for private(i0) reduction(+:sum_1) schedule(static)
138 for (i0=0;i0<n;i0++) sum_1+=v[i0]*r[i0];
139 tau_old=tau;
140 tau=sum_1;
141 /* p=v+beta*p */
142 if (num_iter==1) {
143 #pragma omp for private(i0) schedule(static)
144 for (i0=0;i0<n;i0++) p[i0]=v[i0];
145 } else {
146 beta=tau/tau_old;
147 #pragma omp for private(i0) schedule(static)
148 for (i0=0;i0<n;i0++) p[i0]=v[i0]+beta*p[i0];
149 }
150 /* v=A*p */
151 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
152 Performance_startMonitor(pp,PERFORMANCE_MVM);
153 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, p,ZERO,v);
154 Performance_stopMonitor(pp,PERFORMANCE_MVM);
155 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
156 /* delta=p*v */
157 #pragma omp for private(i0) reduction(+:sum_2) schedule(static)
158 for (i0=0;i0<n;i0++) sum_2+=v[i0]*p[i0];
159 delta=sum_2;
160
161
162 if (! (breakFlag = (ABS(delta) <= TOLERANCE_FOR_SCALARS))) {
163 alpha=tau/delta;
164 /* smoother */
165 #pragma omp for private(i0) schedule(static)
166 for (i0=0;i0<n;i0++) r[i0]-=alpha*v[i0];
167 #pragma omp for private(i0,d) reduction(+:sum_3,sum_4) schedule(static)
168 for (i0=0;i0<n;i0++) {
169 d=r[i0]-rs[i0];
170 sum_3+=d*d;
171 sum_4+=d*rs[i0];
172 }
173 gamma_1= ( (ABS(sum_3)<= ZERO) ? 0 : -sum_4/sum_3) ;
174 gamma_2= ONE-gamma_1;
175 #pragma omp for private(i0,x2_tmp,x_tmp,rs_tmp) schedule(static)
176 for (i0=0;i0<n;++i0) {
177 rs[i0]=gamma_2*rs[i0]+gamma_1*r[i0];
178 x2[i0]+=alpha*p[i0];
179 x[i0]=gamma_2*x[i0]+gamma_1*x2[i0];
180 }
181 #pragma omp for private(i0) reduction(+:sum_5) schedule(static)
182 for (i0=0;i0<n;++i0) sum_5+=rs[i0]*rs[i0];
183 norm_of_residual=sqrt(sum_5);
184 convergeFlag = norm_of_residual <= tol;
185 maxIterFlag = num_iter == maxit;
186 breakFlag = (ABS(tau) <= TOLERANCE_FOR_SCALARS);
187 }
188 }
189 /* end of iteration */
190 #pragma omp master
191 {
192 num_iter_global=num_iter;
193 norm_of_residual_global=norm_of_residual;
194 if (maxIterFlag) {
195 status = SOLVER_MAXITER_REACHED;
196 } else if (breakFlag) {
197 status = SOLVER_BREAKDOWN;
198 }
199 }
200 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
201 } /* end of parallel region */
202 TMPMEMFREE(rs);
203 TMPMEMFREE(x2);
204 TMPMEMFREE(v);
205 TMPMEMFREE(p);
206 *iter=num_iter_global;
207 *resid=norm_of_residual_global;
208 }
209 /* End of PCG */
210 return status;
211 }
212
213 /*
214 * $Log$
215 * Revision 1.2 2005/09/15 03:44:40 jgs
216 * Merge of development branch dev-02 back to main trunk on 2005-09-15
217 *
218 * Revision 1.1.2.1 2005/09/05 06:29:49 gross
219 * These files have been extracted from finley to define a stand alone libray for iterative
220 * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
221 * has not been tested yet.
222 *
223 *
224 */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26