/[escript]/trunk/paso/src/PCG.c
ViewVC logotype

Annotation of /trunk/paso/src/PCG.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 700 - (hide annotations)
Thu Apr 6 00:13:40 2006 UTC (13 years, 3 months ago) by gross
File MIME type: text/plain
File size: 7904 byte(s)
A few changes in the build mechanism and the file structure so scons can build release tar files:

  * paso/src/Solver has been moved to paso/src 
  * all test_.py are now run_.py files and are assumed to be passing python tests. they can run by 
    scons py_tests and are part of the release test set
  * escript/py_src/test_ are moved to escript/test/python and are installed in to the build directory 
    (rather then the PYTHONPATH).
  * all py files in test/python which don't start with run_ or test_ are now 'local_py_tests'. they are installed i
    by not run automatically.
  * CppUnitTest is now treated as a escript module (against previous decisions).
  * scons realse builds nor tar/zip files with relvant source code (src and tests in seperate files)

the python tests don't pass yet due to path problems.


1 jgs 150 /* $Id$ */
2    
3 gross 495
4 dhawcroft 631 /*
5     ********************************************************************************
6 dhawcroft 633 * Copyright 2006 by ACcESS MNRF *
7 dhawcroft 631 * *
8     * http://www.access.edu.au *
9     * Primary Business: Queensland, Australia *
10     * Licensed under the Open Software License version 3.0 *
11     * http://www.opensource.org/licenses/osl-3.0.php *
12     ********************************************************************************
13     */
14    
15 jgs 150 /* PCG iterations */
16    
17 gross 700 #include "SystemMatrix.h"
18     #include "Paso.h"
19 jgs 150 #include "Solver.h"
20     /* #include <math.h> */
21     #ifdef _OPENMP
22     #include <omp.h>
23     #endif
24    
25     /*
26     *
27     * Purpose
28     * =======
29     *
30     * PCG solves the linear system A*x = b using the
31     * preconditioned conjugate gradient method plus a smoother
32     * A has to be symmetric.
33     *
34     * Convergence test: norm( b - A*x )< TOL.
35     * For other measures, see the above reference.
36     *
37     * Arguments
38     * =========
39     *
40     * r (input) DOUBLE PRECISION array, dimension N.
41     * On entry, residual of inital guess x
42     *
43     * x (input/output) DOUBLE PRECISION array, dimension N.
44     * On input, the initial guess.
45     *
46     * ITER (input/output) INT
47     * On input, the maximum iterations to be performed.
48     * On output, actual number of iterations performed.
49     *
50     * INFO (output) INT
51     *
52     * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
53     * = SOLVEr_MAXITER_REACHED
54     * = SOLVER_INPUT_ERROR Illegal parameter:
55     * = SOLVEr_BREAKDOWN: If parameters rHO or OMEGA become smaller
56     * = SOLVER_MEMORY_ERROR : If parameters rHO or OMEGA become smaller
57     *
58     * ==============================================================
59     */
60    
61     err_t Paso_Solver_PCG(
62     Paso_SystemMatrix * A,
63     double * r,
64     double * x,
65     dim_t *iter,
66 gross 584 double * tolerance,
67     Paso_Performance* pp) {
68 jgs 150
69    
70     /* Local variables */
71     dim_t num_iter=0,maxit,num_iter_global;
72     dim_t i0;
73     bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
74     err_t status = SOLVER_NO_ERROR;
75     dim_t n = A->num_cols * A-> col_block_size;
76     double *resid = tolerance, *rs=NULL, *p=NULL, *v=NULL, *x2=NULL ;
77     double tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,sum_1,sum_2,sum_3,sum_4,sum_5,tol;
78 gross 495 double norm_of_residual,norm_of_residual_global;
79     register double r_tmp,d,rs_tmp,x2_tmp,x_tmp;
80 jgs 150
81     /* */
82     /*-----------------------------------------------------------------*/
83     /* */
84     /* Start of Calculation : */
85     /* --------------------- */
86     /* */
87     /* */
88     rs=TMPMEMALLOC(n,double);
89     p=TMPMEMALLOC(n,double);
90     v=TMPMEMALLOC(n,double);
91     x2=TMPMEMALLOC(n,double);
92    
93     /* Test the input parameters. */
94    
95     if (n < 0) {
96     status = SOLVER_INPUT_ERROR;
97     } else if (rs==NULL || p==NULL || v==NULL || x2==NULL) {
98     status = SOLVER_MEMORY_ERROR;
99     } else {
100     maxit = *iter;
101     tol = *resid;
102     #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
103     private(tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,norm_of_residual,num_iter)
104     {
105 gross 584 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
106 jgs 150 /* initialize data */
107     #pragma omp for private(i0) schedule(static)
108     for (i0=0;i0<n;i0++) {
109     rs[i0]=r[i0];
110     x2[i0]=x[i0];
111 gross 495 }
112     #pragma omp for private(i0) schedule(static)
113     for (i0=0;i0<n;i0++) {
114 jgs 150 p[i0]=0;
115     v[i0]=0;
116     }
117     num_iter=0;
118     /* start of iteration */
119     while (!(convergeFlag || maxIterFlag || breakFlag)) {
120     ++(num_iter);
121     #pragma omp barrier
122     #pragma omp master
123     {
124     sum_1 = 0;
125     sum_2 = 0;
126     sum_3 = 0;
127     sum_4 = 0;
128     sum_5 = 0;
129     }
130     /* v=prec(r) */
131 gross 584 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
132     Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
133 jgs 150 Paso_Solver_solvePreconditioner(A,v,r);
134 gross 584 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
135     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
136 jgs 150 /* tau=v*r */
137     #pragma omp for private(i0) reduction(+:sum_1) schedule(static)
138     for (i0=0;i0<n;i0++) sum_1+=v[i0]*r[i0];
139     tau_old=tau;
140     tau=sum_1;
141     /* p=v+beta*p */
142     if (num_iter==1) {
143     #pragma omp for private(i0) schedule(static)
144     for (i0=0;i0<n;i0++) p[i0]=v[i0];
145     } else {
146     beta=tau/tau_old;
147     #pragma omp for private(i0) schedule(static)
148     for (i0=0;i0<n;i0++) p[i0]=v[i0]+beta*p[i0];
149     }
150     /* v=A*p */
151 gross 584 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
152     Performance_startMonitor(pp,PERFORMANCE_MVM);
153 gross 415 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, p,ZERO,v);
154 gross 584 Performance_stopMonitor(pp,PERFORMANCE_MVM);
155     Performance_startMonitor(pp,PERFORMANCE_SOLVER);
156 jgs 150 /* delta=p*v */
157     #pragma omp for private(i0) reduction(+:sum_2) schedule(static)
158     for (i0=0;i0<n;i0++) sum_2+=v[i0]*p[i0];
159     delta=sum_2;
160    
161    
162     if (! (breakFlag = (ABS(delta) <= TOLERANCE_FOR_SCALARS))) {
163     alpha=tau/delta;
164     /* smoother */
165 gross 495 #pragma omp for private(i0) schedule(static)
166     for (i0=0;i0<n;i0++) r[i0]-=alpha*v[i0];
167 jgs 150 #pragma omp for private(i0,d) reduction(+:sum_3,sum_4) schedule(static)
168     for (i0=0;i0<n;i0++) {
169     d=r[i0]-rs[i0];
170 gross 495 sum_3+=d*d;
171     sum_4+=d*rs[i0];
172 jgs 150 }
173     gamma_1= ( (ABS(sum_3)<= ZERO) ? 0 : -sum_4/sum_3) ;
174     gamma_2= ONE-gamma_1;
175 gross 495 #pragma omp for private(i0,x2_tmp,x_tmp,rs_tmp) schedule(static)
176     for (i0=0;i0<n;++i0) {
177     rs[i0]=gamma_2*rs[i0]+gamma_1*r[i0];
178 jgs 150 x2[i0]+=alpha*p[i0];
179     x[i0]=gamma_2*x[i0]+gamma_1*x2[i0];
180 gross 495 }
181     #pragma omp for private(i0) reduction(+:sum_5) schedule(static)
182     for (i0=0;i0<n;++i0) sum_5+=rs[i0]*rs[i0];
183     norm_of_residual=sqrt(sum_5);
184     convergeFlag = norm_of_residual <= tol;
185     maxIterFlag = num_iter == maxit;
186     breakFlag = (ABS(tau) <= TOLERANCE_FOR_SCALARS);
187     }
188 jgs 150 }
189     /* end of iteration */
190     #pragma omp master
191     {
192     num_iter_global=num_iter;
193     norm_of_residual_global=norm_of_residual;
194     if (maxIterFlag) {
195     status = SOLVER_MAXITER_REACHED;
196     } else if (breakFlag) {
197     status = SOLVER_BREAKDOWN;
198     }
199 gross 584 }
200     Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
201 jgs 150 } /* end of parallel region */
202     TMPMEMFREE(rs);
203     TMPMEMFREE(x2);
204     TMPMEMFREE(v);
205     TMPMEMFREE(p);
206     *iter=num_iter_global;
207     *resid=norm_of_residual_global;
208     }
209     /* End of PCG */
210     return status;
211     }
212    
213     /*
214     * $Log$
215     * Revision 1.2 2005/09/15 03:44:40 jgs
216     * Merge of development branch dev-02 back to main trunk on 2005-09-15
217     *
218     * Revision 1.1.2.1 2005/09/05 06:29:49 gross
219     * These files have been extracted from finley to define a stand alone libray for iterative
220     * linear solvers on the ALTIX. main entry through Paso_solve. this version compiles but
221     * has not been tested yet.
222     *
223     *
224     */

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision

  ViewVC Help
Powered by ViewVC 1.1.26