
Annotation of /trunk/paso/src/PCG.c



Revision 1312
Mon Sep 24 06:18:44 2007 UTC by ksteube
File MIME type: text/plain
File size: 8620 byte(s)
The MPI branch is hereby closed. All future work should be in trunk.

Previously, in revision 1295, I merged the latest trunk changes into trunk-mpi-branch.
In this revision I copied all files from trunk-mpi-branch over the corresponding
trunk files. I did not use 'svn merge'; it was a copy.

/* $Id$ */

/*******************************************************
 *
 *       Copyright 2003-2007 by ACceSS MNRF
 *       Copyright 2007 by University of Queensland
 *
 *                http://esscc.uq.edu.au
 *        Primary Business: Queensland, Australia
 *  Licensed under the Open Software License version 3.0
 *     http://www.opensource.org/licenses/osl-3.0.php
 *
 *******************************************************/

/* PCG iterations */

#include "SystemMatrix.h"
#include "Paso.h"
#include "Solver.h"

#ifdef _OPENMP
#include <omp.h>
#endif

#ifdef PASO_MPI
#include <mpi.h>
#endif

/*
*
*  Purpose
*  =======
*
*  PCG solves the linear system A*x = b using the
*  preconditioned conjugate gradient method plus a smoother.
*  A has to be symmetric.
*
*  Convergence test: norm(b - A*x) < TOL.
*
*  Arguments
*  =========
*
*  r (input/output) DOUBLE PRECISION array, dimension N.
*    On entry, the residual of the initial guess x; it is overwritten
*    during the iteration.
*
*  x (input/output) DOUBLE PRECISION array, dimension N.
*    On input, the initial guess. On output, the approximate solution.
*
*  ITER (input/output) INT
*    On input, the maximum number of iterations to be performed.
*    On output, the actual number of iterations performed.
*
*  TOLERANCE (input/output) DOUBLE PRECISION
*    On input, the convergence tolerance. On output, the norm of the
*    smoothed residual that was achieved.
*
*  INFO (output) INT
*
*    = SOLVER_NO_ERROR: successful exit, iterated approximate solution returned.
*    = SOLVER_MAXITER_REACHED: the maximum number of iterations was reached.
*    = SOLVER_INPUT_ERROR: illegal input parameter.
*    = SOLVER_BREAKDOWN: the scalars tau or delta became too small.
*    = SOLVER_MEMORY_ERROR: allocation of the work arrays failed.
*
* ==============================================================
*/

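/*
 * Illustrative usage sketch (not part of the original file): assuming a
 * Paso_SystemMatrix A, a right-hand side b, an initial guess x and a
 * Paso_Performance record pp have already been set up by the caller,
 * one would form the initial residual r = b - A*x and then call the
 * solver. The names b and n are placeholders for the caller's data, and
 * the alpha/beta convention of the matrix-vector call is assumed to be
 * the same as in the v = A*p call inside the solver below.
 *
 *   dim_t it = 1000;       // maximum iterations; returns iterations used
 *   double tol = 1.e-8;    // requested tolerance; returns achieved residual norm
 *   dim_t k;
 *   for (k=0; k<n; k++) r[k] = b[k];
 *   Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(-1., A, x, 1., r);  // r = b - A*x
 *   err_t err = Paso_Solver_PCG(A, r, x, &it, &tol, pp);
 *   // on return: x holds the approximate solution, it the iteration count,
 *   // tol the norm of the smoothed residual; err is one of the codes above.
 */
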
err_t Paso_Solver_PCG(
    Paso_SystemMatrix * A,
    double * r,
    double * x,
    dim_t *iter,
    double * tolerance,
    Paso_Performance* pp) {


  /* Local variables */
  dim_t num_iter=0,maxit,num_iter_global;
  dim_t i0;
  bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
  err_t status = SOLVER_NO_ERROR;
  dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
  double *resid = tolerance, *rs=NULL, *p=NULL, *v=NULL, *x2=NULL;
  double tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,sum_1,sum_2,sum_3,sum_4,sum_5,tol;
  double norm_of_residual,norm_of_residual_global, loc_sum[2], sum[2];
  register double r_tmp,d,rs_tmp,x2_tmp,x_tmp;

  /*                                                                 */
  /*-----------------------------------------------------------------*/
  /*                                                                 */
  /*   Start of Calculation :                                        */
  /*   ---------------------                                         */
  /*                                                                 */
  /*                                                                 */
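  /* work arrays (explanatory note): rs holds the smoothed residual, p the
     search direction, v the preconditioned residual (later overwritten by
     A*p), and x2 the unsmoothed iterate */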
  rs=TMPMEMALLOC(n,double);
  p=TMPMEMALLOC(n,double);
  v=TMPMEMALLOC(n,double);
  x2=TMPMEMALLOC(n,double);

  /* Test the input parameters. */

  if (n < 0) {
    status = SOLVER_INPUT_ERROR;
  } else if (rs==NULL || p==NULL || v==NULL || x2==NULL) {
    status = SOLVER_MEMORY_ERROR;
  } else {
    maxit = *iter;
    tol = *resid;
    #pragma omp parallel firstprivate(maxit,tol,convergeFlag,maxIterFlag,breakFlag) \
        private(tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,norm_of_residual,num_iter)
    {
      Performance_startMonitor(pp,PERFORMANCE_SOLVER);
      /* initialize data */
      #pragma omp for private(i0) schedule(static)
      for (i0=0;i0<n;i0++) {
        rs[i0]=r[i0];
        x2[i0]=x[i0];
      }
      #pragma omp for private(i0) schedule(static)
      for (i0=0;i0<n;i0++) {
        p[i0]=0;
        v[i0]=0;
      }
      num_iter=0;
      /* start of iteration */
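      /* One sweep of the loop below performs a standard PCG step followed by a
         residual smoothing step (explanatory summary derived from the code below):
             v     = prec(r)
             tau   = v.r
             p     = v + beta*p,   beta = tau/tau_old   (p = v in the first iteration)
             v     = A*p
             delta = p.v
             alpha = tau/delta
             r     = r - alpha*v        (unsmoothed residual)
             x2    = x2 + alpha*p       (unsmoothed iterate)
         after which rs and x are updated as weighted averages with gamma_1, gamma_2. */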
      while (!(convergeFlag || maxIterFlag || breakFlag)) {
        ++(num_iter);
        #pragma omp barrier
        #pragma omp master
        {
          sum_1 = 0;
          sum_2 = 0;
          sum_3 = 0;
          sum_4 = 0;
          sum_5 = 0;
        }
        /* v=prec(r) */
        Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
        Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
        Paso_Solver_solvePreconditioner(A,v,r);
        Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
        Performance_startMonitor(pp,PERFORMANCE_SOLVER);
        /* tau=v*r */
        #pragma omp for private(i0) reduction(+:sum_1) schedule(static)
        for (i0=0;i0<n;i0++) sum_1+=v[i0]*r[i0]; /* Limit to local values of v[] and r[] */
        #ifdef PASO_MPI
        /* In case we have many MPI processes, each of which may have several OMP threads:
           OMP master participates in an MPI reduction to get global sum_1 */
        #pragma omp master
        {
          loc_sum[0] = sum_1;
          MPI_Allreduce(loc_sum, &sum_1, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
        }
        #endif
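        /* tau is the (globally summed, when MPI is used) inner product of the
           preconditioned residual v and the residual r; its previous value is
           kept in tau_old to form beta = tau/tau_old below (explanatory note) */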
        tau_old=tau;
        tau=sum_1;
        /* p=v+beta*p */
        if (num_iter==1) {
          #pragma omp for private(i0) schedule(static)
          for (i0=0;i0<n;i0++) p[i0]=v[i0];
        } else {
          beta=tau/tau_old;
          #pragma omp for private(i0) schedule(static)
          for (i0=0;i0<n;i0++) p[i0]=v[i0]+beta*p[i0];
        }
        /* v=A*p */
        Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
        Performance_startMonitor(pp,PERFORMANCE_MVM);
        Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, p,ZERO,v);
        Performance_stopMonitor(pp,PERFORMANCE_MVM);
        Performance_startMonitor(pp,PERFORMANCE_SOLVER);
        /* delta=p*v */
        #pragma omp for private(i0) reduction(+:sum_2) schedule(static)
        for (i0=0;i0<n;i0++) sum_2+=v[i0]*p[i0];
        #ifdef PASO_MPI
        #pragma omp master
        {
          loc_sum[0] = sum_2;
          MPI_Allreduce(loc_sum, &sum_2, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
        }
        #endif
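        /* delta = p.(A*p); if |delta| falls below TOLERANCE_FOR_SCALARS the step
           length alpha = tau/delta is not formed and the iteration terminates
           with SOLVER_BREAKDOWN (explanatory note) */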
        delta=sum_2;


        if (! (breakFlag = (ABS(delta) <= TOLERANCE_FOR_SCALARS))) {
          alpha=tau/delta;
          /* smoother */
          #pragma omp for private(i0) schedule(static)
          for (i0=0;i0<n;i0++) r[i0]-=alpha*v[i0];
          #pragma omp for private(i0,d) reduction(+:sum_3,sum_4) schedule(static)
          for (i0=0;i0<n;i0++) {
            d=r[i0]-rs[i0];
            sum_3+=d*d;
            sum_4+=d*rs[i0];
          }
          #ifdef PASO_MPI
          #pragma omp master
          {
            loc_sum[0] = sum_3;
            loc_sum[1] = sum_4;
            MPI_Allreduce(loc_sum, sum, 2, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
            sum_3=sum[0];
            sum_4=sum[1];
          }
          #endif
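          /* Residual smoothing (explanatory note): gamma_1 is chosen to minimise
             the norm of rs_new = gamma_2*rs + gamma_1*r = rs + gamma_1*(r - rs),
             which gives gamma_1 = -(r-rs).rs/||r-rs||^2 = -sum_4/sum_3, using the
             sums accumulated above */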
          gamma_1= ( (ABS(sum_3)<= ZERO) ? 0 : -sum_4/sum_3) ;
          gamma_2= ONE-gamma_1;
          #pragma omp for private(i0,x2_tmp,x_tmp,rs_tmp) schedule(static)
          for (i0=0;i0<n;++i0) {
            rs[i0]=gamma_2*rs[i0]+gamma_1*r[i0];
            x2[i0]+=alpha*p[i0];
            x[i0]=gamma_2*x[i0]+gamma_1*x2[i0];
          }
          #pragma omp for private(i0) reduction(+:sum_5) schedule(static)
          for (i0=0;i0<n;++i0) sum_5+=rs[i0]*rs[i0];
          #ifdef PASO_MPI
          #pragma omp master
          {
            loc_sum[0] = sum_5;
            MPI_Allreduce(loc_sum, &sum_5, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
          }
          #endif
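          /* convergence is tested on the Euclidean norm of the smoothed residual rs,
             not on the unsmoothed PCG residual r (explanatory note) */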
          norm_of_residual=sqrt(sum_5);
          convergeFlag = norm_of_residual <= tol;
          maxIterFlag = num_iter == maxit;
          breakFlag = (ABS(tau) <= TOLERANCE_FOR_SCALARS);
        }
      }
      /* end of iteration */
      #pragma omp master
      {
        num_iter_global=num_iter;
        norm_of_residual_global=norm_of_residual;
        if (maxIterFlag) {
          status = SOLVER_MAXITER_REACHED;
        } else if (breakFlag) {
          status = SOLVER_BREAKDOWN;
        }
      }
      Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
    } /* end of parallel region */
    TMPMEMFREE(rs);
    TMPMEMFREE(x2);
    TMPMEMFREE(v);
    TMPMEMFREE(p);
    *iter=num_iter_global;
    *resid=norm_of_residual_global;
  }
  /* End of PCG */
  return status;
}

Properties

Name Value
svn:eol-style native
svn:keywords Author Date Id Revision
