/[escript]/branches/windows_from_1456_trunk_1580_merged_in/paso/src/PCG.c

Revision 1593 - Thu Jun 5 11:01:24 2008 UTC by phornby
File MIME type: text/plain, file size: 13464 bytes

Fix the uninitialised use of tau for the second time.



/* $Id$ */

/*******************************************************
 *
 * Copyright 2003-2007 by ACceSS MNRF
 * Copyright 2007 by University of Queensland
 *
 * http://esscc.uq.edu.au
 * Primary Business: Queensland, Australia
 * Licensed under the Open Software License version 3.0
 * http://www.opensource.org/licenses/osl-3.0.php
 *
 *******************************************************/

/* PCG iterations */

#include "SystemMatrix.h"
#include "Paso.h"
#include "Solver.h"

#ifdef _OPENMP
#include <omp.h>
#endif

#ifdef PASO_MPI
#include <mpi.h>
#endif

/*
 *
 * Purpose
 * =======
 *
 * PCG solves the linear system A*x = b using the
 * preconditioned conjugate gradient method plus a smoother.
 * A has to be symmetric.
 *
 * Convergence test: norm( b - A*x ) < TOL.
 *
 * Arguments
 * =========
 *
 * r (input) DOUBLE PRECISION array, dimension N.
 *   On entry, the residual of the initial guess x.
 *
 * x (input/output) DOUBLE PRECISION array, dimension N.
 *   On input, the initial guess.
 *
 * ITER (input/output) INT
 *   On input, the maximum number of iterations to be performed.
 *   On output, the actual number of iterations performed.
 *
 * TOL (input/output) DOUBLE PRECISION
 *   On input, the convergence tolerance.
 *   On output, the residual norm of the returned solution.
 *
 * INFO (output) INT
 *
 *   = SOLVER_NO_ERROR: successful exit, iterated approximate solution returned.
 *   = SOLVER_MAXITER_REACHED: the maximum number of iterations was reached.
 *   = SOLVER_INPUT_ERROR: illegal parameter.
 *   = SOLVER_BREAKDOWN: a scalar quantity (tau or delta) dropped below
 *     TOLERANCE_FOR_SCALARS and the iteration cannot continue.
 *   = SOLVER_MEMORY_ERROR: allocation of the work arrays failed.
 *
 * ==============================================================
 */
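
/*
 * Illustrative call sketch (an editor's addition, not part of the solver
 * itself): one way a caller might drive Paso_Solver_PCG given the argument
 * description above. The construction of the system matrix A, the initial
 * guess x, the residual r = b - A*x and the performance monitor pp is
 * assumed to have been done elsewhere and is not shown.
 *
 *    dim_t  iter = 1000;      maximum iterations in, iterations used out
 *    double tol  = 1.e-8;     tolerance in, achieved residual norm out
 *    err_t  err  = Paso_Solver_PCG(A, r, x, &iter, &tol, pp);
 *    if (err == SOLVER_NO_ERROR) {
 *        x now holds the iterated approximate solution;
 *        iter and tol hold the iteration count and residual norm
 *    }
 */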

/* #define PASO_DYNAMIC_SCHEDULING_MVM */

#if defined PASO_DYNAMIC_SCHEDULING_MVM && defined _OPENMP
#define USE_DYNAMIC_SCHEDULING
#endif

err_t Paso_Solver_PCG(
          Paso_SystemMatrix * A,
          double * r,
          double * x,
          dim_t *iter,
          double * tolerance,
          Paso_Performance* pp) {

   /* Local variables */
   dim_t num_iter=0,maxit,num_iter_global, chunk_size=-1, len,rest, n_chunks, np, ipp;
   register double ss,ss1;
   dim_t i0, istart, iend;
   bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
   err_t status = SOLVER_NO_ERROR;
   dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
   double *resid = tolerance, *rs=NULL, *p=NULL, *v=NULL, *x2=NULL;
   double tau_old,tau,beta,delta,gamma_1,gamma_2,alpha,sum_1,sum_2,sum_3,sum_4,sum_5,tol;
   #ifdef PASO_MPI
   double loc_sum[2], sum[2];
   #endif
   double norm_of_residual,norm_of_residual_global;
   register double d;
   char* chksz_chr;

   #ifdef _OPENMP
   np=omp_get_max_threads();
   #else
   np=1;
   #endif

   #ifdef USE_DYNAMIC_SCHEDULING
   chksz_chr=getenv("PASO_CHUNK_SIZE_PCG");
   if (chksz_chr!=NULL) sscanf(chksz_chr, "%d",&chunk_size);
   chunk_size=MIN(MAX(1,chunk_size),n/np);
   n_chunks=n/chunk_size;
   if (n_chunks*chunk_size<n) n_chunks+=1;
   #else
   len=n/np;
   rest=n-len*np;
   #endif
   /*                                                                 */
   /*-----------------------------------------------------------------*/
   /*                                                                 */
   /*   Start of Calculation :                                        */
   /*   ---------------------                                         */
   /*                                                                 */
   /*                                                                 */
   rs=TMPMEMALLOC(n,double);
   p=TMPMEMALLOC(n,double);
   v=TMPMEMALLOC(n,double);
   x2=TMPMEMALLOC(n,double);

   /* Test the input parameters. */

   if (n < 0) {
      status = SOLVER_INPUT_ERROR;
   } else if (rs==NULL || p==NULL || v==NULL || x2==NULL) {
      status = SOLVER_MEMORY_ERROR;
   } else {
      maxit = *iter;
      tol = *resid;
      Performance_startMonitor(pp,PERFORMANCE_SOLVER);
      /* initialize data */
      #pragma omp parallel private(i0, istart, iend, ipp)
      {
         #ifdef USE_DYNAMIC_SCHEDULING
         #pragma omp for schedule(dynamic, 1)
         for (ipp=0; ipp < n_chunks; ++ipp) {
             istart=chunk_size*ipp;
             iend=MIN(istart+chunk_size,n);
         #else
         #pragma omp for schedule(static)
         for (ipp=0; ipp <np; ++ipp) {
             istart=len*ipp+MIN(ipp,rest);
             iend=len*(ipp+1)+MIN(ipp+1,rest);
         #endif
             #pragma ivdep
             for (i0=istart;i0<iend;i0++) {
                rs[i0]=r[i0];
                x2[i0]=x[i0];
                p[i0]=0;
                v[i0]=0;
             }
         #ifdef USE_DYNAMIC_SCHEDULING
         }
         #else
         }
         #endif
      }
      num_iter=0;

      /* PGH */
      /* without this we get a use of an uninitialised var below */
      tau = 0;

      /* start of iteration */
      while (!(convergeFlag || maxIterFlag || breakFlag)) {
         ++(num_iter);

         /* PGH */
         /* The next lines were commented out before I got here */
         /* v=prec(r) */
         /* tau=v*r; */
         /* leading to the use of an uninitialised var below */

         Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
         Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
         Paso_Solver_solvePreconditioner(A,v,r);
         Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
         Performance_startMonitor(pp,PERFORMANCE_SOLVER);

         /* tau=v*r */
         sum_1 = 0;
         #pragma omp parallel private(i0, istart, iend, ipp, ss)
         {
            ss=0;
            #ifdef USE_DYNAMIC_SCHEDULING
            #pragma omp for schedule(dynamic, 1)
            for (ipp=0; ipp < n_chunks; ++ipp) {
                istart=chunk_size*ipp;
                iend=MIN(istart+chunk_size,n);
            #else
            #pragma omp for schedule(static)
            for (ipp=0; ipp <np; ++ipp) {
                istart=len*ipp+MIN(ipp,rest);
                iend=len*(ipp+1)+MIN(ipp+1,rest);
            #endif
                #pragma ivdep
                for (i0=istart;i0<iend;i0++) ss+=v[i0]*r[i0];
            #ifdef USE_DYNAMIC_SCHEDULING
            }
            #else
            }
            #endif
            #pragma omp critical
            {
               sum_1+=ss;
            }
         }
         #ifdef PASO_MPI
         /* In case we have many MPI processes, each of which may have several OMP threads:
            OMP master participates in an MPI reduction to get global sum_1 */
         loc_sum[0] = sum_1;
         MPI_Allreduce(loc_sum, &sum_1, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
         #endif
         tau_old=tau;
         tau=sum_1;
         /* p=v+beta*p */
         #pragma omp parallel private(i0, istart, iend, ipp, beta)
         {
            #ifdef USE_DYNAMIC_SCHEDULING
            #pragma omp for schedule(dynamic, 1)
            for (ipp=0; ipp < n_chunks; ++ipp) {
                istart=chunk_size*ipp;
                iend=MIN(istart+chunk_size,n);
            #else
            #pragma omp for schedule(static)
            for (ipp=0; ipp <np; ++ipp) {
                istart=len*ipp+MIN(ipp,rest);
                iend=len*(ipp+1)+MIN(ipp+1,rest);
            #endif
                if (num_iter==1) {
                    #pragma ivdep
                    for (i0=istart;i0<iend;i0++) p[i0]=v[i0];
                } else {
                    beta=tau/tau_old;
                    #pragma ivdep
                    for (i0=istart;i0<iend;i0++) p[i0]=v[i0]+beta*p[i0];
                }
            #ifdef USE_DYNAMIC_SCHEDULING
            }
            #else
            }
            #endif
         }
         /* v=A*p */
         Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
         Performance_startMonitor(pp,PERFORMANCE_MVM);
         Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(ONE, A, p, ZERO, v);
         Performance_stopMonitor(pp,PERFORMANCE_MVM);
         Performance_startMonitor(pp,PERFORMANCE_SOLVER);

         /* delta=p*v */
         sum_2 = 0;
         #pragma omp parallel private(i0, istart, iend, ipp, ss)
         {
            ss=0;
            #ifdef USE_DYNAMIC_SCHEDULING
            #pragma omp for schedule(dynamic, 1)
            for (ipp=0; ipp < n_chunks; ++ipp) {
                istart=chunk_size*ipp;
                iend=MIN(istart+chunk_size,n);
            #else
            #pragma omp for schedule(static)
            for (ipp=0; ipp <np; ++ipp) {
                istart=len*ipp+MIN(ipp,rest);
                iend=len*(ipp+1)+MIN(ipp+1,rest);
            #endif
                #pragma ivdep
                for (i0=istart;i0<iend;i0++) ss+=v[i0]*p[i0];
            #ifdef USE_DYNAMIC_SCHEDULING
            }
            #else
            }
            #endif
            #pragma omp critical
            {
               sum_2+=ss;
            }
         }
         #ifdef PASO_MPI
         loc_sum[0] = sum_2;
         MPI_Allreduce(loc_sum, &sum_2, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
         #endif
         delta=sum_2;
         alpha=tau/delta;

         if (! (breakFlag = (ABS(delta) <= TOLERANCE_FOR_SCALARS))) {
            /* smoother */
            sum_3 = 0;
            sum_4 = 0;
            #pragma omp parallel private(i0, istart, iend, ipp, d, ss, ss1)
            {
               ss=0;
               ss1=0;
               #ifdef USE_DYNAMIC_SCHEDULING
               #pragma omp for schedule(dynamic, 1)
               for (ipp=0; ipp < n_chunks; ++ipp) {
                   istart=chunk_size*ipp;
                   iend=MIN(istart+chunk_size,n);
               #else
               #pragma omp for schedule(static)
               for (ipp=0; ipp <np; ++ipp) {
                   istart=len*ipp+MIN(ipp,rest);
                   iend=len*(ipp+1)+MIN(ipp+1,rest);
               #endif
                   #pragma ivdep
                   for (i0=istart;i0<iend;i0++) {
                      r[i0]-=alpha*v[i0];
                      d=r[i0]-rs[i0];
                      ss+=d*d;
                      ss1+=d*rs[i0];
                   }
               #ifdef USE_DYNAMIC_SCHEDULING
               }
               #else
               }
               #endif
               #pragma omp critical
               {
                  sum_3+=ss;
                  sum_4+=ss1;
               }
            }
            #ifdef PASO_MPI
            loc_sum[0] = sum_3;
            loc_sum[1] = sum_4;
            MPI_Allreduce(loc_sum, sum, 2, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
            sum_3=sum[0];
            sum_4=sum[1];
            #endif
            /* gamma_1 minimises the norm of the smoothed residual
               gamma_2*rs + gamma_1*r = rs + gamma_1*(r-rs) over gamma_1 */
            sum_5 = 0;
            #pragma omp parallel private(i0, istart, iend, ipp, ss, gamma_1, gamma_2)
            {
               gamma_1= ( (ABS(sum_3)<= ZERO) ? 0 : -sum_4/sum_3) ;
               gamma_2= ONE-gamma_1;
               ss=0;
               #ifdef USE_DYNAMIC_SCHEDULING
               #pragma omp for schedule(dynamic, 1)
               for (ipp=0; ipp < n_chunks; ++ipp) {
                   istart=chunk_size*ipp;
                   iend=MIN(istart+chunk_size,n);
               #else
               #pragma omp for schedule(static)
               for (ipp=0; ipp <np; ++ipp) {
                   istart=len*ipp+MIN(ipp,rest);
                   iend=len*(ipp+1)+MIN(ipp+1,rest);
               #endif
                   #pragma ivdep
                   for (i0=istart;i0<iend;i0++) {
                      rs[i0]=gamma_2*rs[i0]+gamma_1*r[i0];
                      x2[i0]+=alpha*p[i0];
                      x[i0]=gamma_2*x[i0]+gamma_1*x2[i0];
                      ss+=rs[i0]*rs[i0];
                   }
               #ifdef USE_DYNAMIC_SCHEDULING
               }
               #else
               }
               #endif
               #pragma omp critical
               {
                  sum_5+=ss;
               }
            }
            #ifdef PASO_MPI
            loc_sum[0] = sum_5;
            MPI_Allreduce(loc_sum, &sum_5, 1, MPI_DOUBLE, MPI_SUM, A->mpi_info->comm);
            #endif
            norm_of_residual=sqrt(sum_5);
            convergeFlag = norm_of_residual <= tol;
            maxIterFlag = num_iter == maxit;
            breakFlag = (ABS(tau) <= TOLERANCE_FOR_SCALARS);
         }
      }
      /* end of iteration */
      num_iter_global=num_iter;
      norm_of_residual_global=norm_of_residual;
      if (maxIterFlag) {
         status = SOLVER_MAXITER_REACHED;
      } else if (breakFlag) {
         status = SOLVER_BREAKDOWN;
      }
      Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
      TMPMEMFREE(rs);
      TMPMEMFREE(x2);
      TMPMEMFREE(v);
      TMPMEMFREE(p);
      *iter=num_iter_global;
      *resid=norm_of_residual_global;
   }
   /* End of PCG */
   return status;
}

Properties

Name            Value
svn:eol-style   native
svn:keywords    Author Date Id Revision
