/[escript]/trunk/paso/src/TFQMR.c
ViewVC logotype

Contents of /trunk/paso/src/TFQMR.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3259 - (show annotations)
Mon Oct 11 01:48:14 2010 UTC (9 years ago) by jfenwick
File MIME type: text/plain
File size: 7538 byte(s)
Merging dudley and scons updates from branches

1
2 /*******************************************************
3 *
4 * Copyright (c) 2003-2010 by University of Queensland
5 * Earth Systems Science Computational Center (ESSCC)
6 * http://www.uq.edu.au/esscc
7 *
8 * Primary Business: Queensland, Australia
9 * Licensed under the Open Software License version 3.0
10 * http://www.opensource.org/licenses/osl-3.0.php
11 *
12 *******************************************************/
13
14
15 /* TFQMR iterations */
16
17 #include "SystemMatrix.h"
18 #include "Paso.h"
19 #include "Solver.h"
20 #include "PasoUtil.h"
21
22 #ifdef _OPENMP
23 #include <omp.h>
24 #endif
25
26 #ifdef ESYS_MPI
27 #include <mpi.h>
28 #endif
29
30 /*
31 *
32 * Purpose
33 * =======
34 *
35 * TFQMR solves the linear system A*x = b
36 *
37 * Convergence test: norm( b - A*x )< TOL.
38 * For other measures, see the above reference.
39 *
40 * Arguments
41 * =========
42 *
43 * r (input) DOUBLE PRECISION array, dimension N.
44 *
45 *
46 * x (input/output) DOUBLE PRECISION array, dimension N.
47 *
48 *
49 * ITER (input/output) INT
50 * On input, the maximum iterations to be performed.
51 * On output, actual number of iterations performed.
52 *
53 * INFO (output) INT
54 *
55 * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
56 * = SOLVEr_MAXITER_REACHED
57 * = SOLVER_INPUT_ERROR Illegal parameter:
58 * = SOLVEr_BREAKDOWN: If parameters rHO or OMEGA become smaller
59 * = SOLVER_MEMORY_ERROR : If parameters rHO or OMEGA become smaller
60 *
61 * ==============================================================
62 */
63
64 /* #define PASO_DYNAMIC_SCHEDULING_MVM */
65
66 #if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP
67 #define USE_DYNAMIC_SCHEDULING
68 #endif
69
70 err_t Paso_Solver_TFQMR(
71 Paso_SystemMatrix * A,
72 double * r,
73 double * x,
74 dim_t *iter,
75 double * tolerance,
76 Paso_Performance* pp) {
77
78 /* Local variables */
79
80 int m=1;
81 int j=0;
82
83 dim_t num_iter=0,maxit;
84 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
85 err_t status = SOLVER_NO_ERROR;
86 dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
87 double *u1=NULL, *u2=NULL, *y1=NULL, *y2=NULL, *d=NULL, *w=NULL, *v=NULL, *v_old=NULL, *temp_vector=NULL,*res=NULL;
88
89 double eta,theta,tau,rho,beta,alpha,sigma,rhon,c;
90
91 double norm_of_residual;
92
93 /* */
94 /*-----------------------------------------------------------------*/
95 /* */
96 /* Start of Calculation : */
97 /* --------------------- */
98 /* */
99 /* */
100 u1=TMPMEMALLOC(n,double);
101 u2=TMPMEMALLOC(n,double);
102 y1=TMPMEMALLOC(n,double);
103 y2=TMPMEMALLOC(n,double);
104 d=TMPMEMALLOC(n,double);
105 w=TMPMEMALLOC(n,double);
106 v=TMPMEMALLOC(n,double);
107 v_old=TMPMEMALLOC(n,double);
108 temp_vector=TMPMEMALLOC(n,double);
109 res=TMPMEMALLOC(n,double);
110
111 if (u1 ==NULL || u2== NULL || y1 == NULL || y2== NULL || d==NULL || w==NULL || v==NULL || v_old==NULL) {
112 status=SOLVER_MEMORY_ERROR;
113 }
114
115 maxit = *iter;
116
117 /* Test the input parameters. */
118 if (n < 0 || maxit<=0 ) {
119 status=SOLVER_INPUT_ERROR;
120 }
121
122 Paso_zeroes(n,x);
123
124 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
125 Paso_SystemMatrix_solvePreconditioner(A,res,r);
126 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
127
128 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
129
130 Paso_zeroes(n,u2);
131 Paso_zeroes(n,y2);
132
133 Paso_Copy(n,w,res);
134 Paso_Copy(n,y1,res);
135
136 Paso_zeroes(n,d);
137
138 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
139 Performance_startMonitor(pp,PERFORMANCE_MVM);
140 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector);
141 Performance_stopMonitor(pp,PERFORMANCE_MVM);
142 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
143
144 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
145 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
146 Paso_SystemMatrix_solvePreconditioner(A,v,temp_vector);
147 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
148 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
149
150 Paso_Copy(n,u1,v);
151
152 theta = 0.0;
153 eta = 0.0;
154
155 tau = Paso_l2(n,res,A->mpi_info);
156
157 rho = tau * tau;
158
159 norm_of_residual=tau*sqrt ( m + 1 );
160
161 while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) ))
162 {
163
164
165 sigma=Paso_InnerProduct(n,res,v,A->mpi_info);
166
167 if (! (breakFlag = (ABS(sigma) == 0.))) {
168
169 alpha = rho / sigma;
170
171 for (j=0; j<=1; j=j+1)
172 {
173 /* Compute y2 and u2 only if you have to */
174 if ( j == 1 ){
175 Paso_LinearCombination(n,y2,1.,y1,-alpha,v);
176
177 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
178 Performance_startMonitor(pp,PERFORMANCE_MVM);
179 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y2,PASO_ZERO,temp_vector);
180 Performance_stopMonitor(pp,PERFORMANCE_MVM);
181 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
182
183 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
184 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
185 Paso_SystemMatrix_solvePreconditioner(A,u2,temp_vector);
186 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
187 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
188 }
189 m = 2 * (num_iter+1) - 2 + (j+1);
190 if (j==0) {
191 Paso_Update(n,1.,w,-alpha,u1);
192 Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y1);
193 }
194 if (j==1) {
195 Paso_Update(n,1.,w,-alpha,u2);
196 Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y2);
197 }
198
199 theta =Paso_l2(n,w,A->mpi_info)/tau;
200 c = 1.0 / sqrt ( 1.0 + theta * theta );
201 tau = tau * theta * c;
202 eta = c * c * alpha;
203 Paso_Update(n,1.,x,eta,d);
204 }
205
206 breakFlag = (ABS(rho) == 0);
207
208 rhon = Paso_InnerProduct(n,res,w,A->mpi_info);
209 beta = rhon / rho;
210 rho = rhon;
211
212 Paso_LinearCombination(n,y1,1.,w,beta,y2);
213
214 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
215
216 Performance_startMonitor(pp,PERFORMANCE_MVM);
217 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector);
218 Performance_stopMonitor(pp,PERFORMANCE_MVM);
219
220 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
221 Paso_SystemMatrix_solvePreconditioner(A,u1,temp_vector);
222 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
223
224 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
225
226 Paso_Copy(n,v_old,v);
227
228 Paso_Update(n,beta,v_old,1,u2);
229 Paso_LinearCombination(n,v,1.,u1,beta,v_old);
230 }
231 maxIterFlag = (num_iter > maxit);
232 norm_of_residual=tau*sqrt ( m + 1 );
233 convergeFlag=(norm_of_residual<(*tolerance));
234
235
236 if (maxIterFlag) {
237 status = SOLVER_MAXITER_REACHED;
238 } else if (breakFlag) {
239 status = SOLVER_BREAKDOWN;
240 }
241 ++(num_iter);
242 }
243 /* end of iteration */
244
245 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
246 TMPMEMFREE(u1);
247 TMPMEMFREE(u2);
248 TMPMEMFREE(y1);
249 TMPMEMFREE(y2);
250 TMPMEMFREE(d);
251 TMPMEMFREE(w);
252 TMPMEMFREE(v);
253 TMPMEMFREE(v_old);
254 TMPMEMFREE(temp_vector);
255 TMPMEMFREE(res);
256 *iter=num_iter;
257 *tolerance=norm_of_residual;
258
259 /* End of TFQMR */
260 return status;
261 }

  ViewVC Help
Powered by ViewVC 1.1.26