/[escript]/branches/doubleplusgood/paso/src/TFQMR.cpp
ViewVC logotype

Contents of /branches/doubleplusgood/paso/src/TFQMR.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4275 - (show annotations)
Tue Mar 5 09:32:52 2013 UTC (6 years, 1 month ago) by jfenwick
File size: 7928 byte(s)
some experiments
1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2013 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development since 2012 by School of Earth Sciences
13 *
14 *****************************************************************************/
15
16
17 /* TFQMR iterations */
18
19 #include "SystemMatrix.h"
20 #include "Paso.h"
21 #include "Solver.h"
22 #include "PasoUtil.h"
23
24 #ifdef _OPENMP
25 #include <omp.h>
26 #endif
27
28 #ifdef ESYS_MPI
29 #include <mpi.h>
30 #endif
31
32 /*
33 *
34 * Purpose
35 * =======
36 *
37 * TFQMR solves the linear system A*x = b
38 *
39 * Convergence test: norm( b - A*x )< TOL.
40 * For other measures, see the above reference.
41 *
42 * Arguments
43 * =========
44 *
45 * r (input) DOUBLE PRECISION array, dimension N.
46 *
47 *
48 * x (input/output) DOUBLE PRECISION array, dimension N.
49 *
50 *
51 * ITER (input/output) INT
52 * On input, the maximum iterations to be performed.
53 * On output, actual number of iterations performed.
54 *
55 * INFO (output) INT
56 *
57 * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
58 * = SOLVER_MAXITER_REACHED
59 * = SOLVER_INPUT_ERROR Illegal parameter:
60 * = SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller
61 * = SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller
62 *
63 * ==============================================================
64 */
65
66 /* #define PASO_DYNAMIC_SCHEDULING_MVM */
67
68 #if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP
69 #define USE_DYNAMIC_SCHEDULING
70 #endif
71
72 err_t Paso_Solver_TFQMR(
73 Paso_SystemMatrix * A,
74 double * r,
75 double * x,
76 dim_t *iter,
77 double * tolerance,
78 Paso_Performance* pp) {
79
80 /* Local variables */
81
82 int m=1;
83 int j=0;
84
85 dim_t num_iter=0,maxit;
86 bool_t breakFlag=FALSE, maxIterFlag=FALSE, convergeFlag=FALSE;
87 err_t status = SOLVER_NO_ERROR;
88 dim_t n = Paso_SystemMatrix_getTotalNumRows(A);
89 double *u1=NULL, *u2=NULL, *y1=NULL, *y2=NULL, *d=NULL, *w=NULL, *v=NULL, *temp_vector=NULL,*res=NULL;
90
91 double eta,theta,tau,rho,beta,alpha,sigma,rhon,c;
92
93 double norm_of_residual;
94
95 /* */
96 /*-----------------------------------------------------------------*/
97 /* */
98 /* Start of Calculation : */
99 /* --------------------- */
100 /* */
101 /* */
102 u1=new double[n];
103 u2=new double[n];
104 y1=new double[n];
105 y2=new double[n];
106 d=new double[n];
107 w=new double[n];
108 v=new double[n];
109 temp_vector=new double[n];
110 res=new double[n];
111
112 if (u1 ==NULL || u2== NULL || y1 == NULL || y2== NULL || d==NULL || w==NULL || v==NULL ) {
113 status=SOLVER_MEMORY_ERROR;
114 }
115
116 maxit = *iter;
117
118 /* Test the input parameters. */
119 if (n < 0 || maxit<=0 ) {
120 status=SOLVER_INPUT_ERROR;
121 }
122
123 Paso_zeroes(n,x);
124
125 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
126 Paso_SystemMatrix_solvePreconditioner(A,res,r);
127 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
128
129 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
130
131 Paso_zeroes(n,u2);
132 Paso_zeroes(n,y2);
133
134 Paso_Copy(n,w,res);
135 Paso_Copy(n,y1,res);
136
137 Paso_zeroes(n,d);
138
139 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
140 Performance_startMonitor(pp,PERFORMANCE_MVM);
141 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector);
142 Performance_stopMonitor(pp,PERFORMANCE_MVM);
143 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
144
145 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
146 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
147 Paso_SystemMatrix_solvePreconditioner(A,v,temp_vector);
148 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
149 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
150 /* v = P^{-1} * A y1 */
151
152 Paso_Copy(n,u1,v);
153
154 theta = 0.0;
155 eta = 0.0;
156
157 tau = Paso_l2(n,res,A->mpi_info);
158
159 rho = tau * tau;
160
161 norm_of_residual=tau;
162
163 while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) ))
164 {
165
166
167 sigma=Paso_InnerProduct(n,res,v,A->mpi_info);
168
169 if (! (breakFlag = (ABS(sigma) == 0.))) {
170
171 alpha = rho / sigma;
172
173 for (j=0; j<=1; j=j+1)
174 {
175 /* Compute y2 and u2 only if you have to */
176 if ( j == 1 ){
177 Paso_LinearCombination(n,y2,PASO_ONE,y1,-alpha,v); /* y2 = y1 - alpha*v */
178
179 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
180 Performance_startMonitor(pp,PERFORMANCE_MVM);
181 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y2,PASO_ZERO,temp_vector);
182 Performance_stopMonitor(pp,PERFORMANCE_MVM);
183 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
184
185 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
186 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
187 Paso_SystemMatrix_solvePreconditioner(A,u2,temp_vector);
188 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
189 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
190 /* u2 = P^{-1} * A y2 */
191 }
192 m = 2 * (num_iter+1) - 2 + (j+1);
193
194 if (j==0) {
195 Paso_Update(n,1.,w,-alpha,u1); /* w = w - alpha * u1 */
196 Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y1); /* d = ( theta * theta * eta / alpha )*d + y1 */
197 }
198 if (j==1) {
199 Paso_Update(n,1.,w,-alpha,u2); /* w = w - -alpha * u2 */
200 Paso_Update(n,( theta * theta * eta / alpha ),d,1.,y2); /* d = ( theta * theta * eta / alpha )*d + y2 */
201 }
202
203 theta =Paso_l2(n,w,A->mpi_info)/tau;
204 /*printf("tau = %e, %e %e\n",tau, Paso_l2(n,w,A->mpi_info)/tau, theta);*/
205 c = PASO_ONE / sqrt ( PASO_ONE + theta * theta );
206 tau = tau * theta * c;
207 eta = c * c * alpha;
208 Paso_Update(n,1.,x,eta,d);
209 }
210
211 breakFlag = (ABS(rho) == 0);
212
213 rhon = Paso_InnerProduct(n,res,w,A->mpi_info);
214 beta = rhon / rho;
215 rho = rhon;
216
217 Paso_LinearCombination(n,y1, PASO_ONE,w,beta,y2); /* y1 = w + beta * y2 */
218
219 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
220 Performance_startMonitor(pp,PERFORMANCE_MVM);
221 Paso_SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1,PASO_ZERO,temp_vector);
222 Performance_stopMonitor(pp,PERFORMANCE_MVM);
223
224 Performance_startMonitor(pp,PERFORMANCE_PRECONDITIONER);
225 Paso_SystemMatrix_solvePreconditioner(A,u1,temp_vector);
226 Performance_stopMonitor(pp,PERFORMANCE_PRECONDITIONER);
227 Performance_startMonitor(pp,PERFORMANCE_SOLVER);
228 /* u1 = P^{-1} * A y1 */
229
230 Paso_LinearCombination(n,temp_vector,PASO_ONE,u2,beta,v); /* t = u2 + beta * v */
231 Paso_LinearCombination(n,v,PASO_ONE,u1,beta,temp_vector); /* v = u1 + beta * t */
232 }
233 maxIterFlag = (num_iter > maxit);
234 norm_of_residual=tau*sqrt ( (double) (m + 1 ) );
235 convergeFlag=(norm_of_residual<(*tolerance));
236
237
238 if (maxIterFlag) {
239 status = SOLVER_MAXITER_REACHED;
240 } else if (breakFlag) {
241 status = SOLVER_BREAKDOWN;
242 }
243 ++(num_iter);
244 }
245 /* end of iteration */
246
247 Performance_stopMonitor(pp,PERFORMANCE_SOLVER);
248 delete[] (u1);
249 delete[] (u2);
250 delete[] (y1);
251 delete[] (y2);
252 delete[] (d);
253 delete[] (w);
254 delete[] (v);
255 delete[] (temp_vector);
256 delete[] (res);
257 *iter=num_iter;
258 *tolerance=norm_of_residual;
259
260 /* End of TFQMR */
261 return status;
262 }

  ViewVC Help
Powered by ViewVC 1.1.26