/[escript]/trunk/paso/src/TFQMR.cpp
ViewVC logotype

Contents of /trunk/paso/src/TFQMR.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4873 - (show annotations)
Wed Apr 16 06:38:51 2014 UTC (5 years, 5 months ago) by caltinay
File size: 7496 byte(s)
whitespace only changes.

1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2003-2014 by University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Open Software License version 3.0
9 * http://www.opensource.org/licenses/osl-3.0.php
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17
18 /*
19 *
20 * Purpose
21 * =======
22 *
23 * TFQMR solves the linear system A*x = b
24 *
25 * Convergence test: norm( b - A*x )< TOL.
26 * For other measures, see the above reference.
27 *
28 * Arguments
29 * =========
30 *
31 * r (input) DOUBLE PRECISION array, dimension N.
32 *
33 *
34 * x (input/output) DOUBLE PRECISION array, dimension N.
35 *
36 *
37 * ITER (input/output) INT
38 * On input, the maximum iterations to be performed.
39 * On output, actual number of iterations performed.
40 *
41 * INFO (output) INT
42 *
43 * = SOLVER_NO_ERROR: Successful exit. Iterated approximate solution returned.
44 * = SOLVER_MAXITER_REACHED
45 * = SOLVER_INPUT_ERROR Illegal parameter:
46 * = SOLVER_BREAKDOWN: If parameters RHO or OMEGA become smaller
47 * = SOLVER_MEMORY_ERROR : If parameters RHO or OMEGA become smaller
48 *
49 * ==============================================================
50 */
51
52 #include "Solver.h"
53 #include "PasoUtil.h"
54
55 namespace paso {
56
57 //#define PASO_DYNAMIC_SCHEDULING_MVM
58 #if defined PASO_DYNAMIC_SCHEDULING_MVM && defined __OPENMP
59 #define USE_DYNAMIC_SCHEDULING
60 #endif
61
62 err_t Solver_TFQMR(SystemMatrix_ptr A, double* r, double* x, dim_t* iter,
63 double* tolerance, Performance* pp)
64 {
65 int m=1;
66 int j=0;
67 dim_t num_iter=0;
68 bool breakFlag=false, maxIterFlag=false, convergeFlag=false;
69 err_t status = SOLVER_NO_ERROR;
70 const dim_t n = A->getTotalNumRows();
71 double eta,theta,tau,rho,beta,alpha,sigma,rhon,c;
72 double norm_of_residual;
73
74 double* u1 = new double[n];
75 double* u2 = new double[n];
76 double* y1 = new double[n];
77 double* y2 = new double[n];
78 double* d = new double[n];
79 double* w = new double[n];
80 double* v = new double[n];
81 double* temp_vector = new double[n];
82 double* res = new double[n];
83
84 dim_t maxit = *iter;
85
86 if (maxit <= 0) {
87 status = SOLVER_INPUT_ERROR;
88 }
89
90 util::zeroes(n, x);
91
92 Performance_startMonitor(pp, PERFORMANCE_PRECONDITIONER);
93 A->solvePreconditioner(res, r);
94 Performance_stopMonitor(pp, PERFORMANCE_PRECONDITIONER);
95
96 Performance_startMonitor(pp, PERFORMANCE_SOLVER);
97 util::zeroes(n,u2);
98 util::zeroes(n,y2);
99 util::copy(n,w,res);
100 util::copy(n,y1,res);
101 util::zeroes(n,d);
102 Performance_stopMonitor(pp, PERFORMANCE_SOLVER);
103
104 Performance_startMonitor(pp, PERFORMANCE_MVM);
105 SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1, PASO_ZERO, temp_vector);
106 Performance_stopMonitor(pp, PERFORMANCE_MVM);
107 Performance_startMonitor(pp, PERFORMANCE_SOLVER);
108
109 Performance_stopMonitor(pp, PERFORMANCE_SOLVER);
110 Performance_startMonitor(pp, PERFORMANCE_PRECONDITIONER);
111 A->solvePreconditioner(v,temp_vector);
112 Performance_stopMonitor(pp, PERFORMANCE_PRECONDITIONER);
113
114 Performance_startMonitor(pp, PERFORMANCE_SOLVER);
115 // v = P^{-1} * A y1
116 util::copy(n, u1, v);
117
118 theta = 0.0;
119 eta = 0.0;
120 tau = util::l2(n,res,A->mpi_info);
121 rho = tau * tau;
122 norm_of_residual=tau;
123
124 while (!(convergeFlag || maxIterFlag || breakFlag || (status !=SOLVER_NO_ERROR) )) {
125 sigma = util::innerProduct(n,res,v,A->mpi_info);
126 if (! (breakFlag = (ABS(sigma) == 0.))) {
127 alpha = rho / sigma;
128 for (j=0; j<=1; j=j+1) {
129 // Compute y2 and u2 only if you have to
130 if (j == 1) {
131 // y2 = y1 - alpha*v
132 util::linearCombination(n, y2, PASO_ONE, y1, -alpha, v);
133
134 Performance_stopMonitor(pp, PERFORMANCE_SOLVER);
135 Performance_startMonitor(pp, PERFORMANCE_MVM);
136 SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y2,PASO_ZERO,temp_vector);
137 Performance_stopMonitor(pp, PERFORMANCE_MVM);
138 Performance_startMonitor(pp, PERFORMANCE_SOLVER);
139
140 Performance_stopMonitor(pp, PERFORMANCE_SOLVER);
141 Performance_startMonitor(pp, PERFORMANCE_PRECONDITIONER);
142 A->solvePreconditioner(u2,temp_vector);
143 Performance_stopMonitor(pp, PERFORMANCE_PRECONDITIONER);
144 Performance_startMonitor(pp, PERFORMANCE_SOLVER);
145 // u2 = P^{-1} * A y2
146 }
147 m = 2 * (num_iter+1) - 2 + (j+1);
148
149 if (j==0) {
150 // w = w - alpha * u1
151 util::update(n, 1., w, -alpha, u1);
152 // d = (theta * theta * eta / alpha)*d + y1
153 util::update(n, (theta * theta * eta / alpha), d, 1., y1);
154 } else if (j==1) {
155 // w = w - -alpha * u2
156 util::update(n, 1., w, -alpha, u2);
157 // d = (theta * theta * eta / alpha)*d + y2
158 util::update(n, (theta * theta * eta / alpha), d, 1., y2);
159 }
160
161 theta =util::l2(n,w,A->mpi_info)/tau;
162 //printf("tau = %e, %e %e\n",tau, util::l2(n,w,A->mpi_info)/tau, theta);
163 c = PASO_ONE / sqrt(PASO_ONE + theta * theta);
164 tau = tau * theta * c;
165 eta = c * c * alpha;
166 util::update(n,1.,x,eta,d);
167 }
168
169 breakFlag = (ABS(rho) == 0);
170
171 rhon = util::innerProduct(n, res, w, A->mpi_info);
172 beta = rhon / rho;
173 rho = rhon;
174
175 // y1 = w + beta * y2
176 util::linearCombination(n,y1, PASO_ONE,w,beta,y2);
177
178 Performance_stopMonitor(pp, PERFORMANCE_SOLVER);
179 Performance_startMonitor(pp, PERFORMANCE_MVM);
180 SystemMatrix_MatrixVector_CSR_OFFSET0(PASO_ONE, A, y1, PASO_ZERO, temp_vector);
181 Performance_stopMonitor(pp, PERFORMANCE_MVM);
182
183 Performance_startMonitor(pp, PERFORMANCE_PRECONDITIONER);
184 A->solvePreconditioner(u1, temp_vector);
185 Performance_stopMonitor(pp, PERFORMANCE_PRECONDITIONER);
186 Performance_startMonitor(pp, PERFORMANCE_SOLVER);
187 // u1 = P^{-1} * A y1
188
189 // t = u2 + beta * v
190 util::linearCombination(n,temp_vector,PASO_ONE,u2,beta,v);
191 // v = u1 + beta * t
192 util::linearCombination(n,v,PASO_ONE,u1,beta,temp_vector);
193 }
194 maxIterFlag = (num_iter > maxit);
195 norm_of_residual = tau*sqrt((double)(m + 1));
196 convergeFlag = (norm_of_residual<(*tolerance));
197
198 if (maxIterFlag) {
199 status = SOLVER_MAXITER_REACHED;
200 } else if (breakFlag) {
201 status = SOLVER_BREAKDOWN;
202 }
203 ++num_iter;
204 } // end of iterations
205
206 Performance_stopMonitor(pp, PERFORMANCE_SOLVER);
207 delete[] u1;
208 delete[] u2;
209 delete[] y1;
210 delete[] y2;
211 delete[] d;
212 delete[] w;
213 delete[] v;
214 delete[] temp_vector;
215 delete[] res;
216 *iter=num_iter;
217 *tolerance=norm_of_residual;
218 return status;
219 }
220
221 } // namespace paso
222

Properties

Name Value
svn:mergeinfo /branches/amg_from_3530/paso/src/TFQMR.cpp:3531-3826 /branches/lapack2681/paso/src/TFQMR.cpp:2682-2741 /branches/pasowrap/paso/src/TFQMR.cpp:3661-3674 /branches/py3_attempt2/paso/src/TFQMR.cpp:3871-3891 /branches/restext/paso/src/TFQMR.cpp:2610-2624 /branches/ripleygmg_from_3668/paso/src/TFQMR.cpp:3669-3791 /branches/stage3.0/paso/src/TFQMR.cpp:2569-2590 /branches/symbolic_from_3470/paso/src/TFQMR.cpp:3471-3974 /branches/symbolic_from_3470/ripley/test/python/paso/src/TFQMR.cpp:3517-3974 /release/3.0/paso/src/TFQMR.cpp:2591-2601 /trunk/paso/src/TFQMR.cpp:4257-4344 /trunk/ripley/test/python/paso/src/TFQMR.cpp:3480-3515

  ViewVC Help
Powered by ViewVC 1.1.26