%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright (c) 2003-2013 by University of Queensland
% http://www.uq.edu.au
%
% Primary Business: Queensland, Australia
% Licensed under the Open Software License version 3.0
% http://www.opensource.org/licenses/osl-3.0.php
%
% Development until 2012 by Earth Systems Science Computational Center (ESSCC)
% Development since 2012 by School of Earth Sciences
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\chapter{Minimization Algorithms}\label{chapter:ref:Minimization}
We need to find the level set function $m$ minimizing the cost function $J$ as
defined in Equation~\ref{REF:EQU:INTRO 1}. The physical parameters $p^f$ and
the data defects are linked through the state variables $u^f$ which are given
as the solution of a partial differential equation (PDE) with coefficients
depending on $p^f$. This PDE (or -- in case of several forward models -- this
set of PDEs) defines a constraint in the minimization problem.
In the context of our applications it can be assumed that the PDE is of
``maximum rank'', i.e. for a given value of the level set function $m$ there is a
unique value for the state variables $u^f$ as the solution of the forward model.
So from a mathematical point of view the state variables $u^f$ can be eliminated
from the problem and the minimization problem becomes in fact a minimization
problem for the level set function $m$ alone, from which the physical parameters
of most interest in applications can easily be derived.
However, one needs to keep in mind that each evaluation of the cost
function requires the solution of a PDE (an additional PDE solution is required
for the gradient).

In some applications the optimization problem to be solved is a quadratic
programming problem, which would allow the use of specialized solvers.
However, for the general scenarios we are interested in here we cannot assume
this simplification and need to be able to handle a general cost function.
The method used here is the limited-memory Broyden-Fletcher-Goldfarb-Shanno
(\emph{L-BFGS}) method, see~\cite{Nocedal1980}.
This method is a quasi-Newton method.
To implement the method one needs to provide the evaluation of the cost
function $J$, its gradient $\nabla J$ and a dual product $<.,.>$\footnote{A
dual product $<.,.>$ defines a function which assigns to a pair $(p,g)$ of a level
set increment $p$ and a gradient $g$ a real number $<p,g>$. It is linear
in the first and linear in the second argument.}
such that for $\alpha \to 0$
\begin{equation}\label{EQU:MIN:1}
J(m+\alpha p) = J(m) + \alpha \cdot < p , \nabla J(m)> + o(\alpha)
\end{equation}
where $p$ is an increment to the level set function\footnote{$o$ denotes the
little $o$ notation, see \url{http://en.wikipedia.org/wiki/Big_O_notation}}.
Notice that if $m$ is the unknown solution one has $\nabla J(m)=0$.
Moreover, an approximation of the inverse of the Hessian operator
$\nabla \nabla J(m)$ needs to be provided for a given value of $m$.
In the implementation we do not need to provide the entire operator, but for a
given gradient difference $g$ an approximation $h=Hg$ of
$(\nabla \nabla J(m))^{-1}g$ needs to be calculated.
This is an approximative solution of the equation $\nabla \nabla J(m)\; h=g$
which in variational form is given as
\begin{equation}\label{EQU:MIN:2}
<p, \nabla \nabla J(m)\; h > = <p, g>
\end{equation}
for all level set increments $p$.
In the cases relevant here, a possible approach is to make the approximation
$\nabla \nabla J(m) \approx \nabla \nabla J^{reg}(m)$ which can easily be
constructed and for which the resulting variational Equation~\ref{EQU:MIN:2}
can easily be solved, see Chapter~\ref{Chp:ref:regularization}.

L-BFGS is an iterative method which calculates a sequence of level set
approximations $m_{k}$ that converges towards the unknown level set function
minimizing the cost function.
This iterative process is terminated if certain stopping criteria are fulfilled.
A criterion commonly used is
\begin{equation}\label{EQU:MIN:3a}
\| m_k - m_{k-1} \| \le \|m_k\| \cdot {m\_tol}
\end{equation}
in order to terminate the iteration after $m_k$ has been calculated.
In this condition $\|.\|$ denotes a norm and $m\_tol$ defines a relative
tolerance. A typical value for $m\_tol$ is $10^{-4}$. Alternatively one can
check for changes in the cost function:
\begin{equation}\label{EQU:MIN:3b}
\mid J(m_k) - J(m_{k-1}) \mid \le \mid J(m_k) - J(m_0) \mid \cdot {J\_tol}
\end{equation}
where ${J\_tol}$ is a given tolerance and $m_0$ is the initial guess of the solution.
As this criterion depends on the initial guess it is not recommended.
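For illustration, the two criteria translate directly into a few lines of
Python (a minimal sketch using plain numpy arrays; in the solver itself the
norm and the cost function values are supplied by the cost function class):
\begin{verbatim}
import numpy as np

def m_converged(m_k, m_km1, m_tol=1e-4):
    # relative change of the level set function, first criterion
    return np.linalg.norm(m_k - m_km1) <= np.linalg.norm(m_k) * m_tol

def J_converged(J_k, J_km1, J_0, J_tol):
    # relative change of the cost function, second criterion
    return abs(J_k - J_km1) <= abs(J_k - J_0) * J_tol
\end{verbatim}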

\section{Solver Classes}

\begin{classdesc}{MinimizerLBFGS}{%
\optional{J=\None}%
\optional{, m_tol=1e-4}%
\optional{, J_tol=\None}%
\optional{, imax=300}}
constructs an instance of the limited-memory Broyden-Fletcher-Goldfarb-Shanno
(\emph{L-BFGS}) solver.
\member{J} sets the cost function to be minimized, see Section~\ref{chapter:ref:Minimization: costfunction class}.
If not present the cost function needs to be set using the \member{setCostFunction} method.
\member{m_tol} and \member{J_tol} specify the tolerances for the stopping
criteria (see description of \member{setTolerance} below).
\member{imax} sets the maximum number of iteration steps (see
\member{setMaxIterations} below).
\end{classdesc}

\begin{methoddesc}[MinimizerLBFGS]{setTolerance}{\optional{m_tol=1e-4}\optional{, J_tol=\None}}
sets the tolerances for the stopping criteria.
\member{m_tol} sets the tolerance for the unknown level set function, see criterion~\ref{EQU:MIN:3a}.
If \member{m_tol}=\None the tolerance on the level set function is not checked.
\member{J_tol} sets the tolerance for the cost function, see criterion~\ref{EQU:MIN:3b}.
If \member{J_tol}=\None the tolerance on the cost function is not checked (recommended).
If \member{m_tol} and \member{J_tol} are both set, criteria~\ref{EQU:MIN:3a}
and~\ref{EQU:MIN:3b} are both applied to terminate the iteration.
\end{methoddesc}

\begin{methoddesc}[MinimizerLBFGS]{setMaxIterations}{imax}
sets the maximum number of iterations before the minimizer terminates. If
the maximum number of iterations is reached a \exception{MinimizerMaxIterReached}
exception is thrown.
\end{methoddesc}

\begin{methoddesc}[MinimizerLBFGS]{getResult}{}
returns the solution of the optimization problem.
This method only returns something useful if the optimization process has
completed.
If the iteration failed, the last available approximation of the solution is
returned.
\end{methoddesc}

\begin{methoddesc}[MinimizerLBFGS]{getOptions}{}
returns the solver options as a dictionary which contains all available option
names and their current values.
\end{methoddesc}

\begin{methoddesc}[MinimizerLBFGS]{setOptions}{%
\optional{truncation=30}%
\optional{, restart=60}%
\optional{, initialHessian=1}}
sets solver options. \member{truncation} defines the number of gradients to keep in memory.
\member{restart} defines the number of iteration steps after which the iteration is restarted.
\member{initialHessian} can be used to provide an estimate for the scale of
the inverse Hessian. If the cost function provides such an estimate this
option is ignored.
\end{methoddesc}

\begin{methoddesc}[MinimizerLBFGS]{run}{m0}
runs the solution algorithm and returns an approximation of the solution.
\member{m0} is the initial guess to use.
This method may throw an exception if the maximum number of iterations is
reached or a solver breakdown occurs.
\end{methoddesc}

\begin{methoddesc}[MinimizerLBFGS]{logSummary}{}
writes some statistical information to the logger.
\end{methoddesc}
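As an illustration, a typical call sequence might look as follows (a minimal
sketch only; \texttt{mycostfunction} stands for any user-supplied cost function
implementing the interface of Section~\ref{chapter:ref:Minimization: costfunction class},
\texttt{m0} for the initial level set function, and the import path
\texttt{esys.downunder} is assumed):
\begin{verbatim}
from esys.downunder import MinimizerLBFGS

solver = MinimizerLBFGS(J=mycostfunction, m_tol=1e-4, imax=200)
solver.setOptions(truncation=20)   # keep the last 20 vector pairs in memory
m = solver.run(m0)                 # m0 is the initial guess
solver.logSummary()                # write solver statistics to the logger
\end{verbatim}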

\section{\member{CostFunction} Class Template}\label{chapter:ref:Minimization: costfunction class}

\begin{classdesc}{CostFunction}{}
template of a cost function $J$
\end{classdesc}

\begin{methoddesc}[CostFunction]{getDualProduct}{m, g}
returns the dual product $<m,g>$ of \member{m} and \member{g}.
\end{methoddesc}
%
\begin{methoddesc}[CostFunction]{getValue}{m, *args}
returns the value $J(m)$ using pre-calculated values \member{args} for $m$.
\end{methoddesc}
%
\begin{methoddesc}[CostFunction]{getGradient}{m, *args}
returns the gradient $\nabla J$ of $J$ at $m$ using pre-calculated values \member{args} for $m$.
\end{methoddesc}
%
\begin{methoddesc}[CostFunction]{getArguments}{m}
returns pre-calculated values that are shared in the calculation of $J(m)$ and
$\nabla J(m)$.
\end{methoddesc}
%
\begin{methoddesc}[CostFunction]{getNorm}{m}
returns the norm $\|m\|$ of \member{m}.
\end{methoddesc}
%
\begin{methoddesc}[CostFunction]{updateHessian}{}
notifies the class that the Hessian operator needs to be updated in case the
class provides an approximation for the inverse of the Hessian.
\end{methoddesc}
%
\begin{methoddesc}[CostFunction]{getInverseHessianApproximation}{m, g, *args}
returns an approximative evaluation $p$ of the inverse of the Hessian operator
of the cost function for a given gradient $g$ at location $m$.
\end{methoddesc}
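The following sketch illustrates the interface for the toy cost function
$J(m)=\frac{1}{2}\|m - m^*\|^2$ using plain numpy arrays (class and variable
names are hypothetical; a real application works with the data objects and
dual products of the inversion module):
\begin{verbatim}
import numpy as np

class QuadraticCostFunction(object):
    """Toy cost function J(m) = 0.5*||m - m_star||^2."""
    def __init__(self, m_star):
        self.m_star = np.asarray(m_star, dtype=float)

    def getDualProduct(self, m, g):
        # dual product <m, g>; here the Euclidean inner product
        return float(np.dot(m, g))

    def getArguments(self, m):
        # pre-calculate the residual shared by getValue() and getGradient()
        return (m - self.m_star,)

    def getValue(self, m, *args):
        residual = args[0] if args else m - self.m_star
        return 0.5 * float(np.dot(residual, residual))

    def getGradient(self, m, *args):
        return args[0] if args else m - self.m_star

    def getNorm(self, m):
        return float(np.linalg.norm(m))

    def updateHessian(self):
        pass  # the Hessian is constant (identity) for this toy problem

    def getInverseHessianApproximation(self, m, g, *args):
        # the Hessian is the identity, so its inverse applied to g is g
        return g
\end{verbatim}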


%
%\section{The Nonlinear Conjugate Gradient method}\label{sec:NLCG}
%The steps for NLCG are
%\begin{enumerate}
% \item Given an initial guess $\mat{x}_0$, compute the steepest descent
% direction using the gradient: $\mat{p}_0=-\nabla f(\mat{x}_0)$
% \item Perform an inexact \emph{line search} (see Section~\ref{sec:LineSearch}) to obtain the step
% length $\alpha_i$ which loosely minimizes $f(\mat{x}_i+\alpha_i \mat{d}_i)$
% \item Update the position: $\mat{x}_{i+1}=\mat{x}_i+\alpha_i \mat{d}_i$
% \item Update the conjugate direction: $\mat{d}_{i+1}=-\nabla f(\mat{x}_{i+1})+\beta_{i+1} \mat{d}_i$\label{CGupdate}
%\end{enumerate}

%\flushleft There are many choices for the CG update parameter $\beta_{i+1}$ in step
%\ref{CGupdate} above, see \cite{Hager2006} for a discussion. An efficient
%choice, proposed by Polak and Ribi\`{e}re, is:
%\begin{equation*}
% \beta_{i+1}=\text{max}\left\{\frac{\nabla f^T(\mat{x}_{i+1})(\nabla f(\mat{x}_{i+1})-\nabla f(\mat{x}_i))}{\| \nabla f(\mat{x}_i)\|}, 0\right\}
%\end{equation*}

\section{The L-BFGS Algorithm}

%\section{Limited-Memory BFGS Method}\label{sec:LBFGS}
The L-BFGS algorithm looks as follows (see also~\cite{Nocedal1980}):
\begin{algorithm}
\mbox{L-BFGS}
\begin{program}
\BEGIN
\text{Input: initial guess $m_0$ and integer $truncation$};
\text{Allocate $truncation$ vector pairs \{$s_i$, $g_i$\}};
k \leftarrow 0;
H_0 \leftarrow I;
\WHILE \NOT\text{converged} \DO
p_k \leftarrow |twoLoop|(\{s, g\});
\alpha_k \leftarrow |lineSearch|(m_k, p_k); \rcomment{See Section~\ref{sec:LineSearch}}
m_{k+1} \leftarrow m_k+\alpha_k p_k;
\IF k> truncation
\THEN
|Discard the vector pair | \{s_{k-truncation}, g_{k-truncation}\} | from storage|;
\FI
s_k \leftarrow m_{k+1}-m_k;
g_k \leftarrow \nabla J_{k+1}-\nabla J_k;
k \leftarrow k+1;
\OD
\WHERE
\FUNCT |twoLoop|(\{s, g\}) \BODY
\EXP q \leftarrow \nabla J_k;
\FOR i=k-1, k-2, \ldots, k-truncation \DO
\rho_i \leftarrow \frac{1}{<s_i, g_i>};
\alpha_i \leftarrow \rho_i <s_i, q>;
q \leftarrow q-\alpha_i\cdot g_i;
\OD
r \leftarrow H q;
\FOR i = k-truncation, k-truncation+1, \ldots, k-1 \DO
\beta \leftarrow \rho_i \cdot <r,g_i>;
r \leftarrow r + s_i \cdot (\alpha_i - \beta)
\OD
r \ENDEXP \ENDFUNCT
\END
\end{program}
\end{algorithm}
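For concreteness, the two-loop recursion at the core of the algorithm can be
written in a few lines of Python (a sketch using numpy arrays and the Euclidean
inner product as the dual product; in the actual implementation the dual
product and the initial Hessian approximation are supplied by the cost
function, and all names here are illustrative only):
\begin{verbatim}
import numpy as np

def two_loop(grad_Jk, pairs):
    """Return an approximation of the inverse Hessian applied to grad_Jk.

    pairs is a list of the stored vector pairs (s_i, g_i), oldest first.
    """
    q = np.array(grad_Jk, dtype=float)
    rhos, alphas = [], []
    for s, g in reversed(pairs):            # newest pair first
        rho = 1.0 / np.dot(s, g)
        alpha = rho * np.dot(s, q)
        q = q - alpha * g
        rhos.append(rho)
        alphas.append(alpha)
    r = q                                   # initial Hessian approximation H_0 = I
    for (s, g), rho, alpha in zip(pairs, reversed(rhos), reversed(alphas)):
        beta = rho * np.dot(r, g)
        r = r + s * (alpha - beta)
    return r
\end{verbatim}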

\subsection{Line Search}\label{sec:LineSearch}
The line search procedure minimizes the function $\phi(\alpha)$ for a given
level set function $m$ and search direction $p$, see \cite{Nocedal2006},
\cite{MoreThuente1992}, with
\begin{equation}\label{EQU:MIN:22}
\phi(\alpha) = J(m+ \alpha \cdot p) \; .
\end{equation}
Notice that
\begin{equation}\label{EQU:MIN:23}
\phi'(\alpha) = < p , \nabla J(m+\alpha \cdot p)> \; .
\end{equation}

\begin{algorithm}
\mbox{Line Search that satisfies strong Wolfe conditions}
\begin{program}
\BEGIN
\text{Input: $\alpha_{max}>0, 0<c_1<c_2<1$};
i \leftarrow 1;
\WHILE 1 \DO
\IF \phi(\alpha_i) > \phi(0)+c_1 \alpha_i \phi'(0) \OR
(\phi(\alpha_i) \ge \phi(\alpha_{i-1}) \AND i>1)
\THEN
\alpha_* \leftarrow |zoom|(\alpha_{i-1}, \alpha_i);
|break|;
\FI
\IF \|\phi'(\alpha_i)\| \le -c_2\phi'(0)
\THEN
\alpha_* \leftarrow \alpha_i;
|break|;
\FI
\IF \phi'(\alpha_i) \ge 0
\THEN
\alpha_* \leftarrow |zoom|(\alpha_i, \alpha_{i-1});
|break|;
\FI
\alpha_{i+1} \leftarrow 2\alpha_i;
i \leftarrow i+1;
\OD
\WHERE
\FUNCT |zoom|(\alpha_{lo}, \alpha_{hi}) \BODY
\WHILE 1 \DO
\EXP \alpha_j \leftarrow \alpha_{lo}+\frac{\alpha_{hi}-\alpha_{lo}}{2};
\IF \phi(\alpha_j) > \phi(0)+c_1 \alpha_j\phi'(0) \OR %
\phi(\alpha_j) \ge \phi(\alpha_{lo})
\THEN
\alpha_{hi} \leftarrow \alpha_j;
\ELSE
\IF \|\phi'(\alpha_j)\| \le -c_2\phi'(0)
\THEN
\alpha_* \leftarrow \alpha_j;
|break|;
\FI
\IF \phi'(\alpha_j)(\alpha_{hi}-\alpha_{lo}) \ge 0
\THEN
\alpha_{hi} \leftarrow \alpha_{lo};
\FI
\alpha_{lo} \leftarrow \alpha_j;
\FI
\OD
\ENDEXP \ENDFUNCT
\END
\end{program}
\end{algorithm}
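The procedure above maps directly onto the following Python sketch (names and
default parameter values are illustrative; \texttt{phi} and \texttt{dphi} are
callables evaluating Equations~\ref{EQU:MIN:22} and~\ref{EQU:MIN:23}, typically
closures built from the current level set function $m$ and search direction $p$):
\begin{verbatim}
def line_search(phi, dphi, alpha_max=50.0, c1=1e-4, c2=0.9, max_iter=25):
    """Find a step size satisfying the strong Wolfe conditions."""
    phi0, dphi0 = phi(0.0), dphi(0.0)

    def zoom(alpha_lo, alpha_hi):
        for _ in range(max_iter):
            alpha_j = alpha_lo + 0.5 * (alpha_hi - alpha_lo)   # bisection
            if (phi(alpha_j) > phi0 + c1 * alpha_j * dphi0
                    or phi(alpha_j) >= phi(alpha_lo)):
                alpha_hi = alpha_j
            else:
                if abs(dphi(alpha_j)) <= -c2 * dphi0:
                    return alpha_j                 # strong Wolfe satisfied
                if dphi(alpha_j) * (alpha_hi - alpha_lo) >= 0:
                    alpha_hi = alpha_lo
                alpha_lo = alpha_j
        return alpha_lo

    alpha_prev, alpha = 0.0, 1.0
    for i in range(1, max_iter + 1):
        if (phi(alpha) > phi0 + c1 * alpha * dphi0
                or (i > 1 and phi(alpha) >= phi(alpha_prev))):
            return zoom(alpha_prev, alpha)
        if abs(dphi(alpha)) <= -c2 * dphi0:
            return alpha
        if dphi(alpha) >= 0:
            return zoom(alpha, alpha_prev)
        alpha_prev, alpha = alpha, min(2.0 * alpha, alpha_max)
    return alpha
\end{verbatim}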