/trunk/downunder/py_src/minimizers.py


Revision 4143 by gross, Thu Jan 17 08:48:47 2013 UTC
File MIME type: text/x-python
File size: 21249 byte(s)
Log message: documentation of minimizer update plus clarification of notations in the code
##############################################################################
#
# Copyright (c) 2003-2012 by University of Queensland
# http://www.uq.edu.au
#
# Primary Business: Queensland, Australia
# Licensed under the Open Software License version 3.0
# http://www.opensource.org/licenses/osl-3.0.php
#
# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
# Development since 2012 by School of Earth Sciences
#
##############################################################################

"""Generic minimization algorithms"""

__copyright__="""Copyright (c) 2003-2012 by University of Queensland
http://www.uq.edu.au
Primary Business: Queensland, Australia"""
__license__="""Licensed under the Open Software License version 3.0
http://www.opensource.org/licenses/osl-3.0.php"""
__url__="https://launchpad.net/escript-finley"

__all__ = ['MinimizerException', 'MinimizerIterationIncurableBreakDown', 'MinimizerMaxIterReached', 'AbstractMinimizer', 'MinimizerLBFGS', 'MinimizerBFGS', 'MinimizerNLCG']

import logging
import numpy as np

try:
    from esys.escript import Lsup, sqrt, EPSILON
except ImportError:
    Lsup=lambda x: np.amax(abs(x))
    sqrt=np.sqrt
    EPSILON=1e-18

lslogger=logging.getLogger('inv.minimizer.linesearch')
zoomlogger=logging.getLogger('inv.minimizer.linesearch.zoom')

def _zoom(phi, gradphi, phiargs, alpha_lo, alpha_hi, phi_lo, phi_hi, c1, c2, phi0, gphi0, IMAX=25):
    """
    Helper function for `line_search` below which tries to tighten the range
    alpha_lo...alpha_hi. See Chapter 3 of 'Numerical Optimization' by
    J. Nocedal for an explanation.
    """
    i=0
    while True:
        alpha=alpha_lo+.5*(alpha_hi-alpha_lo) # should use interpolation...
        args_a=phiargs(alpha)
        phi_a=phi(alpha, *args_a)
        zoomlogger.debug("Zoom.iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
        if phi_a > phi0+c1*alpha*gphi0 or phi_a >= phi_lo:
            alpha_hi=alpha
        else:
            gphi_a=gradphi(alpha, *args_a)
            zoomlogger.debug("Zoom.grad(phi(alpha))=%e"%(gphi_a))
            if np.abs(gphi_a) <= -c2*gphi0:
                break
            if gphi_a*(alpha_hi-alpha_lo) >= 0:
                alpha_hi = alpha_lo
            alpha_lo=alpha
            phi_lo=phi_a
        i+=1
        if i>IMAX:
            gphi_a=None
            break
    return alpha, phi_a, gphi_a

def line_search(f, x, p, g_Jx, Jx, alpha_max=50.0, c1=1e-4, c2=0.9, IMAX=15):
    """
    Line search method that satisfies the strong Wolfe conditions.
    See Chapter 3 of 'Numerical Optimization' by J. Nocedal for an explanation.

    :param f: callable objective function f(x)
    :param x: start value for the line search
    :param p: search direction
    :param g_Jx: value for the gradient of f at x
    :param Jx: value of f(x)
    :param alpha_max: algorithm terminates if alpha reaches this value
    :param c1: value for Armijo condition (see reference)
    :param c2: value for curvature condition (see reference)
    :param IMAX: maximum number of iterations to perform
    """
    # this stores the latest gradf(x+a*p) which is returned
    g_Jx_new=[g_Jx]

    def phi(a, *args):
        """ phi(a):=f(x+a*p) """
        return f(x+a*p, *args)
    def gradphi(a, *args):
        g_Jx_new[0]=f.getGradient(x+a*p, *args)
        return f.getDualProduct(p, g_Jx_new[0])

    def phiargs(a):
        try:
            args=f.getArguments(x+a*p)
        except:
            args=()
        return args

    old_alpha=0.
    # we assume g_Jx is properly scaled so alpha=1 is a reasonable starting value
    alpha=1.
    if Jx is None:
        args0=phiargs(0.)
        phi0=phi(0., *args0)
    else:
        phi0=Jx
    lslogger.debug("phi(0)=%e"%(phi0))
    gphi0=f.getDualProduct(p, g_Jx) #gradphi(0., *args0)
    lslogger.debug("grad phi(0)=%e"%(gphi0))
    old_phi_a=phi0
    i=1

    while i<IMAX and alpha>0. and alpha<alpha_max:

        args_a=phiargs(alpha)
        phi_a=phi(alpha, *args_a)
        lslogger.debug("Line Search.iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
        if (phi_a > phi0+c1*alpha*gphi0) or ((phi_a>=old_phi_a) and (i>1)):
            alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, old_alpha, alpha, old_phi_a, phi_a, c1, c2, phi0, gphi0)
            break

        gphi_a=gradphi(alpha, *args_a)
        if np.abs(gphi_a) <= -c2*gphi0:
            break
        if gphi_a >= 0:
            alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, alpha, old_alpha, phi_a, old_phi_a, c1, c2, phi0, gphi0)
            break

        old_alpha=alpha
        # the factor is arbitrary as long as there is sufficient increase
        alpha=2.*alpha
        old_phi_a=phi_a
        i+=1

    return alpha, phi_a, g_Jx_new[0]

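# A minimal sketch of how line_search is typically driven (the names `cf`, `x`
# and `p` are placeholders for illustration only; `cf` is assumed to behave
# like a CostFunction):
#
#   args = cf.getArguments(x)
#   Jx   = cf(x, *args)                 # current cost function value
#   g_Jx = cf.getGradient(x, *args)     # current gradient
#   p    = -g_Jx                        # a descent direction
#   alpha, Jx_new, g_Jx_new = line_search(cf, x, p, g_Jx, Jx)
#   x_new = x + alpha*p
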
class MinimizerException(Exception):
    """
    This is a generic exception thrown by a minimizer.
    """
    pass

class MinimizerMaxIterReached(MinimizerException):
    """
    Exception thrown if the maximum number of iteration steps is reached.
    """
    pass

class MinimizerIterationIncurableBreakDown(MinimizerException):
    """
    Exception thrown if the iteration scheme encountered an incurable breakdown.
    """
    pass

##############################################################################
class AbstractMinimizer(object):
    """
    Base class for function minimization methods.
    """

    TOLERANCE_REACHED, MAX_ITERATIONS_REACHED, INCURABLE_BREAKDOWN = list(range(3))

    def __init__(self, J=None, m_tol=1e-5, J_tol=None, imax=300):
        """
        Initializes a new minimizer for a given cost function.

        :param J: the cost function to be minimized
        :type J: `CostFunction`
        :param m_tol: tolerance for the stopping criterion, see `setTolerance`
        :param J_tol: tolerance for the change of the cost function value, see `setTolerance`
        :param imax: maximum number of iteration steps, see `setMaxIterations`
        """
        self.setCostFunction(J)
        self._imax = imax
        self._result = None
        self._callback = None
        self.logger = logging.getLogger('inv.%s'%self.__class__.__name__)
        self.setTolerance(m_tol=m_tol, J_tol=J_tol)

    def setCostFunction(self, J):
        """
        Sets the cost function to be minimized.

        :param J: the cost function to be minimized
        :type J: `CostFunction`
        """
        self.__J=J

    def getCostFunction(self):
        """
        Returns the cost function to be minimized.

        :rtype: `CostFunction`
        """
        return self.__J

    def setTolerance(self, m_tol=1e-4, J_tol=None):
        """
        Sets the tolerances for the stopping criterion. The minimizer stops
        when an appropriate norm is less than `m_tol`. If `J_tol` is set it
        is used as an additional tolerance on the change of the cost function
        value.
        """
        self._m_tol = m_tol
        self._J_tol = J_tol

    def setMaxIterations(self, imax):
        """
        Sets the maximum number of iterations before the minimizer terminates.
        """
        self._imax = imax

    def setCallback(self, callback):
        """
        Sets a callback function to be called after every iteration.
        The arguments to the function are: (k, x, Jx, g_Jx), where
        k is the current iteration, x is the current estimate, Jx=f(x) and
        g_Jx=grad f(x).
        """
        if callback is not None and not callable(callback):
            raise TypeError("Callback function not callable.")
        self._callback = callback

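    # A minimal usage sketch for the callback (the name `progress` is only an
    # illustration, not part of this module):
    #
    #   def progress(k, x, Jx, g_Jx):
    #       print("iteration %d: J = %e"%(k, Jx))
    #
    #   minimizer.setCallback(progress)
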
    def _doCallback(self, *args):
        if self._callback is not None:
            self._callback(*args)

    def getResult(self):
        """
        Returns the result of the minimization.
        """
        return self._result

    def getOptions(self):
        """
        Returns a dictionary of minimizer-specific options.
        """
        return {}

    def setOptions(self, **opts):
        """
        Sets minimizer-specific options. For a list of possible options see
        `getOptions()`.
        """
        raise NotImplementedError

    def run(self, x0):
        """
        Executes the minimization algorithm for *f* starting with the initial
        guess ``x0``.

        :return: `TOLERANCE_REACHED` or `MAX_ITERATIONS_REACHED`
        """
        raise NotImplementedError

    def logSummary(self):
        """
        Outputs a summary of the completed minimization process to the logger.
        """
        if hasattr(self.getCostFunction(), "Value_calls"):
            self.logger.warning("Number of cost function evaluations: %d"%self.getCostFunction().Value_calls)
            self.logger.warning("Number of gradient evaluations: %d"%self.getCostFunction().Gradient_calls)
            self.logger.warning("Number of inner product evaluations: %d"%self.getCostFunction().DualProduct_calls)
            self.logger.warning("Number of argument evaluations: %d"%self.getCostFunction().Arguments_calls)
            self.logger.warning("Number of norm evaluations: %d"%self.getCostFunction().Norm_calls)

##############################################################################
class MinimizerLBFGS(AbstractMinimizer):
    """
    Minimizer that uses the limited-memory Broyden-Fletcher-Goldfarb-Shanno
    method.
    """

    # History size
    _truncation = 30

    # Initial Hessian multiplier
    _initial_H = 1

    # Restart after this many iteration steps
    _restart = 60

    def getOptions(self):
        return {'truncation':self._truncation, 'initialHessian':self._initial_H, 'restart':self._restart}

    def setOptions(self, **opts):
        for o in opts:
            if o=='historySize' or o=='truncation':
                self._truncation=opts[o]
            elif o=='initialHessian':
                self._initial_H=opts[o]
            elif o=='restart':
                self._restart=opts[o]
            else:
                raise KeyError("Invalid option '%s'"%o)

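    # For example (a sketch; `m` stands for a MinimizerLBFGS instance):
    #
    #   m.setOptions(truncation=20, initialHessian=1, restart=100)
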
    def run(self, x):

        if self.getCostFunction().provides_inverse_Hessian_approximation:
            self.getCostFunction().updateHessian()
            invH_scale = None
        else:
            invH_scale = self._initial_H

        # start the iteration:
        n_iter = 0
        n_last_break_down=-1
        non_curable_break_down = False
        converged = False
        args=self.getCostFunction().getArguments(x)
        g_Jx=self.getCostFunction().getGradient(x, *args)
        Jx=self.getCostFunction()(x, *args)
        Jx_0=Jx

        while not converged and not non_curable_break_down and n_iter < self._imax:

            k=0
            break_down = False
            s_and_y=[]
            self._doCallback(n_iter, x, Jx, g_Jx)

            while not converged and not break_down and k < self._restart and n_iter < self._imax:
                #self.logger.info("\033[1;31miteration %d\033[1;30m, error=%e"%(k,error))
                self.logger.debug("LBFGS.iteration %d .......... "%n_iter)
                # determine search direction
                self.logger.debug("LBFGS.J(x) = %s"%Jx)
                self.logger.debug("LBFGS.grad f(x) = %s"%g_Jx)
                if invH_scale: self.logger.debug("LBFGS.H = %s"%invH_scale)

                p = -self._twoLoop(invH_scale, g_Jx, s_and_y, x, *args)
                # determine step length: line_search returns a scaling alpha for
                # the search direction as well as the cost function value and
                # gradient for the new solution approximation x_new = x + alpha*p
                alpha, Jx_new, g_Jx_new = line_search(self.getCostFunction(), x, p, g_Jx, Jx)

                self.logger.debug("LBFGS.search direction scaling alpha=%e"%(alpha))
                # execute the step
                delta_x = alpha*p
                x_new = x + delta_x
                self.logger.debug("LBFGS.J(x) = %s"%Jx_new)

                converged = True
                if self._J_tol:
                    flag = abs(Jx_new-Jx) <= self._J_tol * abs(Jx_new-Jx_0)
                    if flag:
                        self.logger.debug("LBFGS: cost function has converged: dJ, J*J_tol = %e, %e"%(Jx-Jx_new, abs(Jx_new-Jx_0)*self._J_tol))
                    else:
                        self.logger.debug("LBFGS: cost function checked: dJ, J*J_tol = %e, %e"%(Jx-Jx_new, abs(Jx_new-Jx_0)*self._J_tol))
                    converged = converged and flag
                if self._m_tol:
                    norm_x=self.getCostFunction().getNorm(x_new)
                    norm_dx=self.getCostFunction().getNorm(delta_x)
                    flag = norm_dx <= self._m_tol * norm_x
                    if flag:
                        self.logger.debug("LBFGS: solution has converged: dx, x*m_tol = %e, %e"%(norm_dx, norm_x*self._m_tol))
                    else:
                        self.logger.debug("LBFGS: solution checked: dx, x*m_tol = %e, %e"%(norm_dx, norm_x*self._m_tol))
                    converged = converged and flag

                x=x_new
                if converged:
                    break

                # unfortunately there is more work to do!
                if g_Jx_new is None:
                    args=self.getCostFunction().getArguments(x_new)
                    g_Jx_new=self.getCostFunction().getGradient(x_new, *args)
                delta_g=g_Jx_new-g_Jx

                rho=self.getCostFunction().getDualProduct(delta_x, delta_g)
                if abs(rho)>0:
                    s_and_y.append((delta_x, delta_g, rho))
                else:
                    break_down=True

                self.getCostFunction().updateHessian()
                g_Jx=g_Jx_new
                Jx=Jx_new

                k+=1
                n_iter+=1
                self._doCallback(k, x, Jx, g_Jx)

                # delete oldest vector pair
                if k>self._truncation: s_and_y.pop(0)

                if not self.getCostFunction().provides_inverse_Hessian_approximation and not break_down:
                    # set the new scaling factor (approximation of inverse Hessian)
                    denom=self.getCostFunction().getDualProduct(delta_g, delta_g)
                    if denom > 0:
                        invH_scale=self.getCostFunction().getDualProduct(delta_x, delta_g)/denom
                    else:
                        invH_scale=self._initial_H
                        self.logger.debug("LBFGS.Break down in H update. Resetting to initial value %s."%self._initial_H)

            # case handling for the inner iteration:
            if break_down:
                if n_iter == n_last_break_down+1:
                    non_curable_break_down = True
                    self.logger.debug("LBFGS. Incurable break down detected in step %d."%(n_iter,))
                else:
                    n_last_break_down = n_iter
                    self.logger.debug("LBFGS.Break down detected in step %d. Iteration is restarted."%(n_iter,))
            if not k < self._restart:
                self.logger.debug("LBFGS.Iteration is restarted after %d steps."%(n_iter,))

        # case handling for the outer iteration:
        self._result=x
        if n_iter >= self._imax:
            self.return_status=self.MAX_ITERATIONS_REACHED
            self.logger.debug("LBFGS.>>>>>> Maximum number of iterations reached!")
            raise MinimizerMaxIterReached("Gave up after %d steps."%(n_iter,))
        elif non_curable_break_down:
            self.return_status=self.INCURABLE_BREAKDOWN
            self.logger.debug("LBFGS.>>>>>> Incurable breakdown!")
            raise MinimizerIterationIncurableBreakDown("Gave up after %d steps."%(n_iter,))
        else:
            self.return_status=self.TOLERANCE_REACHED
            self.logger.debug("LBFGS.Success after %d iterations!"%k)

        return self.return_status

    def _twoLoop(self, invH_scale, g_Jx, s_and_y, x, *args):
        """
        Helper for the L-BFGS method.
        See 'Numerical Optimization' by J. Nocedal for an explanation.
        """
        q=g_Jx
        alpha=[]
        for s, y, rho in reversed(s_and_y):
            a=self.getCostFunction().getDualProduct(s, q)/rho
            alpha.append(a)
            q=q-a*y

        if self.getCostFunction().provides_inverse_Hessian_approximation:
            r = self.getCostFunction().getInverseHessianApproximation(x, q, *args)
        else:
            r = invH_scale * q

        for s, y, rho in s_and_y:
            beta = self.getCostFunction().getDualProduct(r, y)/rho
            a = alpha.pop()
            r = r + s * (a-beta)
        return r

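    # Note: _twoLoop is the standard L-BFGS two-loop recursion (Nocedal & Wright,
    # 'Numerical Optimization', Algorithm 7.4). It applies the inverse Hessian
    # approximation
    #     H_{k+1} = (I - rho*s*y^T) H_k (I - rho*y*s^T) + rho*s*s^T,  rho = 1/<y, s>,
    # to the gradient without ever forming H explicitly; only the stored pairs
    # (s, y, rho) in s_and_y are used.
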
##############################################################################
class MinimizerBFGS(AbstractMinimizer):
    """
    Minimizer that uses the Broyden-Fletcher-Goldfarb-Shanno method.
    """

    # Initial Hessian multiplier
    _initial_H = 1

    def getOptions(self):
        return {'initialHessian':self._initial_H}

    def setOptions(self, **opts):
        for o in opts:
            if o=='initialHessian':
                self._initial_H=opts[o]
            else:
                raise KeyError("Invalid option '%s'"%o)

    def run(self, x):
        args=self.getCostFunction().getArguments(x)
        g_Jx=self.getCostFunction().getGradient(x, *args)
        Jx=self.getCostFunction()(x, *args)
        k=0
        try:
            n=len(x)
        except:
            n=x.getNumberOfDataPoints()
        I=np.eye(n)
        H=self._initial_H*I
        gnorm=Lsup(g_Jx)
        self._doCallback(k, x, Jx, g_Jx)

        while gnorm > self._m_tol and k < self._imax:
            self.logger.debug("iteration %d, gnorm=%e"%(k,gnorm))

            # determine search direction
            d=-self.getCostFunction().getDualProduct(H, g_Jx)

            self.logger.debug("H = %s"%H)
            self.logger.debug("grad f(x) = %s"%g_Jx)
            self.logger.debug("d = %s"%d)
            self.logger.debug("x = %s"%x)

            # determine step length
            alpha, Jx, g_Jx_new = line_search(self.getCostFunction(), x, d, g_Jx, Jx)
            self.logger.debug("alpha=%e"%alpha)
            # execute the step
            x_new=x+alpha*d
            delta_x=x_new-x
            x=x_new
            if g_Jx_new is None:
                g_Jx_new=self.getCostFunction().getGradient(x_new)
            delta_g=g_Jx_new-g_Jx
            g_Jx=g_Jx_new
            k+=1
            self._doCallback(k, x, Jx, g_Jx)
            gnorm=Lsup(g_Jx)
            if gnorm <= self._m_tol: break

            # update Hessian
            denom=self.getCostFunction().getDualProduct(delta_x, delta_g)
            if denom < EPSILON * gnorm:
                denom=1e-5
                self.logger.debug("Break down in H update. Resetting.")
            rho=1./denom
            self.logger.debug("rho=%e"%rho)
            A=I-rho*delta_x[:,None]*delta_g[None,:]
            AT=I-rho*delta_g[:,None]*delta_x[None,:]
            H=self.getCostFunction().getDualProduct(A, self.getCostFunction().getDualProduct(H,AT)) + rho*delta_x[:,None]*delta_x[None,:]

        if k >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.debug("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.debug("Success after %d iterations! Final gnorm=%e"%(k,gnorm))

        self._result=x
        return reason

##############################################################################
class MinimizerNLCG(AbstractMinimizer):
    """
    Minimizer that uses the nonlinear conjugate gradient method
    (Fletcher-Reeves variant).
    """

    def run(self, x):
        i=0
        k=0
        args=self.getCostFunction().getArguments(x)
        r=-self.getCostFunction().getGradient(x, *args)
        Jx=self.getCostFunction()(x, *args)
        d=r
        delta=self.getCostFunction().getDualProduct(r,r)
        delta0=delta
        self._doCallback(i, x, Jx, -r)

        while i<self._imax and Lsup(r)>self._m_tol:
            self.logger.debug("iteration %d"%i)
            self.logger.debug("grad f(x) = %s"%(-r))
            self.logger.debug("d = %s"%d)
            self.logger.debug("x = %s"%x)

            alpha, Jx, g_Jx_new = line_search(self.getCostFunction(), x, d, -r, Jx, c2=0.4)
            self.logger.debug("alpha=%e"%(alpha))
            x=x+alpha*d
            r=-self.getCostFunction().getGradient(x) if g_Jx_new is None else -g_Jx_new
            delta_o=delta
            delta=self.getCostFunction().getDualProduct(r,r)
            beta=delta/delta_o
            d=r+beta*d
            k=k+1
            try:
                lenx=len(x)
            except:
                lenx=x.getNumberOfDataPoints()
            if k == lenx or self.getCostFunction().getDualProduct(r,d) <= 0:
                d=r
                k=0
            i+=1
            self._doCallback(i, x, Jx, g_Jx_new)

        if i >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.debug("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.debug("Success after %d iterations! Final delta=%e"%(i,delta))

        self._result=x
        return reason

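# Note on MinimizerNLCG above: the update beta = delta/delta_o is the
# Fletcher-Reeves formula beta_k = <r_{k+1}, r_{k+1}> / <r_k, r_k>. The search
# direction is reset to the steepest-descent direction r whenever len(x) steps
# have been taken or d stops being a descent direction (<r, d> <= 0).
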
if __name__=="__main__":
    # Example usage with function 'rosen' (minimum=[1,1,...,1]):
    from scipy.optimize import rosen, rosen_der
    from esys.downunder import MeteredCostFunction
    import sys
    N=10
    x0=np.array([4.]*N) # initial guess

    class RosenFunc(MeteredCostFunction):
        def __init__(self):
            super(RosenFunc, self).__init__()
            self.provides_inverse_Hessian_approximation=False
        def _getDualProduct(self, f0, f1):
            return np.dot(f0, f1)
        def _getValue(self, x, *args):
            return rosen(x)
        def _getGradient(self, x, *args):
            return rosen_der(x)
        def _getNorm(self, x):
            return Lsup(x)

    f=RosenFunc()
    m=None
    if len(sys.argv)>1:
        method=sys.argv[1].lower()
        if method=='nlcg':
            m=MinimizerNLCG(f)
        elif method=='bfgs':
            m=MinimizerBFGS(f)

    if m is None:
        # default
        m=MinimizerLBFGS(f)
        #m.setOptions(historySize=10000)

    logging.basicConfig(format='[%(funcName)s] \033[1;30m%(message)s\033[0m', level=logging.DEBUG)
    m.setTolerance(m_tol=1e-5)
    m.setMaxIterations(600)
    m.run(x0)
    m.logSummary()
    print("\tLsup(result)=%.8f"%np.amax(abs(m.getResult())))

    #from scipy.optimize import fmin_cg
    #print("scipy ref=%.8f"%np.amax(abs(fmin_cg(rosen, x0, rosen_der, maxiter=10000))))
