/[escript]/trunk/downunder/py_src/minimizers.py
Revision 4517
Mon Aug 12 05:56:54 2013 UTC by caltinay
File MIME type: text/x-python
File size: 22613 byte(s)
Implemented caching (and relaxation) of line search step size alpha.
Experiments show a significant decrease in the number of iterations required
to converge with this change.


##############################################################################
#
# Copyright (c) 2003-2013 by University of Queensland
# http://www.uq.edu.au
#
# Primary Business: Queensland, Australia
# Licensed under the Open Software License version 3.0
# http://www.opensource.org/licenses/osl-3.0.php
#
# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
# Development since 2012 by School of Earth Sciences
#
##############################################################################

"""Generic minimization algorithms"""

__copyright__="""Copyright (c) 2003-2013 by University of Queensland
http://www.uq.edu.au
Primary Business: Queensland, Australia"""
__license__="""Licensed under the Open Software License version 3.0
http://www.opensource.org/licenses/osl-3.0.php"""
__url__="https://launchpad.net/escript-finley"

__all__ = ['MinimizerException', 'MinimizerIterationIncurableBreakDown', 'MinimizerMaxIterReached', 'AbstractMinimizer', 'MinimizerLBFGS', 'MinimizerBFGS', 'MinimizerNLCG']

import logging
import numpy as np

try:
    from esys.escript import Lsup, sqrt, EPSILON
except:
    Lsup=lambda x: np.amax(abs(x))
    sqrt=np.sqrt
    EPSILON=1e-18

lslogger=logging.getLogger('inv.minimizer.linesearch')
zoomlogger=logging.getLogger('inv.minimizer.linesearch.zoom')

class MinimizerException(Exception):
    """
    This is a generic exception thrown by a minimizer.
    """
    pass

class MinimizerMaxIterReached(MinimizerException):
    """
    Exception thrown if the maximum number of iteration steps is reached.
    """
    pass

class MinimizerIterationIncurableBreakDown(MinimizerException):
    """
    Exception thrown if the iteration scheme encountered an incurable
    breakdown.
    """
    pass

def _zoom(phi, gradphi, phiargs, alpha_lo, alpha_hi, phi_lo, phi_hi, c1, c2, phi0, gphi0, IMAX=25):
    """
    Helper function for `line_search` below which tries to tighten the range
    alpha_lo...alpha_hi. See Chapter 3 of 'Numerical Optimization' by
    J. Nocedal for an explanation.
    """
    i=0
    while True:
        alpha=alpha_lo+.5*(alpha_hi-alpha_lo) # should use interpolation...
        args_a=phiargs(alpha)
        phi_a=phi(alpha, *args_a)
        zoomlogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
        if phi_a > phi0+c1*alpha*gphi0 or phi_a >= phi_lo:
            alpha_hi=alpha
        else:
            gphi_a=gradphi(alpha, *args_a)
            zoomlogger.debug("\tgrad(phi(alpha))=%e"%(gphi_a))
            if np.abs(gphi_a) <= -c2*gphi0:
                break
            if gphi_a*(alpha_hi-alpha_lo) >= 0:
                alpha_hi = alpha_lo
            alpha_lo=alpha
            phi_lo=phi_a
        i+=1
        if i>IMAX:
            gphi_a=None
            break
    return alpha, phi_a, gphi_a

def line_search(f, x, p, g_Jx, Jx, alpha=1.0, alpha_max=50.0, c1=1e-4, c2=0.9, IMAX=15):
    """
    Line search method that satisfies the strong Wolfe conditions.
    See Chapter 3 of 'Numerical Optimization' by J. Nocedal for an explanation.

    :param f: callable objective function f(x)
    :param x: start value for the line search
    :param p: search direction
    :param g_Jx: value for the gradient of f at x
    :param Jx: value of f(x)
    :param alpha: initial step length. If g_Jx is properly scaled alpha=1 is a
                  reasonable starting value.
    :param alpha_max: algorithm terminates if alpha reaches this value
    :param c1: value for Armijo condition (see reference)
    :param c2: value for curvature condition (see reference)
    :param IMAX: maximum number of iterations to perform
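
    For reference, with phi(a) := f(x+a*p) the strong Wolfe conditions
    enforced through c1 and c2 are:

        phi(a)    <= phi(0) + c1*a*phi'(0)    (sufficient decrease)
        |phi'(a)| <= -c2*phi'(0)              (curvature condition)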
105 """
106 # this stores the latest gradf(x+a*p) which is returned
107 g_Jx_new=[g_Jx]
108 def phi(a, *args):
109 """ phi(a):=f(x+a*p) """
110 return f(x+a*p, *args)
111 def gradphi(a, *args):
112 g_Jx_new[0]=f.getGradient(x+a*p, *args)
113 return f.getDualProduct(p, g_Jx_new[0])
114 def phiargs(a):
115 try:
116 args=f.getArguments(x+a*p)
117 except:
118 args=()
119 return args
120 old_alpha=0.
121 if Jx is None:
122 args0=phiargs(0.)
123 phi0=phi(0., *args0)
124 else:
125 phi0=Jx
126 lslogger.debug("phi(0)=%e"%(phi0))
127 gphi0=f.getDualProduct(p, g_Jx) #gradphi(0., *args0)
128 lslogger.debug("grad phi(0)=%e"%(gphi0))
129 old_phi_a=phi0
130 i=1
131
132 while i<IMAX and alpha>0. and alpha<alpha_truncationax:
133 args_a=phiargs(alpha)
134 phi_a=phi(alpha, *args_a)
135 lslogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
136 if (phi_a > phi0+c1*alpha*gphi0) or ((phi_a>=old_phi_a) and (i>1)):
137 alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, old_alpha, alpha, old_phi_a, phi_a, c1, c2, phi0, gphi0)
138 break
139
140 gphi_a=gradphi(alpha, *args_a)
141 if np.abs(gphi_a) <= -c2*gphi0:
142 break
143 if gphi_a >= 0:
144 alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, alpha, old_alpha, phi_a, old_phi_a, c1, c2, phi0, gphi0)
145 break
146
147 old_alpha=alpha
148 # the factor is arbitrary as long as there is sufficient increase
149 alpha=2.*alpha
150 old_phi_a=phi_a
151 i+=1
152 return alpha, phi_a, g_Jx_new[0]
153

##############################################################################
class AbstractMinimizer(object):
    """
    Base class for function minimization methods.
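
    Typical use (illustrative only; ``J`` stands for a `CostFunction` and
    ``x0`` for an initial guess, see the ``__main__`` block at the end of
    this module for a complete, runnable example)::

        minimizer = MinimizerLBFGS(J)
        minimizer.setTolerance(m_tol=1e-5)
        minimizer.setMaxIterations(200)
        minimizer.run(x0)
        result = minimizer.getResult()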
159 """
160
161 def __init__(self, J=None, m_tol=1e-4, J_tol=None, imax=300):
162 """
163 Initializes a new minimizer for a given cost function.
164
165 :param J: the cost function to be minimized
166 :type J: `CostFunction`
167 """
168 self.setCostFunction(J)
169 self._m_tol = m_tol
170 self._J_tol = J_tol
171 self._imax = imax
172 self._result = None
173 self._callback = None
174 self.logger = logging.getLogger('inv.%s'%self.__class__.__name__)
175 self.setTolerance()
176
177 def setCostFunction(self, J):
178 """
179 set the cost function to be minimized
180
181 :param J: the cost function to be minimized
182 :type J: `CostFunction`
183 """
184 self.__J=J
185
186 def getCostFunction(self):
187 """
188 return the cost function to be minimized
189
190 :rtype: `CostFunction`
191 """
192 return self.__J
193
194 def setTolerance(self, m_tol=1e-4, J_tol=None):
195 """
196 Sets the tolerance for the stopping criterion. The minimizer stops
197 when an appropriate norm is less than `m_tol`.
198 """
199 self._m_tol = m_tol
200 self._J_tol = J_tol
201
202 def setMaxIterations(self, imax):
203 """
204 Sets the maximum number of iterations before the minimizer terminates.
205 """
206 self._imax = imax
207
208 def setCallback(self, callback):
209 """
210 Sets a callback function to be called after every iteration.
211 The arguments to the function are: (k, x, Jx, g_Jxx), where
212 k is the current iteration, x is the current estimate, Jx=f(x) and
213 g_Jxx=grad f(x).
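
        Example (illustrative only, assuming a minimizer instance ``m``)::

            def progress(k, x, Jx, g_Jxx):
                print("iteration %d: J(x)=%e"%(k, Jx))

            m.setCallback(progress)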
214 """
215 if callback is not None and not callable(callback):
216 raise TypeError("Callback function not callable.")
217 self._callback = callback
218
219 def _doCallback(self, *args):
220 if self._callback is not None:
221 self._callback(*args)
222
223 def getResult(self):
224 """
225 Returns the result of the minimization.
226 """
227 return self._result
228
229 def getOptions(self):
230 """
231 Returns a dictionary of minimizer-specific options.
232 """
233 return {}
234
235 def setOptions(self, **opts):
236 """
237 Sets minimizer-specific options. For a list of possible options see
238 `getOptions()`.
239 """
240 raise NotImplementedError
241
242 def run(self, x0):
243 """
244 Executes the minimization algorithm for *f* starting with the initial
245 guess ``x0``.
246
247 :return: the result of the minimization
248 """
249 raise NotImplementedError
250
251 def logSummary(self):
252 """
253 Outputs a summary of the completed minimization process to the logger.
254 """
255 if hasattr(self.getCostFunction(), "Value_calls"):
256 self.logger.info("Number of cost function evaluations: %d"%self.getCostFunction().Value_calls)
257 self.logger.info("Number of gradient evaluations: %d"%self.getCostFunction().Gradient_calls)
258 self.logger.info("Number of inner product evaluations: %d"%self.getCostFunction().DualProduct_calls)
259 self.logger.info("Number of argument evaluations: %d"%self.getCostFunction().Arguments_calls)
260 self.logger.info("Number of norm evaluations: %d"%self.getCostFunction().Norm_calls)
261
##############################################################################
class MinimizerLBFGS(AbstractMinimizer):
    """
    Minimizer that uses the limited-memory Broyden-Fletcher-Goldfarb-Shanno
    method.
    """

    # History size
    _truncation = 30

    # Initial Hessian multiplier
    _initial_H = 1

    # Restart after this many iteration steps
    _restart = 60

    def getOptions(self):
        return {'truncation':self._truncation, 'initialHessian':self._initial_H, 'restart':self._restart}

    def setOptions(self, **opts):
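        """
        Sets L-BFGS specific options. Recognized keys (cf. `getOptions()`) are
        ``truncation`` (also accepted as ``historySize``), ``initialHessian``
        and ``restart``. Illustrative call::

            minimizer.setOptions(truncation=20, restart=100)
        """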
        for o in opts:
            if o=='historySize' or o=='truncation':
                self._truncation=opts[o]
            elif o=='initialHessian':
                self._initial_H=opts[o]
            elif o=='restart':
                self._restart=opts[o]
            else:
                raise KeyError("Invalid option '%s'"%o)

    def run(self, x):
        """
        :param x: Level set function representing our initial guess
        :type x: `Data`
        :return: Level set function representing the solution
        :rtype: `Data`
        """
        if self.getCostFunction().provides_inverse_Hessian_approximation:
            self.getCostFunction().updateHessian()
            invH_scale = None
        else:
            invH_scale = self._initial_H

        # start the iteration:
        n_iter = 0
        n_last_break_down=-1
        non_curable_break_down = False
        converged = False
        args=self.getCostFunction().getArguments(x)
        g_Jx=self.getCostFunction().getGradient(x, *args)
        Jx=self.getCostFunction()(x, *args) # equivalent to getValue() for Downunder CostFunctions
        Jx_0=Jx

        while not converged and not non_curable_break_down and n_iter < self._imax:
            k=0
            break_down = False
            s_and_y=[]
            # initial step length for line search
            alpha=1.0
            self._doCallback(n_iter, x, Jx, g_Jx)

            while not converged and not break_down and k < self._restart and n_iter < self._imax:
                #self.logger.info("\033[1;31miteration %d\033[1;30m"%n_iter)
                if n_iter%10==0:
                    self.logger.info("********** iteration %3d **********"%n_iter)
                else:
                    self.logger.debug("********** iteration %3d **********"%n_iter)

                # determine search direction
                self.logger.debug("\tJ(x) = %s"%Jx)
                self.logger.debug("\tgrad f(x) = %s"%g_Jx)
                if invH_scale: self.logger.debug("\tH = %s"%invH_scale)

                p = -self._twoLoop(invH_scale, g_Jx, s_and_y, x, *args)

                # determine new step length using the last one as initial value
                # however, avoid using too small steps for too long...
                if alpha <= 0.5:
                    alpha=2*alpha
                alpha, Jx_new, g_Jx_new = line_search(self.getCostFunction(), x, p, g_Jx, Jx, alpha)
                # this function returns a scaling alpha for the search
                # direction as well as the cost function evaluation and
                # gradient for the new solution approximation x_new=x+alpha*p
                self.logger.debug("\tSearch direction scaling alpha=%e"%alpha)

                # execute the step
                delta_x = alpha*p
                x_new = x + delta_x
                self.logger.debug("\tJ(x) = %s"%Jx_new)

                converged = True
                if self._J_tol:
                    flag=abs(Jx_new-Jx) <= self._J_tol * abs(Jx_new-Jx_0)
                    if flag:
                        self.logger.debug("Cost function has converged: dJ, J*J_tol = %e, %e"%(Jx-Jx_new,abs(Jx_new-Jx_0)*self._J_tol))
                    else:
                        self.logger.debug("Cost function checked: dJ, J*J_tol = %e, %e"%(Jx-Jx_new,abs(Jx_new)*self._J_tol))
                    converged = converged and flag

                if self._m_tol:
                    norm_x=self.getCostFunction().getNorm(x_new)
                    norm_dx=self.getCostFunction().getNorm(delta_x)
                    flag= norm_dx <= self._m_tol * norm_x
                    if flag:
                        self.logger.debug("Solution has converged: dx, x*m_tol = %e, %e"%(norm_dx,norm_x*self._m_tol))
                    else:
                        self.logger.debug("Solution checked: dx, x*m_tol = %e, %e"%(norm_dx,norm_x*self._m_tol))
                    converged = converged and flag

                x=x_new
                if converged:
                    break

                # unfortunately there is more work to do!
                if g_Jx_new is None:
                    args=self.getCostFunction().getArguments(x_new)
                    g_Jx_new=self.getCostFunction().getGradient(x_new, *args)
                delta_g=g_Jx_new-g_Jx

                rho=self.getCostFunction().getDualProduct(delta_x, delta_g)
                if abs(rho)>0:
                    s_and_y.append((delta_x,delta_g, rho ))
                else:
                    break_down=True

                self.getCostFunction().updateHessian()
                g_Jx=g_Jx_new
                Jx=Jx_new

                k+=1
                n_iter+=1
                self._doCallback(n_iter, x, Jx, g_Jx)

                # delete oldest vector pair
                if k>self._truncation: s_and_y.pop(0)

                if not self.getCostFunction().provides_inverse_Hessian_approximation and not break_down:
                    # set the new scaling factor (approximation of inverse Hessian)
                    denom=self.getCostFunction().getDualProduct(delta_g, delta_g)
                    if denom > 0:
                        invH_scale=self.getCostFunction().getDualProduct(delta_x,delta_g)/denom
                    else:
                        invH_scale=self._initial_H
                        self.logger.debug("** Break down in H update. Resetting to initial value %s."%self._initial_H)

            # case handling for inner iteration:
            if break_down:
                if n_iter == n_last_break_down+1:
                    non_curable_break_down = True
                    self.logger.debug("** Incurable break down detected in step %d."%n_iter)
                else:
                    n_last_break_down = n_iter
                    self.logger.debug("** Break down detected in step %d. Iteration is restarted."%n_iter)
            if not k < self._restart:
                self.logger.debug("Iteration is restarted after %d steps."%n_iter)

        # wrap up and report the outcome of the iteration:
        self._result=x
        if n_iter >= self._imax:
            self.logger.warn(">>>>>>>>>> Maximum number of iterations reached! <<<<<<<<<<")
            raise MinimizerMaxIterReached("Gave up after %d steps."%n_iter)
        elif non_curable_break_down:
            self.logger.warn(">>>>>>>>>> Incurable breakdown! <<<<<<<<<<")
            raise MinimizerIterationIncurableBreakDown("Gave up after %d steps."%n_iter)

        self.logger.info("Success after %d iterations!"%n_iter)
        return self._result

    def _twoLoop(self, invH_scale, g_Jx, s_and_y, x, *args):
        """
        Helper for the L-BFGS method.
        See 'Numerical Optimization' by J. Nocedal for an explanation.
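
        This is the standard two-loop recursion: ``q`` starts as the current
        gradient and is reduced by the stored ``(s, y, rho)`` pairs in reverse
        order, scaled by the initial inverse Hessian approximation (or the
        cost function's own approximation), and finally corrected in forward
        order so that the result approximates ``H^{-1} * grad J(x)``.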
432 """
433 q=g_Jx
434 alpha=[]
435 for s,y, rho in reversed(s_and_y):
436 a=self.getCostFunction().getDualProduct(s, q)/rho
437 alpha.append(a)
438 q=q-a*y
439
440 if self.getCostFunction().provides_inverse_Hessian_approximation:
441 r = self.getCostFunction().getInverseHessianApproximation(x, q, *args)
442 else:
443 r = invH_scale * q
444
445 for s,y,rho in s_and_y:
446 beta = self.getCostFunction().getDualProduct(r, y)/rho
447 a = alpha.pop()
448 r = r + s * (a-beta)
449 return r
450
##############################################################################
class MinimizerBFGS(AbstractMinimizer):
    """
    Minimizer that uses the Broyden-Fletcher-Goldfarb-Shanno method.
    """

    # Initial Hessian multiplier
    _initial_H = 1

    def getOptions(self):
        return {'initialHessian':self._initial_H}

    def setOptions(self, **opts):
        for o in opts:
            if o=='initialHessian':
                self._initial_H=opts[o]
            else:
                raise KeyError("Invalid option '%s'"%o)

    def run(self, x):
        args=self.getCostFunction().getArguments(x)
        g_Jx=self.getCostFunction().getGradient(x, *args)
        Jx=self.getCostFunction()(x, *args)
        k=0
        try:
            n=len(x)
        except:
            n=x.getNumberOfDataPoints()
        I=np.eye(n)
        H=self._initial_H*I
        gnorm=Lsup(g_Jx)
        self._doCallback(k, x, Jx, g_Jx)

        while gnorm > self._m_tol and k < self._imax:
            self.logger.debug("iteration %d, gnorm=%e"%(k,gnorm))

            # determine search direction
            d=-self.getCostFunction().getDualProduct(H, g_Jx)

            self.logger.debug("H = %s"%H)
            self.logger.debug("grad f(x) = %s"%g_Jx)
            self.logger.debug("d = %s"%d)
            self.logger.debug("x = %s"%x)

            # determine step length
            alpha, Jx, g_Jx_new = line_search(self.getCostFunction(), x, d, g_Jx, Jx)
            self.logger.debug("alpha=%e"%alpha)
            # execute the step
            x_new=x+alpha*d
            delta_x=x_new-x
            x=x_new
            if g_Jx_new is None:
                g_Jx_new=self.getCostFunction().getGradient(x_new)
            delta_g=g_Jx_new-g_Jx
            g_Jx=g_Jx_new
            k+=1
            self._doCallback(k, x, Jx, g_Jx)
            gnorm=Lsup(g_Jx)
            self._result=x
            if (gnorm<=self._m_tol): break

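            # (Reference note) the block below implements the standard BFGS
            # update of the inverse Hessian approximation,
            #     H <- (I - rho*s*y^T) H (I - rho*y*s^T) + rho*s*s^T
            # with s = delta_x, y = delta_g and rho = 1/<s, y>, expressed
            # through the cost function's dual product.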
            # update Hessian
            denom=self.getCostFunction().getDualProduct(delta_x, delta_g)
            if denom < EPSILON * gnorm:
                denom=1e-5
                self.logger.debug("Break down in H update. Resetting.")
            rho=1./denom
            self.logger.debug("rho=%e"%rho)
            A=I-rho*delta_x[:,None]*delta_g[None,:]
            AT=I-rho*delta_g[:,None]*delta_x[None,:]
            H=self.getCostFunction().getDualProduct(A, self.getCostFunction().getDualProduct(H,AT)) + rho*delta_x[:,None]*delta_x[None,:]

        if k >= self._imax:
            self.logger.warn(">>>>>>>>>> Maximum number of iterations reached! <<<<<<<<<<")
            raise MinimizerMaxIterReached("Gave up after %d steps."%k)

        self.logger.info("Success after %d iterations! Final gnorm=%e"%(k,gnorm))
        return self._result

##############################################################################
class MinimizerNLCG(AbstractMinimizer):
    """
    Minimizer that uses the nonlinear conjugate gradient method
    (Fletcher-Reeves variant).
    """

    def run(self, x):
        i=0
        k=0
        args=self.getCostFunction().getArguments(x)
        r=-self.getCostFunction().getGradient(x, *args)
        Jx=self.getCostFunction()(x, *args)
        d=r
        delta=self.getCostFunction().getDualProduct(r,r)
        delta0=delta
        self._doCallback(i, x, Jx, -r)

        while i<self._imax and Lsup(r)>self._m_tol:
            self.logger.debug("iteration %d"%i)
            self.logger.debug("grad f(x) = %s"%(-r))
            self.logger.debug("d = %s"%d)
            self.logger.debug("x = %s"%x)

            alpha, Jx, g_Jx_new = line_search(self.getCostFunction(), x, d, -r, Jx, c2=0.4)
            self.logger.debug("alpha=%e"%(alpha))
            x=x+alpha*d
            r=-self.getCostFunction().getGradient(x) if g_Jx_new is None else -g_Jx_new
            delta_o=delta
            delta=self.getCostFunction().getDualProduct(r,r)
            beta=delta/delta_o
            d=r+beta*d
            k=k+1
            try:
                lenx=len(x)
            except:
                lenx=x.getNumberOfDataPoints()
            if k == lenx or self.getCostFunction().getDualProduct(r,d) <= 0:
                d=r
                k=0
            i+=1
            self._doCallback(i, x, Jx, g_Jx_new)
        self._result=x

        if i >= self._imax:
            self.logger.warn(">>>>>>>>>> Maximum number of iterations reached! <<<<<<<<<<")
            raise MinimizerMaxIterReached("Gave up after %d steps."%i)

        self.logger.info("Success after %d iterations! Final delta=%e"%(i,delta))
        return self._result

if __name__=="__main__":
    # Example usage with function 'rosen' (minimum=[1,1,...1]):
    from scipy.optimize import rosen, rosen_der
    from esys.downunder import MeteredCostFunction
    import sys
    N=10
    x0=np.array([4.]*N) # initial guess

    class RosenFunc(MeteredCostFunction):
        def __init__(self):
            super(RosenFunc, self).__init__()
            self.provides_inverse_Hessian_approximation=False
        def _getDualProduct(self, f0, f1):
            return np.dot(f0, f1)
        def _getValue(self, x, *args):
            return rosen(x)
        def _getGradient(self, x, *args):
            return rosen_der(x)
        def _getNorm(self, x):
            return Lsup(x)

    f=RosenFunc()
    m=None
    if len(sys.argv)>1:
        method=sys.argv[1].lower()
        if method=='nlcg':
            m=MinimizerNLCG(f)
        elif method=='bfgs':
            m=MinimizerBFGS(f)

    if m is None:
        # default
        m=MinimizerLBFGS(f)
        #m.setOptions(historySize=10000)

    logging.basicConfig(format='[%(funcName)s] \033[1;30m%(message)s\033[0m', level=logging.DEBUG)
    m.setTolerance(m_tol=1e-5)
    m.setMaxIterations(600)
    m.run(x0)
    m.logSummary()
    print("\tLsup(result)=%.8f"%np.amax(abs(m.getResult())))

    #from scipy.optimize import fmin_cg
    #print("scipy ref=%.8f"%np.amax(abs(fmin_cg(rosen, x0, rosen_der, maxiter=10000))))
