/trunk/downunder/py_src/minimizers.py
Revision 4273 - Tue Mar 5 04:14:44 2013 UTC by caltinay
More doco updates and change to the return value of minimizers.


##############################################################################
#
# Copyright (c) 2003-2013 by University of Queensland
# http://www.uq.edu.au
#
# Primary Business: Queensland, Australia
# Licensed under the Open Software License version 3.0
# http://www.opensource.org/licenses/osl-3.0.php
#
# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
# Development since 2012 by School of Earth Sciences
#
##############################################################################

"""Generic minimization algorithms"""

__copyright__="""Copyright (c) 2003-2013 by University of Queensland
http://www.uq.edu.au
Primary Business: Queensland, Australia"""
__license__="""Licensed under the Open Software License version 3.0
http://www.opensource.org/licenses/osl-3.0.php"""
__url__="https://launchpad.net/escript-finley"

__all__ = ['MinimizerException', 'MinimizerIterationIncurableBreakDown', 'MinimizerMaxIterReached', 'AbstractMinimizer', 'MinimizerLBFGS', 'MinimizerBFGS', 'MinimizerNLCG']

import logging
import numpy as np
try:
    from esys.escript import Lsup, sqrt, EPSILON
except ImportError:
    Lsup=lambda x: np.amax(abs(x))
    sqrt=np.sqrt
    EPSILON=1e-18

lslogger=logging.getLogger('inv.minimizer.linesearch')
zoomlogger=logging.getLogger('inv.minimizer.linesearch.zoom')

class MinimizerException(Exception):
    """
    This is a generic exception thrown by a minimizer.
    """
    pass

class MinimizerMaxIterReached(MinimizerException):
    """
    Exception thrown if the maximum number of iteration steps is reached.
    """
    pass

class MinimizerIterationIncurableBreakDown(MinimizerException):
    """
    Exception thrown if the iteration scheme encountered an incurable
    breakdown.
    """
    pass

def _zoom(phi, gradphi, phiargs, alpha_lo, alpha_hi, phi_lo, phi_hi, c1, c2, phi0, gphi0, IMAX=25):
    """
    Helper function for `line_search` below which tries to tighten the range
    alpha_lo...alpha_hi. See Chapter 3 of 'Numerical Optimization' by
    J. Nocedal for an explanation.
    """
    i=0
    while True:
        alpha=alpha_lo+.5*(alpha_hi-alpha_lo) # should use interpolation...
        args_a=phiargs(alpha)
        phi_a=phi(alpha, *args_a)
        zoomlogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
        if phi_a > phi0+c1*alpha*gphi0 or phi_a >= phi_lo:
            alpha_hi=alpha
        else:
            gphi_a=gradphi(alpha, *args_a)
            zoomlogger.debug("\tgrad(phi(alpha))=%e"%(gphi_a))
            if np.abs(gphi_a) <= -c2*gphi0:
                break
            if gphi_a*(alpha_hi-alpha_lo) >= 0:
                alpha_hi = alpha_lo
            alpha_lo=alpha
            phi_lo=phi_a
        i+=1
        if i>IMAX:
            gphi_a=None
            break
    return alpha, phi_a, gphi_a

def line_search(f, x, p, g_Jx, Jx, alpha_max=50.0, c1=1e-4, c2=0.9, IMAX=15):
    """
    Line search method that satisfies the strong Wolfe conditions.
    See Chapter 3 of 'Numerical Optimization' by J. Nocedal for an explanation.

    :param f: callable objective function f(x)
    :param x: start value for the line search
    :param p: search direction
    :param g_Jx: value for the gradient of f at x
    :param Jx: value of f(x)
    :param alpha_max: algorithm terminates if alpha reaches this value
    :param c1: value for Armijo condition (see reference)
    :param c2: value for curvature condition (see reference)
    :param IMAX: maximum number of iterations to perform
    :return: the step size alpha, the value f(x+alpha*p), and the gradient of
             f at the new point (or `None` if it was not evaluated)
    """
    # this stores the latest gradf(x+a*p) which is returned
    g_Jx_new=[g_Jx]

    def phi(a, *args):
        """ phi(a):=f(x+a*p) """
        return f(x+a*p, *args)
    def gradphi(a, *args):
        g_Jx_new[0]=f.getGradient(x+a*p, *args)
        return f.getDualProduct(p, g_Jx_new[0])
    def phiargs(a):
        try:
            args=f.getArguments(x+a*p)
        except:
            args=()
        return args

    old_alpha=0.
    # we assume g_Jx is properly scaled so alpha=1 is a reasonable starting value
    alpha=1.
    if Jx is None:
        args0=phiargs(0.)
        phi0=phi(0., *args0)
    else:
        phi0=Jx
    lslogger.debug("phi(0)=%e"%(phi0))
    gphi0=f.getDualProduct(p, g_Jx) #gradphi(0., *args0)
    lslogger.debug("grad phi(0)=%e"%(gphi0))
    old_phi_a=phi0
    i=1

    while i<IMAX and alpha>0. and alpha<alpha_max:
        args_a=phiargs(alpha)
        phi_a=phi(alpha, *args_a)
        lslogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
        if (phi_a > phi0+c1*alpha*gphi0) or ((phi_a>=old_phi_a) and (i>1)):
            alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, old_alpha, alpha, old_phi_a, phi_a, c1, c2, phi0, gphi0)
            break

        gphi_a=gradphi(alpha, *args_a)
        if np.abs(gphi_a) <= -c2*gphi0:
            break
        if gphi_a >= 0:
            alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, alpha, old_alpha, phi_a, old_phi_a, c1, c2, phi0, gphi0)
            break

        old_alpha=alpha
        # the factor is arbitrary as long as there is sufficient increase
        alpha=2.*alpha
        old_phi_a=phi_a
        i+=1

    return alpha, phi_a, g_Jx_new[0]


##############################################################################
class AbstractMinimizer(object):
    """
    Base class for function minimization methods.
    """

    def __init__(self, J=None, m_tol=1e-4, J_tol=None, imax=300):
        """
        Initializes a new minimizer for a given cost function.

        :param J: the cost function to be minimized
        :type J: `CostFunction`
        :param m_tol: tolerance for the solution-based stopping criterion
                      (see `setTolerance()`)
        :param J_tol: tolerance for the cost function-based stopping criterion
                      (see `setTolerance()`)
        :param imax: maximum number of iterations
        """
        self.setCostFunction(J)
        self._imax = imax
        self._result = None
        self._callback = None
        self.logger = logging.getLogger('inv.%s'%self.__class__.__name__)
        self.setTolerance(m_tol=m_tol, J_tol=J_tol)

    def setCostFunction(self, J):
        """
        Sets the cost function to be minimized.

        :param J: the cost function to be minimized
        :type J: `CostFunction`
        """
        self.__J=J

    def getCostFunction(self):
        """
        Returns the cost function to be minimized.

        :rtype: `CostFunction`
        """
        return self.__J

    def setTolerance(self, m_tol=1e-4, J_tol=None):
        """
        Sets the tolerances for the stopping criterion. The minimizer stops
        when an appropriate norm is less than `m_tol`. If `J_tol` is set, the
        relative change of the cost function value is also used as a stopping
        criterion (where supported by the minimizer).
        """
        self._m_tol = m_tol
        self._J_tol = J_tol

    def setMaxIterations(self, imax):
        """
        Sets the maximum number of iterations before the minimizer terminates.
        """
        self._imax = imax

    def setCallback(self, callback):
        """
        Sets a callback function to be called after every iteration.
        The arguments to the function are: (k, x, Jx, g_Jx), where
        k is the current iteration, x is the current estimate, Jx=f(x) and
        g_Jx=grad f(x). A minimal example is sketched in the comment below.
        """
        if callback is not None and not callable(callback):
            raise TypeError("Callback function not callable.")
        self._callback = callback
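
    # A minimal callback might look like the sketch below ('progress' and
    # 'minimizer' are placeholder names, not part of this module):
    #
    #   def progress(k, x, Jx, g_Jx):
    #       print("iteration %d: J(x)=%e"%(k, Jx))
    #   minimizer.setCallback(progress)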

    def _doCallback(self, *args):
        if self._callback is not None:
            self._callback(*args)

    def getResult(self):
        """
        Returns the result of the minimization.
        """
        return self._result

    def getOptions(self):
        """
        Returns a dictionary of minimizer-specific options.
        """
        return {}

    def setOptions(self, **opts):
        """
        Sets minimizer-specific options. For a list of possible options see
        `getOptions()`.
        """
        raise NotImplementedError

    def run(self, x0):
        """
        Executes the minimization algorithm for *f* starting with the initial
        guess ``x0``.

        :return: the result of the minimization
        """
        raise NotImplementedError

    def logSummary(self):
        """
        Outputs a summary of the completed minimization process to the logger.
        """
        if hasattr(self.getCostFunction(), "Value_calls"):
            self.logger.info("Number of cost function evaluations: %d"%self.getCostFunction().Value_calls)
            self.logger.info("Number of gradient evaluations: %d"%self.getCostFunction().Gradient_calls)
            self.logger.info("Number of inner product evaluations: %d"%self.getCostFunction().DualProduct_calls)
            self.logger.info("Number of argument evaluations: %d"%self.getCostFunction().Arguments_calls)
            self.logger.info("Number of norm evaluations: %d"%self.getCostFunction().Norm_calls)
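
# A typical driver for any of the minimizers below follows the pattern of the
# __main__ example at the end of this file; roughly (with `cf` assumed to be
# a `CostFunction` instance and `x0` a suitable initial guess):
#
#   solver = MinimizerLBFGS(cf)
#   solver.setTolerance(m_tol=1e-5)
#   solver.setMaxIterations(200)
#   try:
#       solver.run(x0)
#   except MinimizerMaxIterReached as e:
#       print(e)
#   result = solver.getResult()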

##############################################################################
class MinimizerLBFGS(AbstractMinimizer):
    """
    Minimizer that uses the limited-memory Broyden-Fletcher-Goldfarb-Shanno
    method.
    """

    # History size
    _truncation = 30

    # Initial Hessian multiplier
    _initial_H = 1

    # Restart after this many iteration steps
    _restart = 60

    def getOptions(self):
        return {'truncation':self._truncation,'initialHessian':self._initial_H, 'restart':self._restart}

    def setOptions(self, **opts):
        for o in opts:
            if o=='historySize' or o=='truncation':
                self._truncation=opts[o]
            elif o=='initialHessian':
                self._initial_H=opts[o]
            elif o=='restart':
                self._restart=opts[o]
            else:
                raise KeyError("Invalid option '%s'"%o)

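    # For illustration, the options above could be set e.g. as
    #   minimizer.setOptions(truncation=20, restart=100, initialHessian=1.)
    # (the values are arbitrary examples, not recommendations).
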
    def run(self, x):
        if self.getCostFunction().provides_inverse_Hessian_approximation:
            self.getCostFunction().updateHessian()
            invH_scale = None
        else:
            invH_scale = self._initial_H

        # start the iteration:
        n_iter = 0
        n_last_break_down=-1
        non_curable_break_down = False
        converged = False
        args=self.getCostFunction().getArguments(x)
        g_Jx=self.getCostFunction().getGradient(x, *args)
        Jx=self.getCostFunction()(x, *args)
        Jx_0=Jx

        while not converged and not non_curable_break_down and n_iter < self._imax:
            k=0
            break_down = False
            s_and_y=[]
            self._doCallback(n_iter, x, Jx, g_Jx)

            while not converged and not break_down and k < self._restart and n_iter < self._imax:
                #self.logger.info("\033[1;31miteration %d\033[1;30m"%n_iter)
                if n_iter%10==0:
                    self.logger.info("********** iteration %3d **********"%n_iter)
                else:
                    self.logger.debug("********** iteration %3d **********"%n_iter)
                # determine search direction
                self.logger.debug("\tJ(x) = %s"%Jx)
                self.logger.debug("\tgrad f(x) = %s"%g_Jx)
                if invH_scale: self.logger.debug("\tH = %s"%invH_scale)

                p = -self._twoLoop(invH_scale, g_Jx, s_and_y, x, *args)
                # determine step length
                alpha, Jx_new, g_Jx_new = line_search(self.getCostFunction(), x, p, g_Jx, Jx)
                # this function returns a scaling alpha for the search
                # direction as well as the cost function evaluation and
                # gradient for the new solution approximation x_new=x+alpha*p
                self.logger.debug("\tSearch direction scaling alpha=%e"%alpha)

                # execute the step
                delta_x = alpha*p
                x_new = x + delta_x
                self.logger.debug("\tJ(x) = %s"%Jx_new)

                converged = True
                if self._J_tol:
                    flag=abs(Jx_new-Jx) <= self._J_tol * abs(Jx_new-Jx_0)
                    if flag:
                        self.logger.debug("Cost function has converged: dJ, J*J_tol = %e, %e"%(Jx-Jx_new,abs(Jx_new-Jx_0)*self._J_tol))
                    else:
                        self.logger.debug("Cost function checked: dJ, J*J_tol = %e, %e"%(Jx-Jx_new,abs(Jx_new-Jx_0)*self._J_tol))
                    converged = converged and flag
                if self._m_tol:
                    norm_x=self.getCostFunction().getNorm(x_new)
                    norm_dx=self.getCostFunction().getNorm(delta_x)
                    flag= norm_dx <= self._m_tol * norm_x
                    if flag:
                        self.logger.debug("Solution has converged: dx, x*m_tol = %e, %e"%(norm_dx,norm_x*self._m_tol))
                    else:
                        self.logger.debug("Solution checked: dx, x*m_tol = %e, %e"%(norm_dx,norm_x*self._m_tol))
                    converged = converged and flag

                x=x_new
                if converged:
                    break

                # unfortunately there is more work to do!
                if g_Jx_new is None:
                    args=self.getCostFunction().getArguments(x_new)
                    g_Jx_new=self.getCostFunction().getGradient(x_new, *args)
                delta_g=g_Jx_new-g_Jx

                rho=self.getCostFunction().getDualProduct(delta_x, delta_g)
                if abs(rho)>0:
                    s_and_y.append((delta_x, delta_g, rho))
                else:
                    break_down=True

                self.getCostFunction().updateHessian()
                g_Jx=g_Jx_new
                Jx=Jx_new

                k+=1
                n_iter+=1
                self._doCallback(n_iter, x, Jx, g_Jx)

                # delete oldest vector pair
                if k>self._truncation: s_and_y.pop(0)

                if not self.getCostFunction().provides_inverse_Hessian_approximation and not break_down:
                    # set the new scaling factor (approximation of inverse Hessian)
                    denom=self.getCostFunction().getDualProduct(delta_g, delta_g)
                    if denom > 0:
                        invH_scale=self.getCostFunction().getDualProduct(delta_x,delta_g)/denom
                    else:
                        invH_scale=self._initial_H
                        self.logger.debug("** Break down in H update. Resetting to initial value %s."%self._initial_H)
            # case handling for inner iteration:
            if break_down:
                if n_iter == n_last_break_down+1:
                    non_curable_break_down = True
                    self.logger.debug("** Incurable break down detected in step %d."%n_iter)
                else:
                    n_last_break_down = n_iter
                    self.logger.debug("** Break down detected in step %d. Iteration is restarted."%n_iter)
            if not k < self._restart:
                self.logger.debug("Iteration is restarted after %d steps."%n_iter)

        # case handling for the outer iteration:
        self._result=x
        if n_iter >= self._imax:
            self.logger.warn(">>>>>>>>>> Maximum number of iterations reached! <<<<<<<<<<")
            raise MinimizerMaxIterReached("Gave up after %d steps."%n_iter)
        elif non_curable_break_down:
            self.logger.warn(">>>>>>>>>> Incurable breakdown! <<<<<<<<<<")
            raise MinimizerIterationIncurableBreakDown("Gave up after %d steps."%n_iter)

        self.logger.info("Success after %d iterations!"%n_iter)
        return self._result

    def _twoLoop(self, invH_scale, g_Jx, s_and_y, x, *args):
        """
        Helper for the L-BFGS method.
        See 'Numerical Optimization' by J. Nocedal for an explanation.
        """
        q=g_Jx
        alpha=[]
        for s,y,rho in reversed(s_and_y):
            a=self.getCostFunction().getDualProduct(s, q)/rho
            alpha.append(a)
            q=q-a*y

        if self.getCostFunction().provides_inverse_Hessian_approximation:
            r = self.getCostFunction().getInverseHessianApproximation(x, q, *args)
        else:
            r = invH_scale * q

        for s,y,rho in s_and_y:
            beta = self.getCostFunction().getDualProduct(r, y)/rho
            a = alpha.pop()
            r = r + s * (a-beta)
        return r

##############################################################################
class MinimizerBFGS(AbstractMinimizer):
    """
    Minimizer that uses the Broyden-Fletcher-Goldfarb-Shanno method.
    """

    # Initial Hessian multiplier
    _initial_H = 1

    def getOptions(self):
        return {'initialHessian':self._initial_H}

    def setOptions(self, **opts):
        for o in opts:
            if o=='initialHessian':
                self._initial_H=opts[o]
            else:
                raise KeyError("Invalid option '%s'"%o)

    def run(self, x):
        args=self.getCostFunction().getArguments(x)
        g_Jx=self.getCostFunction().getGradient(x, *args)
        Jx=self.getCostFunction()(x, *args)
        k=0
        try:
            n=len(x)
        except:
            n=x.getNumberOfDataPoints()
        I=np.eye(n)
        H=self._initial_H*I
        gnorm=Lsup(g_Jx)
        self._doCallback(k, x, Jx, g_Jx)

        while gnorm > self._m_tol and k < self._imax:
            self.logger.debug("iteration %d, gnorm=%e"%(k,gnorm))

            # determine search direction
            d=-self.getCostFunction().getDualProduct(H, g_Jx)

            self.logger.debug("H = %s"%H)
            self.logger.debug("grad f(x) = %s"%g_Jx)
            self.logger.debug("d = %s"%d)
            self.logger.debug("x = %s"%x)

            # determine step length
            alpha, Jx, g_Jx_new = line_search(self.getCostFunction(), x, d, g_Jx, Jx)
            self.logger.debug("alpha=%e"%alpha)
            # execute the step
            x_new=x+alpha*d
            delta_x=x_new-x
            x=x_new
            if g_Jx_new is None:
                g_Jx_new=self.getCostFunction().getGradient(x_new)
            delta_g=g_Jx_new-g_Jx
            g_Jx=g_Jx_new
            k+=1
            self._doCallback(k, x, Jx, g_Jx)
            gnorm=Lsup(g_Jx)
            self._result=x
            if (gnorm<=self._m_tol): break

            # update Hessian
            denom=self.getCostFunction().getDualProduct(delta_x, delta_g)
            if denom < EPSILON * gnorm:
                denom=1e-5
                self.logger.debug("Break down in H update. Resetting.")
            rho=1./denom
            self.logger.debug("rho=%e"%rho)
            A=I-rho*delta_x[:,None]*delta_g[None,:]
            AT=I-rho*delta_g[:,None]*delta_x[None,:]
            H=self.getCostFunction().getDualProduct(A, self.getCostFunction().getDualProduct(H,AT)) + rho*delta_x[:,None]*delta_x[None,:]

        if k >= self._imax:
            self.logger.warn(">>>>>>>>>> Maximum number of iterations reached! <<<<<<<<<<")
            raise MinimizerMaxIterReached("Gave up after %d steps."%k)

        self.logger.info("Success after %d iterations! Final gnorm=%e"%(k,gnorm))
        return self._result

##############################################################################
class MinimizerNLCG(AbstractMinimizer):
    """
    Minimizer that uses the nonlinear conjugate gradient method
    (Fletcher-Reeves variant).
    """

    def run(self, x):
        i=0
        k=0
        args=self.getCostFunction().getArguments(x)
        r=-self.getCostFunction().getGradient(x, *args)
        Jx=self.getCostFunction()(x, *args)
        d=r
        delta=self.getCostFunction().getDualProduct(r,r)
        delta0=delta
        self._doCallback(i, x, Jx, -r)

        while i<self._imax and Lsup(r)>self._m_tol:
            self.logger.debug("iteration %d"%i)
            self.logger.debug("grad f(x) = %s"%(-r))
            self.logger.debug("d = %s"%d)
            self.logger.debug("x = %s"%x)

            alpha, Jx, g_Jx_new = line_search(self.getCostFunction(), x, d, -r, Jx, c2=0.4)
            self.logger.debug("alpha=%e"%(alpha))
            x=x+alpha*d
            r=-self.getCostFunction().getGradient(x) if g_Jx_new is None else -g_Jx_new
            delta_o=delta
            delta=self.getCostFunction().getDualProduct(r,r)
            beta=delta/delta_o
            d=r+beta*d
            k=k+1
            try:
                lenx=len(x)
            except:
                lenx=x.getNumberOfDataPoints()
            if k == lenx or self.getCostFunction().getDualProduct(r,d) <= 0:
                d=r
                k=0
            i+=1
            self._doCallback(i, x, Jx, g_Jx_new)
            self._result=x

        if i >= self._imax:
            self.logger.warn(">>>>>>>>>> Maximum number of iterations reached! <<<<<<<<<<")
            raise MinimizerMaxIterReached("Gave up after %d steps."%i)

        self.logger.info("Success after %d iterations! Final delta=%e"%(i,delta))
        return self._result


if __name__=="__main__":
    # Example usage with function 'rosen' (minimum=[1,1,...1]):
    from scipy.optimize import rosen, rosen_der
    from esys.downunder import MeteredCostFunction
    import sys
    N=10
    x0=np.array([4.]*N) # initial guess

    class RosenFunc(MeteredCostFunction):
        def __init__(self):
            super(RosenFunc, self).__init__()
            self.provides_inverse_Hessian_approximation=False
        def _getDualProduct(self, f0, f1):
            return np.dot(f0, f1)
        def _getValue(self, x, *args):
            return rosen(x)
        def _getGradient(self, x, *args):
            return rosen_der(x)
        def _getNorm(self, x):
            return Lsup(x)

    f=RosenFunc()
    m=None
    if len(sys.argv)>1:
        method=sys.argv[1].lower()
        if method=='nlcg':
            m=MinimizerNLCG(f)
        elif method=='bfgs':
            m=MinimizerBFGS(f)

    if m is None:
        # default
        m=MinimizerLBFGS(f)
        #m.setOptions(historySize=10000)

    logging.basicConfig(format='[%(funcName)s] \033[1;30m%(message)s\033[0m', level=logging.DEBUG)
    m.setTolerance(m_tol=1e-5)
    m.setMaxIterations(600)
    m.run(x0)
    m.logSummary()
    print("\tLsup(result)=%.8f"%np.amax(abs(m.getResult())))

    #from scipy.optimize import fmin_cg
    #print("scipy ref=%.8f"%np.amax(abs(fmin_cg(rosen, x0, rosen_der, maxiter=10000))))

