/[escript]/trunk/downunder/py_src/minimizers.py


Revision 4097
Fri Dec 7 01:18:35 2012 UTC by caltinay
File MIME type: text/x-python
File size: 17615 byte(s)
Minor edits.


##############################################################################
#
# Copyright (c) 2003-2012 by University of Queensland
# http://www.uq.edu.au
#
# Primary Business: Queensland, Australia
# Licensed under the Open Software License version 3.0
# http://www.opensource.org/licenses/osl-3.0.php
#
# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
# Development since 2012 by School of Earth Sciences
#
##############################################################################

"""Generic minimization algorithms"""

__copyright__="""Copyright (c) 2003-2012 by University of Queensland
http://www.uq.edu.au
Primary Business: Queensland, Australia"""
__license__="""Licensed under the Open Software License version 3.0
http://www.opensource.org/licenses/osl-3.0.php"""
__url__="https://launchpad.net/escript-finley"

__all__ = ['AbstractMinimizer', 'MinimizerLBFGS', 'MinimizerBFGS', 'MinimizerNLCG']

import logging
import numpy as np

try:
    from esys.escript import Lsup, sqrt, EPSILON
except ImportError:
    Lsup=lambda x: np.amax(abs(x))
    sqrt=np.sqrt
    EPSILON=1e-18

lslogger=logging.getLogger('inv.minimizer.linesearch')
zoomlogger=logging.getLogger('inv.minimizer.linesearch.zoom')
def _zoom(phi, gradphi, phiargs, alpha_lo, alpha_hi, phi_lo, phi_hi, c1, c2, phi0, gphi0, IMAX=25):
    """
    Helper function for `line_search` below which tries to tighten the range
    alpha_lo...alpha_hi. See Chapter 3 of 'Numerical Optimization' by
    J. Nocedal for an explanation.
    """
    i=0
    while True:
        alpha=alpha_lo+.5*(alpha_hi-alpha_lo) # should use interpolation...
        args_a=phiargs(alpha)
        phi_a=phi(alpha, *args_a)
        zoomlogger.debug("Zoom.iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
        if phi_a > phi0+c1*alpha*gphi0 or phi_a >= phi_lo:
            alpha_hi=alpha
        else:
            gphi_a=gradphi(alpha, *args_a)
            zoomlogger.debug("Zoom.grad(phi(alpha))=%e"%(gphi_a))
            if np.abs(gphi_a) <= -c2*gphi0:
                break
            if gphi_a*(alpha_hi-alpha_lo) >= 0:
                alpha_hi = alpha_lo
            alpha_lo=alpha
            phi_lo=phi_a
        i+=1
        if i>IMAX:
            gphi_a=None
            break
    return alpha, phi_a, gphi_a

def line_search(f, x, p, gf, fx, alpha_max=50.0, c1=1e-4, c2=0.9, IMAX=15):
    """
    Line search method that satisfies the strong Wolfe conditions.
    See Chapter 3 of 'Numerical Optimization' by J. Nocedal for an explanation.

    :param f: callable objective function f(x)
    :param x: start value for the line search
    :param p: search direction
    :param gf: value for the gradient of f at x
    :param fx: value of f(x)
    :param alpha_max: algorithm terminates if alpha reaches this value
    :param c1: value for Armijo condition (see reference)
    :param c2: value for curvature condition (see reference)
    :param IMAX: maximum number of iterations to perform
    """
    # this stores the latest gradf(x+a*p) which is returned
    gf_new=[gf]

    def phi(a, *args):
        """ phi(a):=f(x+a*p) """
        return f(x+a*p, *args)
    def gradphi(a, *args):
        gf_new[0]=f.getGradient(x+a*p, *args)
        return f.getDualProduct(p, gf_new[0])

    def phiargs(a):
        try:
            args=f.getArguments(x+a*p)
        except:
            args=()
        return args

    old_alpha=0.
    # we assume gf is properly scaled so alpha=1 is a reasonable starting value
    alpha=1.
    if fx is None:
        args0=phiargs(0.)
        phi0=phi(0., *args0)
    else:
        phi0=fx
    lslogger.debug("phi(0)=%e"%(phi0))
    gphi0=f.getDualProduct(p, gf) #gradphi(0., *args0)
    lslogger.debug("grad phi(0)=%e"%(gphi0))
    old_phi_a=phi0
    i=1

    while i<IMAX and alpha>0. and alpha<alpha_max:

        args_a=phiargs(alpha)
        phi_a=phi(alpha, *args_a)
        lslogger.debug("Line Search.iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
        if (phi_a > phi0+c1*alpha*gphi0) or ((phi_a>=old_phi_a) and (i>1)):
            alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, old_alpha, alpha, old_phi_a, phi_a, c1, c2, phi0, gphi0)
            break

        gphi_a=gradphi(alpha, *args_a)
        if np.abs(gphi_a) <= -c2*gphi0:
            break
        if gphi_a >= 0:
            alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, alpha, old_alpha, phi_a, old_phi_a, c1, c2, phi0, gphi0)
            break

        old_alpha=alpha
        # the factor is arbitrary as long as there is sufficient increase
        alpha=2.*alpha
        old_phi_a=phi_a
        i+=1

    return alpha, phi_a, gf_new[0]

##############################################################################
class AbstractMinimizer(object):
    """
    Base class for function minimization methods.
    """

    TOLERANCE_REACHED, MAX_ITERATIONS_REACHED=list(range(2))

    def __init__(self, f, tol=1e-5, imax=300):
        """
        Initializes a new minimizer for a given cost function.

        :param f: the cost function to be minimized
        :type f: CostFunction
        """
        self._f=f
        self._tol = tol
        self._imax = imax
        self._result = None
        self._callback = None
        self.logger = logging.getLogger('inv.%s'%self.__class__.__name__)

    def setTolerance(self, tol):
        """
        Sets the tolerance for the stopping criterion. The minimizer stops when
        an appropriate norm is less than `tol`.
        """
        self._tol = tol

    def setMaxIterations(self, imax):
        """
        Sets the maximum number of iterations before the minimizer terminates.
        """
        self._imax = imax

    def setCallback(self, callback):
        """
        Sets a callback function to be called after every iteration.
        The arguments to the function are: (k, x, fx, gfx), where
        k is the current iteration, x is the current estimate, fx=f(x) and
        gfx=grad f(x).
        """
        if callback is not None and not callable(callback):
            raise TypeError("Callback function not callable.")
        self._callback = callback

    def _doCallback(self, *args):
        if self._callback is not None:
            self._callback(*args)

    def getResult(self):
        """
        Returns the result of the minimization.
        """
        return self._result

    def getOptions(self):
        """
        Returns a dictionary of minimizer-specific options.
        """
        return {}

    def setOptions(self, **opts):
        """
        Sets minimizer-specific options. For a list of possible options see
        `getOptions()`.
        """
        raise NotImplementedError

    def run(self, x0):
        """
        Executes the minimization algorithm for *f* starting with the initial
        guess ``x0``.

        :return: `TOLERANCE_REACHED` or `MAX_ITERATIONS_REACHED`
        """
        raise NotImplementedError

    def logSummary(self):
        """
        Outputs a summary of the completed minimization process to the logger.
        """
        if hasattr(self._f, "Value_calls"):
            self.logger.warning("Number of function evaluations: %d"%self._f.Value_calls)
            self.logger.warning("Number of gradient evaluations: %d"%self._f.Gradient_calls)
            self.logger.warning("Number of inner product evaluations: %d"%self._f.DualProduct_calls)
            self.logger.warning("Number of argument evaluations: %d"%self._f.Arguments_calls)


##############################################################################
class MinimizerLBFGS(AbstractMinimizer):
    """
    Minimizer that uses the limited-memory Broyden-Fletcher-Goldfarb-Shanno
    method.
    """

    # History size
    _m = 15

    # Initial Hessian multiplier
    _initial_H = 1

    def getOptions(self):
        return {'historySize':self._m,'initialHessian':self._initial_H}

    def setOptions(self, **opts):
        for o in opts:
            if o=='historySize':
                self._m=opts[o]
            elif o=='initialHessian':
                self._initial_H=opts[o]
            else:
                raise KeyError("Invalid option '%s'"%o)

    def run(self, x):
        args=self._f.getArguments(x)
        gf=self._f.getGradient(x, *args)
        fx=self._f(x, *args)
        if self._f.provides_inverse_Hessian_approximation:
            self._f.updateHessian()
            invH_scale = None
        else:
            invH_scale = self._initial_H
        k=0
        error=2*self._tol
        s_and_y=[]

        self._doCallback(k, x, fx, gf)

        while error > self._tol and k < self._imax:
            if k > 0:
                self.logger.info("LBFGS.iteration %d, error=%e"%(k,error))
            else:
                self.logger.info("LBFGS.iteration %d"%k)
            # determine search direction
            p = -self._twoLoop(invH_scale, gf, s_and_y, x, *args)

            if invH_scale: self.logger.debug("H = %s"%invH_scale)
            self.logger.debug("grad f(x) = %s"%gf)
            #self.logger.debug("p = %s"%p)
            #self.logger.debug("x = %s"%x)

            # determine step length
            alpha, fx_new, gf_new = line_search(self._f, x, p, gf, fx)
            self.logger.debug("LBFGS.alpha=%e"%(alpha))
            # execute the step
            x_new = x + alpha*p
            if gf_new is None:
                args=self._f.getArguments(x_new)
                gf_new=self._f.getGradient(x_new, *args)

            delta_x=x_new-x
            delta_g=gf_new-gf
            rho=self._f.getDualProduct(delta_x, delta_g)
            self.logger.debug("LBFGS.rho=%e"%rho)
            if abs(rho)>0:
                s_and_y.append((delta_x, delta_g, rho))
            else:
                raise ZeroDivisionError("LBFGS break down.")
            # this needs to be reviewed
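            # convergence is measured by the relative change of the cost
            # function value between iterations; fx_new==0 is special-cased
            # to avoid a division by zero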
            if fx_new==0.:
                error=fx
            else:
                error=abs(fx_new-fx)/abs(fx_new)

            self._f.updateHessian()
            x=x_new
            gf=gf_new
            fx=fx_new
            k+=1
            self._doCallback(k, x, fx, gf)
            if error<=self._tol: break

            # delete oldest vector pair
            if k>self._m: s_and_y.pop(0)

            if not self._f.provides_inverse_Hessian_approximation:
                # set the new scaling factor (approximation of inverse Hessian)
                denom=self._f.getDualProduct(delta_g, delta_g)
                if denom > 0:
                    invH_scale=self._f.getDualProduct(delta_x,delta_g)/denom
                else:
                    invH_scale=self._initial_H
                    self.logger.debug("LBFGS.Break down in H update. Resetting to initial value %s."%self._initial_H)

        if k >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("LBFGS.Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("LBFGS.Success after %d iterations! Final error=%e"%(k,error))
        self._result=x
        return reason

    def _twoLoop(self, invH_scale, gf, s_and_y, x, *args):
        """
        Helper for the L-BFGS method.
        See 'Numerical Optimization' by J. Nocedal for an explanation.
        """
        q=gf
        alpha=[]
        for s, y, rho in reversed(s_and_y):
            a=self._f.getDualProduct(s, q)/rho
            alpha.append(a)
            q=q-a*y

        if self._f.provides_inverse_Hessian_approximation:
            r = self._f.getInverseHessianApproximation(x, q, *args)
        else:
            r = invH_scale * q

        for s, y, rho in s_and_y:
            beta = self._f.getDualProduct(r, y)/rho
            a = alpha.pop()
            r = r + s * (a-beta)
        return r

##############################################################################
class MinimizerBFGS(AbstractMinimizer):
    """
    Minimizer that uses the Broyden-Fletcher-Goldfarb-Shanno method.
    """

    # Initial Hessian multiplier
    _initial_H = 1

    def getOptions(self):
        return {'initialHessian':self._initial_H}

    def setOptions(self, **opts):
        for o in opts:
            if o=='initialHessian':
                self._initial_H=opts[o]
            else:
                raise KeyError("Invalid option '%s'"%o)

    def run(self, x):
        args=self._f.getArguments(x)
        gf=self._f.getGradient(x, *args)
        fx=self._f(x, *args)
        k=0
        try:
            n=len(x)
        except:
            n=x.getNumberOfDataPoints()
        I=np.eye(n)
        H=self._initial_H*I
        gnorm=Lsup(gf)
        self._doCallback(k, x, fx, gf)

        while gnorm > self._tol and k < self._imax:
            self.logger.info("iteration %d, gnorm=%e"%(k,gnorm))

            # determine search direction
            d=-self._f.getDualProduct(H, gf)

            self.logger.debug("H = %s"%H)
            self.logger.debug("grad f(x) = %s"%gf)
            self.logger.debug("d = %s"%d)
            self.logger.debug("x = %s"%x)

            # determine step length
            alpha, fx, gf_new = line_search(self._f, x, d, gf, fx)
            self.logger.debug("alpha=%e"%alpha)
            # execute the step
            x_new=x+alpha*d
            delta_x=x_new-x
            x=x_new
            if gf_new is None:
                gf_new=self._f.getGradient(x_new)
            delta_g=gf_new-gf
            gf=gf_new
            k+=1
            self._doCallback(k, x, fx, gf)
            gnorm=Lsup(gf)
            if gnorm<=self._tol: break

            # update Hessian
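            # BFGS update of the inverse Hessian approximation:
            #   H <- (I - rho*s*y^T) H (I - rho*y*s^T) + rho*s*s^T
            # with s=delta_x, y=delta_g and rho=1/<s,y>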
            denom=self._f.getDualProduct(delta_x, delta_g)
            if denom < EPSILON * gnorm:
                denom=1e-5
                self.logger.debug("Break down in H update. Resetting.")
            rho=1./denom
            self.logger.debug("rho=%e"%rho)
            A=I-rho*delta_x[:,None]*delta_g[None,:]
            AT=I-rho*delta_g[:,None]*delta_x[None,:]
            H=self._f.getDualProduct(A, self._f.getDualProduct(H,AT)) + rho*delta_x[:,None]*delta_x[None,:]

        if k >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final gnorm=%e"%(k,gnorm))

        self._result=x
        return reason

##############################################################################
class MinimizerNLCG(AbstractMinimizer):
    """
    Minimizer that uses the nonlinear conjugate gradient method
    (Fletcher-Reeves variant).
    """

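    # Uses the Fletcher-Reeves update beta = <r_new,r_new>/<r_old,r_old> with
    # r = -grad f(x), and restarts with the steepest descent direction when
    # the iteration count reaches the problem size or the new direction is
    # not a descent direction.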
    def run(self, x):
        i=0
        k=0
        args=self._f.getArguments(x)
        r=-self._f.getGradient(x, *args)
        fx=self._f(x, *args)
        d=r
        delta=self._f.getDualProduct(r,r)
        delta0=delta
        self._doCallback(i, x, fx, -r)

        while i<self._imax and Lsup(r)>self._tol:
            self.logger.info("iteration %d"%i)
            self.logger.debug("grad f(x) = %s"%(-r))
            self.logger.debug("d = %s"%d)
            self.logger.debug("x = %s"%x)

            alpha, fx, gf_new = line_search(self._f, x, d, -r, fx, c2=0.4)
            self.logger.debug("alpha=%e"%(alpha))
            x=x+alpha*d
            r=-self._f.getGradient(x) if gf_new is None else -gf_new
            delta_o=delta
            delta=self._f.getDualProduct(r,r)
            beta=delta/delta_o
            d=r+beta*d
            k=k+1
            try:
                lenx=len(x)
            except:
                lenx=x.getNumberOfDataPoints()
            if k == lenx or self._f.getDualProduct(r,d) <= 0:
                d=r
                k=0
            i+=1
            self._doCallback(i, x, fx, gf_new)

        if i >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final delta=%e"%(i,delta))

        self._result=x
        return reason


if __name__=="__main__":
    # Example usage with function 'rosen' (minimum=[1,1,...,1]):
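    # Run e.g. as ``python minimizers.py bfgs`` or ``python minimizers.py nlcg``;
    # without a command line argument the L-BFGS minimizer is used.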
    from scipy.optimize import rosen, rosen_der
    from esys.downunder import MeteredCostFunction
    import sys
    N=10
    x0=np.array([4.]*N) # initial guess

    class RosenFunc(MeteredCostFunction):
        def __init__(self):
            super(RosenFunc, self).__init__()
            self.provides_inverse_Hessian_approximation=False
        def _getDualProduct(self, f0, f1):
            return np.dot(f0, f1)
        def _getValue(self, x, *args):
            return rosen(x)
        def _getGradient(self, x, *args):
            return rosen_der(x)

    f=RosenFunc()
    m=None
    if len(sys.argv)>1:
        method=sys.argv[1].lower()
        if method=='nlcg':
            m=MinimizerNLCG(f)
        elif method=='bfgs':
            m=MinimizerBFGS(f)

    if m is None:
        # default
        m=MinimizerLBFGS(f)
        #m.setOptions(historySize=10000)

    logging.basicConfig(format='[%(funcName)s] \033[1;30m%(message)s\033[0m', level=logging.DEBUG)
    m.setTolerance(1e-5)
    m.setMaxIterations(600)
    m.run(x0)
    m.logSummary()
    print("\tLsup(result)=%.8f"%np.amax(abs(m.getResult())))

    #from scipy.optimize import fmin_cg
    #print("scipy ref=%.8f"%np.amax(abs(fmin_cg(rosen, x0, rosen_der, maxiter=10000))))

