##############################################################################
#
# Copyright (c) 2003-2012 by University of Queensland
# http://www.uq.edu.au
#
# Primary Business: Queensland, Australia
# Licensed under the Open Software License version 3.0
# http://www.opensource.org/licenses/osl-3.0.php
#
# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
# Development since 2012 by School of Earth Sciences
#
##############################################################################

"""Generic minimization algorithms"""

__copyright__="""Copyright (c) 2003-2012 by University of Queensland
http://www.uq.edu.au
Primary Business: Queensland, Australia"""
__license__="""Licensed under the Open Software License version 3.0
http://www.opensource.org/licenses/osl-3.0.php"""
__url__="https://launchpad.net/escript-finley"

__all__ = ['AbstractMinimizer', 'MinimizerLBFGS', 'MinimizerBFGS', 'MinimizerNLCG']

import logging
import numpy as np

try:
    from esys.escript import Lsup, sqrt, EPSILON
except ImportError:
    Lsup=lambda x: np.amax(abs(x))
    sqrt=np.sqrt
    EPSILON=1e-18

import sys
if sys.version_info[0]>2:
    xrange=range

lslogger=logging.getLogger('inv.minimizer.linesearch')
zoomlogger=logging.getLogger('inv.minimizer.linesearch.zoom')

def _zoom(phi, gradphi, phiargs, alpha_lo, alpha_hi, phi_lo, phi_hi, c1, c2, phi0, gphi0, IMAX=25):
    """
    Helper function for `line_search` below which tries to tighten the range
    alpha_lo...alpha_hi. See Chapter 3 of 'Numerical Optimization' by
    J. Nocedal for an explanation.
    """
    i=0
    while True:
        alpha=alpha_lo+.5*(alpha_hi-alpha_lo) # should use interpolation...
        args_a=phiargs(alpha)
        phi_a=phi(alpha, *args_a)
        zoomlogger.debug("Zoom.iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
        if phi_a > phi0+c1*alpha*gphi0 or phi_a >= phi_lo:
            alpha_hi=alpha
        else:
            gphi_a=gradphi(alpha, *args_a)
            zoomlogger.debug("Zoom.grad(phi(alpha))=%e"%(gphi_a))
            if np.abs(gphi_a) <= -c2*gphi0:
                break
            if gphi_a*(alpha_hi-alpha_lo) >= 0:
                alpha_hi = alpha_lo
            alpha_lo=alpha
            phi_lo=phi_a
        i+=1
        if i>IMAX:
            gphi_a=None
            break
    return alpha, phi_a, gphi_a

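# For reference, line_search below looks for a step size alpha that satisfies
# the strong Wolfe conditions (Nocedal & Wright, 'Numerical Optimization',
# Ch. 3), written in terms of phi(alpha):=f(x+alpha*p):
#
#   sufficient decrease:  phi(alpha) <= phi(0) + c1*alpha*phi'(0)
#   curvature:            |phi'(alpha)| <= -c2*phi'(0)
#
# _zoom above is the bisection-style helper that is invoked once a bracket
# containing such a step has been found.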
def line_search(f, x, p, gf, fx, alpha_max=50.0, c1=1e-4, c2=0.9, IMAX=15):
    """
    Line search method that satisfies the strong Wolfe conditions.
    See Chapter 3 of 'Numerical Optimization' by J. Nocedal for an explanation.

    :param f: callable objective function f(x); it must also provide
              getGradient() and getDualProduct() methods (and, optionally,
              getArguments())
    :param x: start value for the line search
    :param p: search direction
    :param gf: value for the gradient of f at x
    :param fx: value of f(x)
    :param alpha_max: algorithm terminates if alpha reaches this value
    :param c1: value for Armijo condition (see reference)
    :param c2: value for curvature condition (see reference)
    :param IMAX: maximum number of iterations to perform
    """
    # this stores the latest gradf(x+a*p) which is returned
    gf_new=[gf]

    def phi(a, *args):
        """ phi(a):=f(x+a*p) """
        return f(x+a*p, *args)
    def gradphi(a, *args):
        gf_new[0]=f.getGradient(x+a*p, *args)
        return f.getDualProduct(p, gf_new[0])

    def phiargs(a):
        try:
            args=f.getArguments(x+a*p)
        except:
            args=()
        return args

    old_alpha=0.
    # we assume gf is properly scaled so alpha=1 is a reasonable starting value
    alpha=1.
    if fx is None:
        args0=phiargs(0.)
        phi0=phi(0., *args0)
    else:
        phi0=fx
    lslogger.debug("phi(0)=%e"%(phi0))
    gphi0=f.getDualProduct(p, gf) #gradphi(0., *args0)
    lslogger.debug("grad phi(0)=%e"%(gphi0))
    old_phi_a=phi0
    i=1

    while i<IMAX and alpha>0. and alpha<alpha_max:

        args_a=phiargs(alpha)
        phi_a=phi(alpha, *args_a)
        lslogger.debug("Line Search.iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
        if (phi_a > phi0+c1*alpha*gphi0) or ((phi_a>=old_phi_a) and (i>1)):
            alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, old_alpha, alpha, old_phi_a, phi_a, c1, c2, phi0, gphi0)
            break

        gphi_a=gradphi(alpha, *args_a)
        if np.abs(gphi_a) <= -c2*gphi0:
            break
        if gphi_a >= 0:
            alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, alpha, old_alpha, phi_a, old_phi_a, c1, c2, phi0, gphi0)
            break

        old_alpha=alpha
        # the factor is arbitrary as long as there is sufficient increase
        alpha=2.*alpha
        old_phi_a=phi_a
        i+=1

    return alpha, phi_a, gf_new[0]

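# A minimal usage sketch (illustration only, not executed): ``f`` has to be
# callable and provide getGradient() and getDualProduct(); getArguments() is
# optional. The quadratic cost function below is hypothetical:
#
#   class Quadratic(object):
#       def __call__(self, x, *args): return np.dot(x, x)
#       def getGradient(self, x, *args): return 2.*x
#       def getDualProduct(self, p, g): return np.dot(p, g)
#
#   q=Quadratic(); x=np.array([1., -2.]); g=q.getGradient(x)
#   alpha, fmin, gmin = line_search(q, x, -g, g, q(x))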
##############################################################################
class AbstractMinimizer(object):
    """
    Base class for function minimization methods.
    """

    TOLERANCE_REACHED, MAX_ITERATIONS_REACHED=list(xrange(2))

    def __init__(self, f, tol=1e-5, imax=300):
        """
        Initializes a new minimizer for a given cost function.

        :param f: the cost function to be minimized
        :type f: CostFunction
        :param tol: tolerance for the stopping criterion (see `setTolerance`)
        :param imax: maximum number of iterations (see `setMaxIterations`)
        """
        self._f=f
        self._tol = tol
        self._imax = imax
        self._result = None
        self._callback = None
        self.logger = logging.getLogger('inv.%s'%self.__class__.__name__)

    def setTolerance(self, tol):
        """
        Sets the tolerance for the stopping criterion. The minimizer stops
        when an appropriate norm is less than `tol`.
        """
        self._tol = tol

    def setMaxIterations(self, imax):
        """
        Sets the maximum number of iterations before the minimizer terminates.
        """
        self._imax = imax

    def setCallback(self, callback):
        """
        Sets a callback function to be called after every iteration.
        The arguments to the function are: (k, x, fx, gfx), where
        k is the current iteration, x is the current estimate, fx=f(x) and
        gfx=grad f(x).
        """
        if callback is not None and not callable(callback):
            raise TypeError("Callback function not callable.")
        self._callback = callback

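    # Example (names are illustrative only): report progress from a callback,
    #
    #   def report(k, x, fx, gfx):
    #       print("iteration %d: f(x)=%e"%(k, fx))
    #   minimizer.setCallback(report)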
    def _doCallback(self, *args):
        if self._callback is not None:
            self._callback(*args)

    def getResult(self):
        """
        Returns the result of the minimization.
        """
        return self._result

    def getOptions(self):
        """
        Returns a dictionary of minimizer-specific options.
        """
        return {}

    def setOptions(self, **opts):
        """
        Sets minimizer-specific options. For a list of possible options see
        `getOptions()`.
        """
        raise NotImplementedError

    def run(self, x0):
        """
        Executes the minimization algorithm for *f* starting with the initial
        guess ``x0``.

        :return: `TOLERANCE_REACHED` or `MAX_ITERATIONS_REACHED`
        """
        raise NotImplementedError

    def logSummary(self):
        """
        Outputs a summary of the completed minimization process to the logger.
        """
        if hasattr(self._f, "Value_calls"):
            self.logger.warning("Number of function evaluations: %d"%self._f.Value_calls)
            self.logger.warning("Number of gradient evaluations: %d"%self._f.Gradient_calls)
            self.logger.warning("Number of inner product evaluations: %d"%self._f.DualProduct_calls)
            self.logger.warning("Number of argument evaluations: %d"%self._f.Arguments_calls)


##############################################################################
class MinimizerLBFGS(AbstractMinimizer):
    """
    Minimizer that uses the limited-memory Broyden-Fletcher-Goldfarb-Shanno
    method.
    """

    # History size
    _m = 15

    # Initial Hessian multiplier
    _initial_H = 1

    def getOptions(self):
        return {'historySize':self._m, 'initialHessian':self._initial_H}

    def setOptions(self, **opts):
        for o in opts:
            if o=='historySize':
                self._m=opts[o]
            elif o=='initialHessian':
                self._initial_H=opts[o]
            else:
                raise KeyError("Invalid option '%s'"%o)

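    # Example (values are illustrative only): keep more vector pairs and use a
    # different initial Hessian scaling before calling run(),
    #
    #   m=MinimizerLBFGS(f)
    #   m.setOptions(historySize=25, initialHessian=1e-3)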
    def run(self, x):
        args=self._f.getArguments(x)
        gf=self._f.getGradient(x, *args)
        fx=self._f(x, *args)
        if self._f.provides_inverse_Hessian_approximation:
            self._f.updateHessian()
            invH_scale = None
        else:
            invH_scale = self._initial_H
        k=0
        error=2*self._tol
        s_and_y=[]

        self._doCallback(k, x, fx, gf)

        while error > self._tol and k < self._imax:
            #self.logger.info("\033[1;31miteration %d\033[1;30m, error=%e"%(k,error))
            if k>0:
                self.logger.info("LBFGS.iteration %d, error=%e"%(k,error))
            else:
                self.logger.info("LBFGS.iteration %d"%k)
            # determine search direction
            p = -self._twoLoop(invH_scale, gf, s_and_y, x, *args)

            if invH_scale: self.logger.debug("H = %s"%invH_scale)
            self.logger.debug("grad f(x) = %s"%gf)
            #self.logger.debug("p = %s"%p)
            #self.logger.debug("x = %s"%x)

            # determine step length
            alpha, fx_new, gf_new = line_search(self._f, x, p, gf, fx)
            self.logger.debug("LBFGS.alpha=%e"%(alpha))
            # execute the step
            x_new = x + alpha*p
            #self.logger.debug("gf_new = %s"%gf_new)
            if gf_new is None:
                args=self._f.getArguments(x_new)
                gf_new=self._f.getGradient(x_new, *args)

            delta_x=x_new-x
            delta_g=gf_new-gf
            rho=self._f.getDualProduct(delta_x, delta_g)
            self.logger.debug("LBFGS.rho=%e"%rho)
            if abs(rho)>0:
                s_and_y.append((delta_x, delta_g, rho))
            else:
                raise ZeroDivisionError("LBFGS break down.")
            # this needs to be reviewed
            if fx_new==0.:
                error=fx
            else:
                error=abs(fx_new-fx)/abs(fx_new)

            self._f.updateHessian()
            x=x_new
            gf=gf_new
            fx=fx_new
            k+=1
            self._doCallback(k, x, fx, gf)
            if (error<=self._tol): break

            # delete oldest vector pair
            if k>self._m: s_and_y.pop(0)

            if not self._f.provides_inverse_Hessian_approximation:
                # set the new scaling factor (approximation of inverse Hessian)
                denom=self._f.getDualProduct(delta_g, delta_g)
                if denom > 0:
                    invH_scale=self._f.getDualProduct(delta_x, delta_g)/denom
                else:
                    invH_scale=self._initial_H
                    self.logger.debug("LBFGS.Break down in H update. Resetting to initial value %s."%self._initial_H)

        if k >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("LBFGS.Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("LBFGS.Success after %d iterations! Final error=%e"%(k,error))
        self._result=x
        return reason

    def _twoLoop(self, invH_scale, gf, s_and_y, x, *args):
        """
        Helper for the L-BFGS method.
        See 'Numerical Optimization' by J. Nocedal for an explanation.
        """
        q=gf
        alpha=[]
        for s, y, rho in reversed(s_and_y):
            a=self._f.getDualProduct(s, q)/rho
            alpha.append(a)
            q=q-a*y

        if self._f.provides_inverse_Hessian_approximation:
            r = self._f.getInverseHessianApproximation(x, q, *args)
        else:
            r = invH_scale * q

        for s, y, rho in s_and_y:
            beta=self._f.getDualProduct(r, y)/rho
            a=alpha.pop()
            r=r+s*(a-beta)
        return r

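# For reference, the two-loop recursion in MinimizerLBFGS._twoLoop written
# against plain numpy vectors (a sketch only; the method above goes through
# getDualProduct so it also works for escript Data objects):
#
#   q=gf.copy(); alpha=[]
#   for s, y, rho in reversed(s_and_y):
#       a=np.dot(s, q)/rho; alpha.append(a); q=q-a*y
#   r=invH_scale*q                      # initial inverse Hessian scaling
#   for s, y, rho in s_and_y:
#       beta=np.dot(y, r)/rho; r=r+s*(alpha.pop()-beta)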
##############################################################################
class MinimizerBFGS(AbstractMinimizer):
    """
    Minimizer that uses the Broyden-Fletcher-Goldfarb-Shanno method.
    """

    # Initial Hessian multiplier
    _initial_H = 1

    def getOptions(self):
        return {'initialHessian':self._initial_H}

    def setOptions(self, **opts):
        for o in opts:
            if o=='initialHessian':
                self._initial_H=opts[o]
            else:
                raise KeyError("Invalid option '%s'"%o)

    def run(self, x):
        args=self._f.getArguments(x)
        gf=self._f.getGradient(x, *args)
        fx=self._f(x, *args)
        k=0
        try:
            n=len(x)
        except:
            n=x.getNumberOfDataPoints()
        I=np.eye(n)
        H=self._initial_H*I
        gnorm=Lsup(gf)
        self._doCallback(k, x, fx, gf)

        while gnorm > self._tol and k < self._imax:
            self.logger.info("iteration %d, gnorm=%e"%(k,gnorm))

            # determine search direction
            d=-self._f.getDualProduct(H, gf)

            self.logger.debug("H = %s"%H)
            self.logger.debug("grad f(x) = %s"%gf)
            self.logger.debug("d = %s"%d)
            self.logger.debug("x = %s"%x)

            # determine step length
            alpha, fx, gf_new = line_search(self._f, x, d, gf, fx)
            self.logger.debug("alpha=%e"%alpha)
            # execute the step
            x_new=x+alpha*d
            delta_x=x_new-x
            x=x_new
            if gf_new is None:
                gf_new=self._f.getGradient(x_new)
            delta_g=gf_new-gf
            gf=gf_new
            k+=1
            self._doCallback(k, x, fx, gf)
            gnorm=Lsup(gf)
            if (gnorm<=self._tol): break

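            # For reference: the lines below apply the BFGS inverse-Hessian
            # update (see Nocedal & Wright, 'Numerical Optimization')
            #   H <- (I - rho*s*y^T) H (I - rho*y*s^T) + rho*s*s^T
            # with s=delta_x, y=delta_g and rho=1/(s^T y).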
            # update Hessian
            denom=self._f.getDualProduct(delta_x, delta_g)
            if denom < EPSILON * gnorm:
                denom=1e-5
                self.logger.debug("Break down in H update. Resetting.")
            rho=1./denom
            self.logger.debug("rho=%e"%rho)
            A=I-rho*delta_x[:,None]*delta_g[None,:]
            AT=I-rho*delta_g[:,None]*delta_x[None,:]
            H=self._f.getDualProduct(A, self._f.getDualProduct(H,AT)) + rho*delta_x[:,None]*delta_x[None,:]
        if k >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final gnorm=%e"%(k,gnorm))

        self._result=x
        return reason

##############################################################################
class MinimizerNLCG(AbstractMinimizer):
    """
    Minimizer that uses the nonlinear conjugate gradient method
    (Fletcher-Reeves variant).
    """

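    # run() below uses the Fletcher-Reeves update
    #   beta = (r_new . r_new)/(r_old . r_old),    d = r_new + beta*d
    # with r = -grad f(x), and resets d=r whenever the iteration count reaches
    # the problem dimension or d is no longer a descent direction.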
    def run(self, x):
        i=0
        k=0
        args=self._f.getArguments(x)
        r=-self._f.getGradient(x, *args)
        fx=self._f(x, *args)
        d=r
        delta=self._f.getDualProduct(r,r)
        delta0=delta
        self._doCallback(i, x, fx, -r)

        while i<self._imax and Lsup(r)>self._tol:
            self.logger.info("iteration %d"%i)
            self.logger.debug("grad f(x) = %s"%(-r))
            self.logger.debug("d = %s"%d)
            self.logger.debug("x = %s"%x)

            alpha, fx, gf_new = line_search(self._f, x, d, -r, fx, c2=0.4)
            self.logger.debug("alpha=%e"%(alpha))
            x=x+alpha*d
            r=-self._f.getGradient(x) if gf_new is None else -gf_new
            delta_o=delta
            delta=self._f.getDualProduct(r,r)
            beta=delta/delta_o
            d=r+beta*d
            k=k+1
            try:
                lenx=len(x)
            except:
                lenx=x.getNumberOfDataPoints()
            if k == lenx or self._f.getDualProduct(r,d) <= 0:
                d=r
                k=0
            i+=1
            self._doCallback(i, x, fx, gf_new)

        if i >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final delta=%e"%(i,delta))

        self._result=x
        return reason


if __name__=="__main__":
    # Example usage with function 'rosen' (minimum=[1,1,...1]):
    from scipy.optimize import rosen, rosen_der
    from esys.downunder import MeteredCostFunction
    import sys
    N=10
    x0=np.array([4.]*N) # initial guess

    class RosenFunc(MeteredCostFunction):
        def __init__(self):
            super(RosenFunc, self).__init__()
            self.provides_inverse_Hessian_approximation=False
        def _getDualProduct(self, f0, f1):
            return np.dot(f0, f1)
        def _getValue(self, x, *args):
            return rosen(x)
        def _getGradient(self, x, *args):
            return rosen_der(x)

    f=RosenFunc()
    m=None
    if len(sys.argv)>1:
        method=sys.argv[1].lower()
        if method=='nlcg':
            m=MinimizerNLCG(f)
        elif method=='bfgs':
            m=MinimizerBFGS(f)

    if m is None:
        # default
        m=MinimizerLBFGS(f)
        #m.setOptions(historySize=10000)

    logging.basicConfig(format='[%(funcName)s] \033[1;30m%(message)s\033[0m', level=logging.DEBUG)
    m.setTolerance(1e-5)
    m.setMaxIterations(600)
    m.run(x0)
    m.logSummary()
    print("\tLsup(result)=%.8f"%np.amax(abs(m.getResult())))

    #from scipy.optimize import fmin_cg
    #print("scipy ref=%.8f"%np.amax(abs(fmin_cg(rosen, x0, rosen_der, maxiter=10000))))

