Contents of /trunk/downunder/py_src/minimizers.py

Revision 4076 - Thu Nov 15 03:45:24 2012 UTC by gross
File MIME type: text/x-python
File size: 17212 byte(s)
##############################################################################
#
# Copyright (c) 2003-2012 by University of Queensland
# http://www.uq.edu.au
#
# Primary Business: Queensland, Australia
# Licensed under the Open Software License version 3.0
# http://www.opensource.org/licenses/osl-3.0.php
#
# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
# Development since 2012 by School of Earth Sciences
#
##############################################################################

"""Generic minimization algorithms"""

__copyright__="""Copyright (c) 2003-2012 by University of Queensland
http://www.uq.edu.au
Primary Business: Queensland, Australia"""
__license__="""Licensed under the Open Software License version 3.0
http://www.opensource.org/licenses/osl-3.0.php"""
__url__="https://launchpad.net/escript-finley"

__all__ = ['AbstractMinimizer', 'MinimizerLBFGS', 'MinimizerBFGS', 'MinimizerNLCG']

import logging
import numpy as np

try:
    from esys.escript import Lsup, sqrt, EPSILON
except ImportError:
    Lsup=lambda x: np.amax(abs(x))
    sqrt=np.sqrt
    EPSILON=1e-18

import sys
if sys.version_info[0]>2:
    xrange=range

lslogger=logging.getLogger('inv.minimizer.linesearch')
zoomlogger=logging.getLogger('inv.minimizer.linesearch.zoom')

def _zoom(phi, gradphi, phiargs, alpha_lo, alpha_hi, phi_lo, phi_hi, c1, c2, phi0, gphi0, IMAX=25):
    """
    Helper function for `line_search` below which tries to tighten the range
    alpha_lo...alpha_hi. See Chapter 3 of 'Numerical Optimization' by
    J. Nocedal for an explanation.
    """
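    # Bracketing strategy (cf. the 'zoom' procedure in Nocedal & Wright, Ch. 3):
    # alpha_lo is the step giving the lowest phi value found so far that still
    # satisfies the sufficient decrease condition, and [alpha_lo, alpha_hi]
    # brackets a step satisfying the strong Wolfe conditions.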
    i=0
    while True:
        alpha=alpha_lo+.5*(alpha_hi-alpha_lo) # should use interpolation...
        args_a=phiargs(alpha)
        phi_a=phi(alpha, *args_a)
        zoomlogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
        if phi_a > phi0+c1*alpha*gphi0 or phi_a >= phi_lo:
            alpha_hi=alpha
        else:
            gphi_a=gradphi(alpha, *args_a)
            zoomlogger.debug("grad(phi(alpha))=%e"%(gphi_a))
            if np.abs(gphi_a) <= -c2*gphi0:
                break
            if gphi_a*(alpha_hi-alpha_lo) >= 0:
                alpha_hi = alpha_lo
            alpha_lo=alpha
            phi_lo=phi_a
        i+=1
        if i>IMAX:
            gphi_a=None
            break
    return alpha, phi_a, gphi_a

def line_search(f, x, p, gf, fx, alpha_max=50.0, c1=1e-4, c2=0.9, IMAX=15):
    """
    Line search method that satisfies the strong Wolfe conditions.
    See Chapter 3 of 'Numerical Optimization' by J. Nocedal for an explanation.

    :param f: callable objective function f(x)
    :param x: start value for the line search
    :param p: search direction
    :param gf: value for the gradient of f at x
    :param fx: value of f(x)
    :param alpha_max: algorithm terminates if alpha reaches this value
    :param c1: value for Armijo condition (see reference)
    :param c2: value for curvature condition (see reference)
    :param IMAX: maximum number of iterations to perform
    :return: tuple (alpha, phi(alpha), g) where phi(alpha)=f(x+alpha*p) and g is
             the most recently evaluated gradient of f
    """
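    # Strong Wolfe conditions with phi(a)=f(x+a*p) and gphi(a)=<p, grad f(x+a*p)>:
    #   sufficient decrease:  phi(a) <= phi(0) + c1*a*gphi(0)
    #   curvature:            |gphi(a)| <= c2*|gphi(0)|
    # (gphi(0) < 0 for a descent direction, hence the -c2*gphi0 tests below.)
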
    # this stores the latest gradf(x+a*p) which is returned
    gf_new=[gf]

    def phi(a, *args):
        """ phi(a):=f(x+a*p) """
        return f(x+a*p, *args)
    def gradphi(a, *args):
        gf_new[0]=f.getGradient(x+a*p, *args)
        return f.getDualProduct(p, gf_new[0])

    def phiargs(a):
        try:
            args=f.getArguments(x+a*p)
        except:
            args=()
        return args

    old_alpha=0.
    # we assume gf is properly scaled so alpha=1 is a reasonable starting value
    alpha=1.
    if fx is None:
        args0=phiargs(0.)
        phi0=phi(0., *args0)
    else:
        phi0=fx
    lslogger.debug("phi(0)=%e"%(phi0))
    gphi0=f.getDualProduct(p, gf) #gradphi(0., *args0)
    lslogger.debug("grad phi(0)=%e"%(gphi0))
    old_phi_a=phi0
    i=1

    while i<IMAX and alpha>0. and alpha<alpha_max:

        args_a=phiargs(alpha)
        phi_a=phi(alpha, *args_a)
        lslogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
        if (phi_a > phi0+c1*alpha*gphi0) or ((phi_a>=old_phi_a) and (i>1)):
            alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, old_alpha, alpha, old_phi_a, phi_a, c1, c2, phi0, gphi0)
            break

        gphi_a=gradphi(alpha, *args_a)
        if np.abs(gphi_a) <= -c2*gphi0:
            break
        if gphi_a >= 0:
            alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, alpha, old_alpha, phi_a, old_phi_a, c1, c2, phi0, gphi0)
            break

        old_alpha=alpha
        # the factor is arbitrary as long as there is sufficient increase
        alpha=2.*alpha
        old_phi_a=phi_a
        i+=1

    return alpha, phi_a, gf_new[0]

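# A minimal usage sketch for ``line_search`` (the cost function object ``F``,
# the point ``x`` and the names below are illustrative; ``F`` is assumed to
# provide __call__, getGradient, getDualProduct and getArguments as used above):
#
#     g = F.getGradient(x)
#     p = -g                                    # some descent direction
#     alpha, fx_new, g_new = line_search(F, x, p, g, F(x))
#     x = x + alpha*p
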
##############################################################################
class AbstractMinimizer(object):
    """
    Base class for function minimization methods.
    """

    TOLERANCE_REACHED, MAX_ITERATIONS_REACHED=list(xrange(2))

    def __init__(self, f, tol=1e-5, imax=300):
        """
        Initializes a new minimizer for a given cost function.

        :param f: the cost function to be minimized
        :type f: CostFunction
        """
        self._f=f
        self._tol = tol
        self._imax = imax
        self._result = None
        self._callback = None
        self.logger = logging.getLogger('inv.%s'%self.__class__.__name__)

    def setTolerance(self, tol):
        """
        Sets the tolerance for the stopping criterion. The minimizer stops when
        an appropriate norm is less than `tol`.
        """
        self._tol = tol

    def setMaxIterations(self, imax):
        """
        Sets the maximum number of iterations before the minimizer terminates.
        """
        self._imax = imax

    def setCallback(self, callback):
        """
        Sets a callback function to be called after every iteration.
        The arguments to the function are: (k, x, fx, gfx), where
        k is the current iteration, x is the current estimate, fx=f(x) and
        gfx=grad f(x).
        """
        if callback is not None and not callable(callback):
            raise TypeError("Callback function not callable.")
        self._callback = callback

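    # A sketch of a possible callback (name and body are illustrative only):
    #
    #     def progress(k, x, fx, gfx):
    #         print("iteration %d: f(x)=%g"%(k, fx))
    #     minimizer.setCallback(progress)
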
    def _doCallback(self, *args):
        if self._callback is not None:
            self._callback(*args)

    def getResult(self):
        """
        Returns the result of the minimization.
        """
        return self._result

    def getOptions(self):
        """
        Returns a dictionary of minimizer-specific options.
        """
        return {}

    def setOptions(self, **opts):
        """
        Sets minimizer-specific options. For a list of possible options see
        `getOptions()`.
        """
        raise NotImplementedError

    def run(self, x0):
        """
        Executes the minimization algorithm for *f* starting with the initial
        guess ``x0``.

        :return: `TOLERANCE_REACHED` or `MAX_ITERATIONS_REACHED`
        """
        raise NotImplementedError

    def logSummary(self):
        """
        Outputs a summary of the completed minimization process to the logger.
        """
        if hasattr(self._f, "Value_calls"):
            self.logger.warning("Number of function evaluations: %d"%self._f.Value_calls)
            self.logger.warning("Number of gradient evaluations: %d"%self._f.Gradient_calls)
            self.logger.warning("Number of inner product evaluations: %d"%self._f.DualProduct_calls)
            self.logger.warning("Number of argument evaluations: %d"%self._f.Arguments_calls)

##############################################################################
class MinimizerLBFGS(AbstractMinimizer):
    """
    Minimizer that uses the limited-memory Broyden-Fletcher-Goldfarb-Shanno
    method.
    """

    # History size
    _m = 15

    # Initial Hessian multiplier
    _initial_H = 1

    def getOptions(self):
        return {'historySize':self._m,'initialHessian':self._initial_H}

    def setOptions(self, **opts):
        for o in opts:
            if o=='historySize':
                self._m=opts[o]
            elif o=='initialHessian':
                self._initial_H=opts[o]
            else:
                raise KeyError("Invalid option '%s'"%o)

    def run(self, x):
        args=self._f.getArguments(x)
        gf=self._f.getGradient(x, *args)
        fx=self._f(x, *args)
        if self._f.provides_inverse_Hessian_approximation:
            self._f.updateHessian()
            invH_scale = None
        else:
            invH_scale = self._initial_H
        k=0
        error=2*self._tol
        s_and_y=[]

        self._doCallback(k, x, fx, gf)

        while error > self._tol and k < self._imax:
            #self.logger.info("\033[1;31miteration %d\033[1;30m, error=%e"%(k,error))
            self.logger.info("iteration %d, error=%e"%(k,error))
            # determine search direction
            p = -self._twoLoop(invH_scale, gf, s_and_y, x, *args)

            if invH_scale: self.logger.debug("H = %s"%invH_scale)
            self.logger.debug("grad f(x) = %s"%gf)
            #self.logger.debug("p = %s"%p)
            #self.logger.debug("x = %s"%x)

            # determine step length
            alpha, fx_new, gf_new = line_search(self._f, x, p, gf, fx)
            self.logger.debug("alpha=%e"%(alpha))
            # execute the step
            x_new = x + alpha*p
            if gf_new is None:
                args=self._f.getArguments(x_new)
                gf_new=self._f.getGradient(x_new, *args)
            delta_x=x_new-x
            delta_g=gf_new-gf
            s_and_y.append((delta_x,delta_g, self._f.getDualProduct(delta_x, delta_g) ))

            # this needs to be reviewed
            if fx_new==0.:
                error=fx
            else:
                error=abs(fx_new-fx)/abs(fx_new)

            self._f.updateHessian()
            x=x_new
            gf=gf_new
            fx=fx_new
            k+=1
            self._doCallback(k, x, fx, gf)
            if (error<=self._tol): break

            # delete oldest vector pair
            if k>self._m: s_and_y.pop(0)

            if not self._f.provides_inverse_Hessian_approximation:
                # set the new scaling factor (approximation of inverse Hessian)
                denom=self._f.getDualProduct(delta_g, delta_g)
                if denom > 0:
                    invH_scale=self._f.getDualProduct(delta_x,delta_g)/denom
                else:
                    invH_scale=self._initial_H
                    self.logger.debug("Break down in H update. Resetting to initial value %s."%self._initial_H)

        if k >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final error=%e"%(k,error))
        self._result=x
        return reason

    def _twoLoop(self, invH_scale, gf, s_and_y, x, *args):
        """
        Helper for the L-BFGS method.
        See 'Numerical Optimization' by J. Nocedal for an explanation.
        """
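        # The two-loop recursion computes r ~ H^{-1}*gf without forming a matrix:
        # the first (backward) loop projects out the stored (s, y) corrections,
        # the middle step applies an initial inverse-Hessian guess (either the
        # cost function's own approximation or the scalar invH_scale), and the
        # second (forward) loop reinserts the corrections. Here rho is the
        # precomputed dual product <s, y> stored with each pair.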
        q=gf
        alpha=[]
        for s,y, rho in reversed(s_and_y):
            a=self._f.getDualProduct(s, q)/rho
            alpha.append(a)
            q=q-a*y

        if self._f.provides_inverse_Hessian_approximation:
            r= self._f.getInverseHessianApproximation(x, q, *args)
        else:
            r= invH_scale * q

        for s,y,rho in s_and_y:
            beta=self._f.getDualProduct(r, y)/rho
            a=alpha.pop()
            r=r+s*(a-beta)
        return r

##############################################################################
class MinimizerBFGS(AbstractMinimizer):
    """
    Minimizer that uses the Broyden-Fletcher-Goldfarb-Shanno method.
    """

    # Initial Hessian multiplier
    _initial_H = 1

    def getOptions(self):
        return {'initialHessian':self._initial_H}

    def setOptions(self, **opts):
        for o in opts:
            if o=='initialHessian':
                self._initial_H=opts[o]
            else:
                raise KeyError("Invalid option '%s'"%o)

    def run(self, x):
        args=self._f.getArguments(x)
        gf=self._f.getGradient(x, *args)
        fx=self._f(x, *args)
        k=0
        try:
            n=len(x)
        except:
            n=x.getNumberOfDataPoints()
        I=np.eye(n)
        H=self._initial_H*I
        gnorm=Lsup(gf)
        self._doCallback(k, x, fx, gf)

        while gnorm > self._tol and k < self._imax:
            self.logger.info("iteration %d, gnorm=%e"%(k,gnorm))

            # determine search direction
            d=-self._f.getDualProduct(H, gf)

            self.logger.debug("H = %s"%H)
            self.logger.debug("grad f(x) = %s"%gf)
            self.logger.debug("d = %s"%d)
            self.logger.debug("x = %s"%x)

            # determine step length
            alpha, fx, gf_new = line_search(self._f, x, d, gf, fx)
            self.logger.debug("alpha=%e"%alpha)
            # execute the step
            x_new=x+alpha*d
            delta_x=x_new-x
            x=x_new
            if gf_new is None:
                gf_new=self._f.getGradient(x_new)
            delta_g=gf_new-gf
            gf=gf_new
            k+=1
            self._doCallback(k, x, fx, gf)
            gnorm=Lsup(gf)
            if (gnorm<=self._tol): break

            # update Hessian
            denom=self._f.getDualProduct(delta_x, delta_g)
            if denom < EPSILON * gnorm:
                denom=1e-5
                self.logger.debug("Break down in H update. Resetting.")
            rho=1./denom
            self.logger.debug("rho=%e"%rho)
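            # BFGS update of the inverse-Hessian approximation (see Nocedal &
            # Wright, Ch. 6): H <- (I - rho*s*y^T) H (I - rho*y*s^T) + rho*s*s^T
            # with s=delta_x, y=delta_g and rho=1/<s,y>. The outer products are
            # formed explicitly, so this variant suits small dense problems only.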
            A=I-rho*delta_x[:,None]*delta_g[None,:]
            AT=I-rho*delta_g[:,None]*delta_x[None,:]
            H=self._f.getDualProduct(A, self._f.getDualProduct(H,AT)) + rho*delta_x[:,None]*delta_x[None,:]
        if k >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final gnorm=%e"%(k,gnorm))

        self._result=x
        return reason

##############################################################################
class MinimizerNLCG(AbstractMinimizer):
    """
    Minimizer that uses the nonlinear conjugate gradient method
    (Fletcher-Reeves variant).
    """

    def run(self, x):
        i=0
        k=0
        args=self._f.getArguments(x)
        r=-self._f.getGradient(x, *args)
        fx=self._f(x, *args)
        d=r
        delta=self._f.getDualProduct(r,r)
        delta0=delta
        self._doCallback(i, x, fx, -r)

        while i<self._imax and Lsup(r)>self._tol:
            self.logger.info("iteration %d"%i)
            self.logger.debug("grad f(x) = %s"%(-r))
            self.logger.debug("d = %s"%d)
            self.logger.debug("x = %s"%x)

            alpha, fx, gf_new = line_search(self._f, x, d, -r, fx, c2=0.4)
            self.logger.debug("alpha=%e"%(alpha))
            x=x+alpha*d
            r=-self._f.getGradient(x) if gf_new is None else -gf_new
            delta_o=delta
            delta=self._f.getDualProduct(r,r)
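            # Fletcher-Reeves update: beta = <r_new,r_new>/<r_old,r_old>; the
            # direction is restarted (d=r) every n-th step or when d stops
            # being a descent direction (<r,d> <= 0).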
            beta=delta/delta_o
            d=r+beta*d
            k=k+1
            try:
                lenx=len(x)
            except:
                lenx=x.getNumberOfDataPoints()
            if k == lenx or self._f.getDualProduct(r,d) <= 0:
                d=r
                k=0
            i+=1
            self._doCallback(i, x, fx, gf_new)

        if i >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final delta=%e"%(i,delta))

        self._result=x
        return reason


if __name__=="__main__":
    # Example usage with function 'rosen' (minimum=[1,1,...1]):
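    # The minimizer can be chosen on the command line, e.g. (assuming this file
    # is invoked directly as minimizers.py):
    #   python minimizers.py         -> MinimizerLBFGS (default)
    #   python minimizers.py bfgs    -> MinimizerBFGS
    #   python minimizers.py nlcg    -> MinimizerNLCG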
    from scipy.optimize import rosen, rosen_der
    from esys.downunder import MeteredCostFunction
    import sys
    N=10
    x0=np.array([4.]*N) # initial guess

    class RosenFunc(MeteredCostFunction):
        def __init__(self):
            super(RosenFunc, self).__init__()
            self.provides_inverse_Hessian_approximation=False
        def _getDualProduct(self, f0, f1):
            return np.dot(f0, f1)
        def _getValue(self, x, *args):
            return rosen(x)
        def _getGradient(self, x, *args):
            return rosen_der(x)

    f=RosenFunc()
    m=None
    if len(sys.argv)>1:
        method=sys.argv[1].lower()
        if method=='nlcg':
            m=MinimizerNLCG(f)
        elif method=='bfgs':
            m=MinimizerBFGS(f)

    if m is None:
        # default
        m=MinimizerLBFGS(f)
        #m.setOptions(historySize=10000)

    logging.basicConfig(format='[%(funcName)s] \033[1;30m%(message)s\033[0m', level=logging.DEBUG)
    m.setTolerance(1e-5)
    m.setMaxIterations(600)
    m.run(x0)
    m.logSummary()
    print("\tLsup(result)=%.8f"%np.amax(abs(m.getResult())))

    #from scipy.optimize import fmin_cg
    #print("scipy ref=%.8f"%np.amax(abs(fmin_cg(rosen, x0, rosen_der, maxiter=10000))))

