/trunk/downunder/py_src/minimizers.py
Revision 3950 - Fri Aug 24 06:10:02 2012 UTC by caltinay
File MIME type: text/x-python
File size: 16393 byte(s)
More tests and corrections in the alternative minimizers.

########################################################
#
# Copyright (c) 2003-2012 by University of Queensland
# Earth Systems Science Computational Center (ESSCC)
# http://www.uq.edu.au/esscc
#
# Primary Business: Queensland, Australia
# Licensed under the Open Software License version 3.0
# http://www.opensource.org/licenses/osl-3.0.php
#
########################################################

__copyright__="""Copyright (c) 2003-2012 by University of Queensland
Earth Systems Science Computational Center (ESSCC)
http://www.uq.edu.au/esscc
Primary Business: Queensland, Australia"""
__license__="""Licensed under the Open Software License version 3.0
http://www.opensource.org/licenses/osl-3.0.php"""
__url__="https://launchpad.net/escript-finley"

__all__ = ['AbstractMinimizer', 'MinimizerLBFGS', 'MinimizerBFGS', 'MinimizerNLCG']

import logging
import numpy as np
try:
    from esys.escript import Lsup, sqrt, EPSILON
except ImportError:
    # fall back to numpy equivalents when esys.escript is not available
    Lsup=lambda x: np.amax(abs(x))
    sqrt=np.sqrt
    EPSILON=1e-18

lslogger=logging.getLogger('inv.minimizer.linesearch')
zoomlogger=logging.getLogger('inv.minimizer.linesearch.zoom')

def _zoom(phi, gradphi, phiargs, alpha_lo, alpha_hi, phi_lo, phi_hi, c1, c2, phi0, gphi0, IMAX=25):
    """
    Helper function for `line_search` below which tries to tighten the range
    alpha_lo...alpha_hi. See Chapter 3 of 'Numerical Optimization' by
    J. Nocedal for an explanation.
    """
    i=0
    while True:
        alpha=alpha_lo+.5*(alpha_hi-alpha_lo) # should use interpolation...
        args_a=phiargs(alpha)
        phi_a=phi(alpha, *args_a)
        zoomlogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
        if phi_a > phi0+c1*alpha*gphi0 or phi_a >= phi_lo:
            alpha_hi=alpha
        else:
            gphi_a=gradphi(alpha, *args_a)
            zoomlogger.debug("grad(phi(alpha))=%e"%(gphi_a))
            if np.abs(gphi_a) <= -c2*gphi0:
                break
            if gphi_a*(alpha_hi-alpha_lo) >= 0:
                alpha_hi = alpha_lo
            alpha_lo=alpha
            phi_lo=phi_a
        i+=1
        if i>IMAX:
            gphi_a=None
            break
    return alpha, phi_a, gphi_a

def line_search(f, x, p, gf, fx, alpha_max=50.0, c1=1e-4, c2=0.9, IMAX=15):
    """
    Line search method that satisfies the strong Wolfe conditions.
    See Chapter 3 of 'Numerical Optimization' by J. Nocedal for an explanation,
    and the short usage sketch in the comment block following this function.

    :param f: callable objective function f(x)
    :param x: start value for the line search
    :param p: search direction
    :param gf: value for the gradient of f at x
    :param fx: value of f(x)
    :param alpha_max: algorithm terminates if alpha reaches this value
    :param c1: value for Armijo condition (see reference)
    :param c2: value for curvature condition (see reference)
    :param IMAX: maximum number of iterations to perform
    """
    # this stores the latest gradf(x+a*p) which is returned
    gf_new=[gf]

    def phi(a, *args):
        """ phi(a):=f(x+a*p) """
        return f(x+a*p, *args)
    def gradphi(a, *args):
        gf_new[0]=f.getGradient(x+a*p, *args)
        return f.getInner(gf_new[0], p)
        #return f.getDirectionalDerivative(x+a*p, p, *args)
    def phiargs(a):
        try:
            args=f.getArguments(x+a*p)
        except Exception:
            args=()
        return args

    old_alpha=0.
    # we assume gf is properly scaled so alpha=1 is a reasonable starting value
    alpha=1.
    if fx is None:
        args0=phiargs(0.)
        phi0=phi(0., *args0)
    else:
        phi0=fx
    lslogger.debug("phi(0)=%e"%(phi0))
    gphi0=f.getInner(gf, p) #gradphi(0., *args0)
    lslogger.debug("grad phi(0)=%e"%(gphi0))
    old_phi_a=phi0
    i=1

    while i<IMAX and alpha>0. and alpha<alpha_max:
        args_a=phiargs(alpha)
        phi_a=phi(alpha, *args_a)
        lslogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
        if (phi_a > phi0+c1*alpha*gphi0) or ((phi_a>=old_phi_a) and (i>1)):
            alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, old_alpha, alpha, old_phi_a, phi_a, c1, c2, phi0, gphi0)
            break
        gphi_a=gradphi(alpha, *args_a)
        if np.abs(gphi_a) <= -c2*gphi0:
            break
        if gphi_a >= 0:
            alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, alpha, old_alpha, phi_a, old_phi_a, c1, c2, phi0, gphi0)
            break

        old_alpha=alpha
        # the factor is arbitrary as long as there is sufficient increase
        alpha=2.*alpha
        old_phi_a=phi_a
        i+=1

    return alpha, phi_a, gf_new[0]

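# For reference, line_search above looks for a step length alpha satisfying the
# strong Wolfe conditions
#     f(x+alpha*p) <= f(x) + c1*alpha*p^T grad f(x)       (sufficient decrease)
#     |p^T grad f(x+alpha*p)| <= c2*|p^T grad f(x)|       (curvature)
# A minimal driver sketch (hedged; it assumes a cost function object 'f' with the
# __call__, getGradient and getInner interface used by the minimizers below):
#
#     gf = f.getGradient(x)           # gradient at the current iterate
#     p  = -gf                        # e.g. steepest-descent direction
#     alpha, fx_new, gf_new = line_search(f, x, p, gf, f(x))
#     x = x + alpha*p                 # accepted step
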
##############################################################################
class AbstractMinimizer(object):
    """
    Base class for function minimization methods.
    """

    TOLERANCE_REACHED, MAX_ITERATIONS_REACHED=list(range(2))

    def __init__(self, f, tol=1e-5, imax=300):
        """
        Initializes a new minimizer for a given cost function.

        :param f: the cost function to be minimized
        :type f: CostFunction
        :param tol: tolerance for the stopping criterion (see `setTolerance`)
        :param imax: maximum number of iterations (see `setMaxIterations`)
        """
        self._f=f
        self._tol = tol
        self._imax = imax
        self._result = None
        self._callback = None
        self.logger = logging.getLogger('inv.%s'%self.__class__.__name__)

    def setTolerance(self, tol):
        """
        Sets the tolerance for the stopping criterion. The minimizer stops when
        an appropriate norm is less than `tol`.
        """
        self._tol = tol

    def setMaxIterations(self, imax):
        """
        Sets the maximum number of iterations before the minimizer terminates.
        """
        self._imax = imax

    def setCallback(self, callback):
        """
        Sets a callback function to be called after every iteration.
        The arguments to the function are: (k, x, fx, gfx), where
        k is the current iteration, x is the current estimate, fx=f(x) and
        gfx=grad f(x).
        """
        if callback is not None and not callable(callback):
            raise TypeError("Callback function not callable.")
        self._callback = callback

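    # Example of a callback matching the signature described in setCallback()
    # (the name 'progress' and the use of print are only illustrative):
    #
    #     def progress(k, x, fx, gfx):
    #         print("iteration %d: f(x)=%e" % (k, fx))
    #     minimizer.setCallback(progress)
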
    def _doCallback(self, *args):
        if self._callback is not None:
            self._callback(*args)

    def getResult(self):
        """
        Returns the result of the minimization.
        """
        return self._result

    def getOptions(self):
        """
        Returns a dictionary of minimizer-specific options.
        """
        return {}

    def setOptions(self, **opts):
        """
        Sets minimizer-specific options. For a list of possible options see
        `getOptions()`.
        """
        raise NotImplementedError

    def run(self, x0):
        """
        Executes the minimization algorithm for `f` starting with the initial
        guess `x0`.

        :return: TOLERANCE_REACHED or MAX_ITERATIONS_REACHED
        """
        raise NotImplementedError

    def logSummary(self):
        """
        Outputs a summary of the completed minimization process to the logger.
        """
        self.logger.warning("Number of function evaluations: %d"%self._f.Value_calls)
        self.logger.warning("Number of gradient evaluations: %d"%self._f.Gradient_calls)
        self.logger.warning("Number of inner product evaluations: %d"%self._f.Inner_calls)
        self.logger.warning("Number of argument evaluations: %d"%self._f.Arguments_calls)


##############################################################################
class MinimizerLBFGS(AbstractMinimizer):
    """
    Minimizer that uses the limited-memory Broyden-Fletcher-Goldfarb-Shanno
    method.
    """
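    # A typical call sequence (a hedged sketch, not taken from this file; it
    # assumes a CostFunction-like object 'f' and an initial guess 'x0'):
    #
    #     minimizer = MinimizerLBFGS(f, tol=1e-5, imax=300)
    #     minimizer.setOptions(historySize=25)
    #     status = minimizer.run(x0)   # TOLERANCE_REACHED or MAX_ITERATIONS_REACHED
    #     x_opt = minimizer.getResult()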

    # History size
    _m = 15

    # Initial Hessian multiplier
    _initial_H = 1

    def getOptions(self):
        return {'historySize':self._m,'initialHessian':self._initial_H}

    def setOptions(self, **opts):
        for o in opts:
            if o=='historySize':
                self._m=opts[o]
            elif o=='initialHessian':
                self._initial_H=opts[o]
            else:
                raise KeyError("Invalid option '%s'"%o)

    def getNorm(self, x):
        return sqrt(self._f.getInner(x, x))

    def run(self, x):
        args=self._f.getArguments(x)
        gf=self._f.getGradient(x, *args)
        fx=self._f(x, *args)
        k=0
        H=self._initial_H
        error=2*self._tol
        s_and_y=[]

        self._doCallback(k, x, fx, gf)

        while error > self._tol and k < self._imax:
            #self.logger.info("\033[1;31miteration %d\033[1;30m, error=%e"%(k,error))
            self.logger.info("iteration %d, error=%e"%(k,error))
            # determine search direction
            p = -self._twoLoop(H, gf, s_and_y)

            self.logger.debug("H = %s"%H)
            self.logger.debug("grad f(x) = %s"%gf)
            self.logger.debug("p = %s"%p)
            self.logger.debug("x = %s"%x)

            # determine step length
            alpha, fx_new, gf_new = line_search(self._f, x, p, gf, fx)
            self.logger.debug("alpha=%e"%(alpha))
            # execute the step
            x_new = x + alpha*p
            if gf_new is None:
                gf_new=self._f.getGradient(x_new)
            delta_x=x_new-x
            delta_g=gf_new-gf
            s_and_y.append((delta_x,delta_g))
            if fx_new==0.:
                error=fx
            else:
                error=abs(fx_new-fx)/abs(fx_new)
            x=x_new
            gf=gf_new
            fx=fx_new
            k+=1
            self._doCallback(k, x, fx, gf)
            if (error<=self._tol): break

            # delete oldest vector pair
            if k>self._m: s_and_y.pop(0)

            # set the new scaling factor (approximation of inverse Hessian)
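            # (the common choice gamma = (s^T y)/(y^T y) with s=delta_x, y=delta_g;
            # see 'Numerical Optimization' by Nocedal & Wright)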
            gnorm=self.getNorm(gf)
            denom=self.getNorm(delta_g)
            if denom < EPSILON * gnorm:
                H=1.
                self.logger.debug("Break down in H update. Resetting to 1.")
            else:
                H=self._f.getInner(delta_x,delta_g)/denom**2

        if k >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final error=%e"%(k,error))
        self._result=x
        return reason

    def _twoLoop(self, H, gf, s_and_y):
        """
        Helper for the L-BFGS method.
        See 'Numerical Optimization' by J. Nocedal for an explanation.
        """
        q=gf
        alpha=[]
        for s,y in reversed(s_and_y):
            rho=1./(self._f.getInner(s, y))
            a=rho*self._f.getInner(s, q)
            alpha.append(a)
            q=q-a*y

        r=H*q
        for s,y in s_and_y:
            rho=1./(self._f.getInner(s, y))
            beta=rho*self._f.getInner(y,r)
            a=alpha.pop()
            r=r+s*(a-beta)
        return r

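# For reference, _twoLoop above is the standard L-BFGS two-loop recursion
# (Nocedal & Wright, Algorithm 7.4): with stored pairs (s_i, y_i) and
# rho_i = 1/(y_i^T s_i) it computes r = H_k * q for q = grad f(x) via
#
#     backward pass:  a_i = rho_i * s_i^T q ;  q <- q - a_i * y_i
#     r <- H * q      (H is the initial inverse-Hessian scaling)
#     forward pass:   b_i = rho_i * y_i^T r ;  r <- r + s_i * (a_i - b_i)
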
##############################################################################
class MinimizerBFGS(AbstractMinimizer):
    """
    Minimizer that uses the Broyden-Fletcher-Goldfarb-Shanno method.
    """

    # Initial Hessian multiplier
    _initial_H = 1

    def getOptions(self):
        return {'initialHessian':self._initial_H}

    def setOptions(self, **opts):
        for o in opts:
            if o=='initialHessian':
                self._initial_H=opts[o]
            else:
                raise KeyError("Invalid option '%s'"%o)

    def run(self, x):
        args=self._f.getArguments(x)
        gf=self._f.getGradient(x, *args)
        fx=self._f(x, *args)
        k=0
        try:
            n=len(x)
        except Exception:
            n=x.getNumberOfDataPoints()
        I=np.eye(n)
        H=self._initial_H*I
        gnorm=Lsup(gf)
        self._doCallback(k, x, fx, gf)

        while gnorm > self._tol and k < self._imax:
            self.logger.info("iteration %d, gnorm=%e"%(k,gnorm))

            # determine search direction
            d=-self._f.getInner(H, gf)

            self.logger.debug("H = %s"%H)
            self.logger.debug("grad f(x) = %s"%gf)
            self.logger.debug("d = %s"%d)
            self.logger.debug("x = %s"%x)

            # determine step length
            alpha, fx, gf_new = line_search(self._f, x, d, gf, fx)
            self.logger.debug("alpha=%e"%alpha)
            # execute the step
            x_new=x+alpha*d
            delta_x=x_new-x
            x=x_new
            if gf_new is None:
                gf_new=self._f.getGradient(x_new)
            delta_g=gf_new-gf
            gf=gf_new
            k+=1
            self._doCallback(k, x, fx, gf)
            gnorm=Lsup(gf)
            if (gnorm<=self._tol): break

            # update Hessian
            denom=self._f.getInner(delta_x, delta_g)
            if denom < EPSILON * gnorm:
                denom=1e-5
                self.logger.debug("Break down in H update. Resetting.")
            rho=1./denom
            self.logger.debug("rho=%e"%rho)
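            # BFGS update of the inverse Hessian approximation (s=delta_x, y=delta_g):
            #     H <- (I - rho*s*y^T) * H * (I - rho*y*s^T) + rho*s*s^T
            # carried out below with getInner acting as the matrix product.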
            A=I-rho*delta_x[:,None]*delta_g[None,:]
            AT=I-rho*delta_g[:,None]*delta_x[None,:]
            H=self._f.getInner(A, self._f.getInner(H,AT)) + rho*delta_x[:,None]*delta_x[None,:]
        if k >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final gnorm=%e"%(k,gnorm))

        self._result=x
        return reason

##############################################################################
class MinimizerNLCG(AbstractMinimizer):
    """
    Minimizer that uses the nonlinear conjugate gradient method
    (Fletcher-Reeves variant).
    """

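    # run() below uses the Fletcher-Reeves update: with r = -grad f(x), the new
    # search direction is d <- r + beta*d where beta = (r_new^T r_new)/(r_old^T r_old).
    # The direction is reset to steepest descent (d=r) after len(x) steps or when
    # d stops being a descent direction (r^T d <= 0).
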
    def run(self, x):
        i=0
        k=0
        args=self._f.getArguments(x)
        r=-self._f.getGradient(x, *args)
        fx=self._f(x, *args)
        d=r
        delta=self._f.getInner(r,r)
        delta0=delta
        self._doCallback(i, x, fx, -r)

        while i<self._imax and Lsup(r)>self._tol:
            self.logger.info("iteration %d"%i)
            self.logger.debug("grad f(x) = %s"%(-r))
            self.logger.debug("d = %s"%d)
            self.logger.debug("x = %s"%x)

            alpha, fx, gf_new = line_search(self._f, x, d, -r, fx, c2=0.4)
            self.logger.debug("alpha=%e"%(alpha))
            x=x+alpha*d
            r=-self._f.getGradient(x) if gf_new is None else -gf_new
            delta_o=delta
            delta=self._f.getInner(r,r)
            beta=delta/delta_o
            d=r+beta*d
            k=k+1
            try:
                lenx=len(x)
            except Exception:
                lenx=x.getNumberOfDataPoints()
            if k == lenx or self._f.getInner(r,d) <= 0:
                d=r
                k=0
            i+=1
            self._doCallback(i, x, fx, gf_new)

        if i >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final delta=%e"%(i,delta))

        self._result=x
        return reason


if __name__=="__main__":
    # Example usage with function 'rosen' (minimum=[1,1,...1]):
    from scipy.optimize import rosen, rosen_der
    from esys.downunder.costfunctions import CostFunction
    import sys
    N=100
    x0=np.array([4.]*N) # initial guess

    class RosenFunc(CostFunction):
        def _getInner(self, f0, f1):
            return np.dot(f0, f1)
        def _getValue(self, x, *args):
            return rosen(x)
        def _getGradient(self, x, *args):
            return rosen_der(x)

    f=RosenFunc()
    m=None
    if len(sys.argv)>1:
        method=sys.argv[1].lower()
        if method=='nlcg':
            m=MinimizerNLCG(f)
        elif method=='bfgs':
            m=MinimizerBFGS(f)

    if m is None:
        # default
        m=MinimizerLBFGS(f)
        #m.setOptions(historySize=10000)

    logging.basicConfig(format='[%(funcName)s] \033[1;30m%(message)s\033[0m', level=logging.INFO)
    m.setTolerance(1e-5)
    m.setMaxIterations(600)
    m.run(x0)
    m.logSummary()
    print("\tLsup(result)=%.8f"%np.amax(abs(m.getResult())))

    #from scipy.optimize import fmin_cg
    #print("scipy ref=%.8f"%np.amax(abs(fmin_cg(rosen, x0, rosen_der, maxiter=10000))))

