/trunk/downunder/py_src/minimizers.py
Revision 3981 (Fri Sep 21 02:47:54 2012 UTC) by jfenwick: First pass of updating copyright notices

##############################################################################
#
# Copyright (c) 2003-2012 by University of Queensland
# http://www.uq.edu.au
#
# Primary Business: Queensland, Australia
# Licensed under the Open Software License version 3.0
# http://www.opensource.org/licenses/osl-3.0.php
#
# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
# Development since 2012 by School of Earth Sciences
#
##############################################################################

__copyright__="""Copyright (c) 2003-2012 by University of Queensland
http://www.uq.edu.au
Primary Business: Queensland, Australia"""
__license__="""Licensed under the Open Software License version 3.0
http://www.opensource.org/licenses/osl-3.0.php"""
__url__="https://launchpad.net/escript-finley"

__all__ = ['AbstractMinimizer', 'MinimizerLBFGS', 'MinimizerBFGS', 'MinimizerNLCG']

import logging
import numpy as np
try:
    from esys.escript import Lsup, sqrt, EPSILON
except ImportError:
    Lsup=lambda x: np.amax(abs(x))
    sqrt=np.sqrt
    EPSILON=1e-18
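# Only Lsup, sqrt and EPSILON are needed from esys.escript; the NumPy-based
# fallbacks above are used when escript cannot be imported.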

lslogger=logging.getLogger('inv.minimizer.linesearch')
zoomlogger=logging.getLogger('inv.minimizer.linesearch.zoom')

def _zoom(phi, gradphi, phiargs, alpha_lo, alpha_hi, phi_lo, phi_hi, c1, c2, phi0, gphi0, IMAX=25):
    """
    Helper function for `line_search` below which tries to tighten the range
    alpha_lo...alpha_hi. See Chapter 3 of 'Numerical Optimization' by
    J. Nocedal for an explanation.
    """
    i=0
    while True:
        alpha=alpha_lo+.5*(alpha_hi-alpha_lo) # should use interpolation...
        args_a=phiargs(alpha)
        phi_a=phi(alpha, *args_a)
        zoomlogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
        if phi_a > phi0+c1*alpha*gphi0 or phi_a >= phi_lo:
            alpha_hi=alpha
        else:
            gphi_a=gradphi(alpha, *args_a)
            zoomlogger.debug("grad(phi(alpha))=%e"%(gphi_a))
            if np.abs(gphi_a) <= -c2*gphi0:
                break
            if gphi_a*(alpha_hi-alpha_lo) >= 0:
                alpha_hi = alpha_lo
            alpha_lo=alpha
            phi_lo=phi_a
        i+=1
        if i>IMAX:
            gphi_a=None
            break
    return alpha, phi_a, gphi_a

def line_search(f, x, p, gf, fx, alpha_max=50.0, c1=1e-4, c2=0.9, IMAX=15):
    """
    Line search method that satisfies the strong Wolfe conditions.
    See Chapter 3 of 'Numerical Optimization' by J. Nocedal for an explanation.

    :param f: callable objective function f(x)
    :param x: start value for the line search
    :param p: search direction
    :param gf: value for the gradient of f at x
    :param fx: value of f(x)
    :param alpha_max: algorithm terminates if alpha reaches this value
    :param c1: value for the Armijo condition (see reference)
    :param c2: value for the curvature condition (see reference)
    :param IMAX: maximum number of iterations to perform
    :return: tuple (alpha, phi(alpha), gf_new) of the accepted step length,
             the corresponding function value and the latest evaluated
             gradient grad f(x+a*p)
    """
    # this stores the latest gradf(x+a*p) which is returned
    gf_new=[gf]

    def phi(a, *args):
        """ phi(a):=f(x+a*p) """
        return f(x+a*p, *args)
    def gradphi(a, *args):
        gf_new[0]=f.getGradient(x+a*p, *args)
        return f.getInner(gf_new[0], p)
        #return f.getDirectionalDerivative(x+a*p, p, *args)
    def phiargs(a):
        try:
            args=f.getArguments(x+a*p)
        except:
            args=()
        return args

    old_alpha=0.
    # we assume gf is properly scaled so alpha=1 is a reasonable starting value
    alpha=1.
    if fx is None:
        args0=phiargs(0.)
        phi0=phi(0., *args0)
    else:
        phi0=fx
    lslogger.debug("phi(0)=%e"%(phi0))
    gphi0=f.getInner(gf, p) #gradphi(0., *args0)
    lslogger.debug("grad phi(0)=%e"%(gphi0))
    old_phi_a=phi0
    i=1

    while i<IMAX and alpha>0. and alpha<alpha_max:
        args_a=phiargs(alpha)
        phi_a=phi(alpha, *args_a)
        lslogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
        if (phi_a > phi0+c1*alpha*gphi0) or ((phi_a>=old_phi_a) and (i>1)):
            alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, old_alpha, alpha, old_phi_a, phi_a, c1, c2, phi0, gphi0)
            break
        gphi_a=gradphi(alpha, *args_a)
        if np.abs(gphi_a) <= -c2*gphi0:
            break
        if gphi_a >= 0:
            alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, alpha, old_alpha, phi_a, old_phi_a, c1, c2, phi0, gphi0)
            break

        old_alpha=alpha
        # the factor is arbitrary as long as there is sufficient increase
        alpha=2.*alpha
        old_phi_a=phi_a
        i+=1

    return alpha, phi_a, gf_new[0]

##############################################################################
class AbstractMinimizer(object):
    """
    Base class for function minimization methods.
    """

    TOLERANCE_REACHED, MAX_ITERATIONS_REACHED=list(range(2))

    def __init__(self, f, tol=1e-5, imax=300):
        """
        Initializes a new minimizer for a given cost function.

        :param f: the cost function to be minimized
        :type f: CostFunction
        :param tol: tolerance for the stopping criterion (see `setTolerance()`)
        :param imax: maximum number of iterations (see `setMaxIterations()`)
        """
        self._f=f
        self._tol = tol
        self._imax = imax
        self._result = None
        self._callback = None
        self.logger = logging.getLogger('inv.%s'%self.__class__.__name__)

    def setTolerance(self, tol):
        """
        Sets the tolerance for the stopping criterion. The minimizer stops when
        an appropriate norm is less than `tol`.
        """
        self._tol = tol

    def setMaxIterations(self, imax):
        """
        Sets the maximum number of iterations before the minimizer terminates.
        """
        self._imax = imax

    def setCallback(self, callback):
        """
        Sets a callback function to be called after every iteration.
        The arguments to the function are: (k, x, fx, gfx), where
        k is the current iteration, x is the current estimate, fx=f(x) and
        gfx=grad f(x).
        """
        if callback is not None and not callable(callback):
            raise TypeError("Callback function not callable.")
        self._callback = callback
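
    # A minimal callback example (illustrative only), matching the
    # (k, x, fx, gfx) signature described in setCallback() above:
    #
    #   def progress(k, x, fx, gfx):
    #       print("iteration %d: f(x)=%e"%(k, fx))
    #
    #   minimizer.setCallback(progress)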

    def _doCallback(self, *args):
        if self._callback is not None:
            self._callback(*args)

    def getResult(self):
        """
        Returns the result of the minimization.
        """
        return self._result

    def getOptions(self):
        """
        Returns a dictionary of minimizer-specific options.
        """
        return {}

    def setOptions(self, **opts):
        """
        Sets minimizer-specific options. For a list of possible options see
        `getOptions()`.
        """
        raise NotImplementedError

    def run(self, x0):
        """
        Executes the minimization algorithm for `f` starting with the initial
        guess `x0`.

        :return: TOLERANCE_REACHED or MAX_ITERATIONS_REACHED
        """
        raise NotImplementedError

    def logSummary(self):
        """
        Outputs a summary of the completed minimization process to the logger.
        """
        self.logger.warning("Number of function evaluations: %d"%self._f.Value_calls)
        self.logger.warning("Number of gradient evaluations: %d"%self._f.Gradient_calls)
        self.logger.warning("Number of inner product evaluations: %d"%self._f.Inner_calls)
        self.logger.warning("Number of argument evaluations: %d"%self._f.Arguments_calls)


##############################################################################
class MinimizerLBFGS(AbstractMinimizer):
    """
    Minimizer that uses the limited-memory Broyden-Fletcher-Goldfarb-Shanno
    method.
    """

    # History size
    _m = 15

    # Initial Hessian multiplier
    _initial_H = 1

    def getOptions(self):
        return {'historySize':self._m,'initialHessian':self._initial_H}

    def setOptions(self, **opts):
        for o in opts:
            if o=='historySize':
                self._m=opts[o]
            elif o=='initialHessian':
                self._initial_H=opts[o]
            else:
                raise KeyError("Invalid option '%s'"%o)
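
    # Example (illustrative only):
    #   minimizer.setOptions(historySize=30, initialHessian=1)
    # keeps the last 30 correction pairs and uses a unit initial Hessian
    # scaling.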

    def getNorm(self, x):
        return sqrt(self._f.getInner(x, x))

    def run(self, x):
        args=self._f.getArguments(x)
        gf=self._f.getGradient(x, *args)
        fx=self._f(x, *args)
        k=0
        H=self._initial_H
        error=2*self._tol
        s_and_y=[]

        self._doCallback(k, x, fx, gf)

        while error > self._tol and k < self._imax:
            #self.logger.info("\033[1;31miteration %d\033[1;30m, error=%e"%(k,error))
            self.logger.info("iteration %d, error=%e"%(k,error))
            # determine search direction
            p = -self._twoLoop(H, gf, s_and_y)

            self.logger.debug("H = %s"%H)
            self.logger.debug("grad f(x) = %s"%gf)
            self.logger.debug("p = %s"%p)
            self.logger.debug("x = %s"%x)

            # determine step length
            alpha, fx_new, gf_new = line_search(self._f, x, p, gf, fx)
            self.logger.debug("alpha=%e"%(alpha))
            # execute the step
            x_new = x + alpha*p
            if gf_new is None:
                gf_new=self._f.getGradient(x_new)
            delta_x=x_new-x
            delta_g=gf_new-gf
            s_and_y.append((delta_x,delta_g))
            if fx_new==0.:
                error=fx
            else:
                error=abs(fx_new-fx)/abs(fx_new)
            x=x_new
            gf=gf_new
            fx=fx_new
            k+=1
            self._doCallback(k, x, fx, gf)
            if (error<=self._tol): break

            # delete oldest vector pair
            if k>self._m: s_and_y.pop(0)

            # set the new scaling factor (approximation of inverse Hessian)
            gnorm=self.getNorm(gf)
            denom=self.getNorm(delta_g)
            if denom < EPSILON * gnorm:
                H=1.
                self.logger.debug("Break down in H update. Resetting to 1.")
            else:
                H=self._f.getInner(delta_x,delta_g)/denom**2

        if k >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final error=%e"%(k,error))
        self._result=x
        return reason

    def _twoLoop(self, H, gf, s_and_y):
        """
        Helper for the L-BFGS method.
        See 'Numerical Optimization' by J. Nocedal for an explanation.
        """
        q=gf
        alpha=[]
        for s,y in reversed(s_and_y):
            rho=1./(self._f.getInner(s, y))
            a=rho*self._f.getInner(s, q)
            alpha.append(a)
            q=q-a*y

        r=H*q
        for s,y in s_and_y:
            rho=1./(self._f.getInner(s, y))
            beta=rho*self._f.getInner(y,r)
            a=alpha.pop()
            r=r+s*(a-beta)
        return r

##############################################################################
class MinimizerBFGS(AbstractMinimizer):
    """
    Minimizer that uses the Broyden-Fletcher-Goldfarb-Shanno method.
    """

    # Initial Hessian multiplier
    _initial_H = 1

    def getOptions(self):
        return {'initialHessian':self._initial_H}

    def setOptions(self, **opts):
        for o in opts:
            if o=='initialHessian':
                self._initial_H=opts[o]
            else:
                raise KeyError("Invalid option '%s'"%o)

    def run(self, x):
        args=self._f.getArguments(x)
        gf=self._f.getGradient(x, *args)
        fx=self._f(x, *args)
        k=0
        try:
            n=len(x)
        except:
            n=x.getNumberOfDataPoints()
        I=np.eye(n)
        H=self._initial_H*I
        gnorm=Lsup(gf)
        self._doCallback(k, x, fx, gf)

        while gnorm > self._tol and k < self._imax:
            self.logger.info("iteration %d, gnorm=%e"%(k,gnorm))

            # determine search direction
            d=-self._f.getInner(H, gf)

            self.logger.debug("H = %s"%H)
            self.logger.debug("grad f(x) = %s"%gf)
            self.logger.debug("d = %s"%d)
            self.logger.debug("x = %s"%x)

            # determine step length
            alpha, fx, gf_new = line_search(self._f, x, d, gf, fx)
            self.logger.debug("alpha=%e"%alpha)
            # execute the step
            x_new=x+alpha*d
            delta_x=x_new-x
            x=x_new
            if gf_new is None:
                gf_new=self._f.getGradient(x_new)
            delta_g=gf_new-gf
            gf=gf_new
            k+=1
            self._doCallback(k, x, fx, gf)
            gnorm=Lsup(gf)
            if (gnorm<=self._tol): break

            # update Hessian
            denom=self._f.getInner(delta_x, delta_g)
            if denom < EPSILON * gnorm:
                denom=1e-5
                self.logger.debug("Break down in H update. Resetting.")
            rho=1./denom
            self.logger.debug("rho=%e"%rho)
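            # BFGS update of the inverse Hessian approximation
            # (Nocedal & Wright, eq. 6.17):
            #   H <- (I - rho*s*y^T) * H * (I - rho*y*s^T) + rho*s*s^T
            # with s=delta_x, y=delta_g and rho=1/<s,y>, evaluated through the
            # cost function's inner product.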
            A=I-rho*delta_x[:,None]*delta_g[None,:]
            AT=I-rho*delta_g[:,None]*delta_x[None,:]
            H=self._f.getInner(A, self._f.getInner(H,AT)) + rho*delta_x[:,None]*delta_x[None,:]

        if k >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final gnorm=%e"%(k,gnorm))

        self._result=x
        return reason

##############################################################################
class MinimizerNLCG(AbstractMinimizer):
    """
    Minimizer that uses the nonlinear conjugate gradient method
    (Fletcher-Reeves variant).
    """

    def run(self, x):
        i=0
        k=0
        args=self._f.getArguments(x)
        r=-self._f.getGradient(x, *args)
        fx=self._f(x, *args)
        d=r
        delta=self._f.getInner(r,r)
        delta0=delta
        self._doCallback(i, x, fx, -r)

        while i<self._imax and Lsup(r)>self._tol:
            self.logger.info("iteration %d"%i)
            self.logger.debug("grad f(x) = %s"%(-r))
            self.logger.debug("d = %s"%d)
            self.logger.debug("x = %s"%x)

            alpha, fx, gf_new = line_search(self._f, x, d, -r, fx, c2=0.4)
            self.logger.debug("alpha=%e"%(alpha))
            x=x+alpha*d
            r=-self._f.getGradient(x) if gf_new is None else -gf_new
            delta_o=delta
            delta=self._f.getInner(r,r)
            beta=delta/delta_o
            d=r+beta*d
            k=k+1
            try:
                lenx=len(x)
            except:
                lenx=x.getNumberOfDataPoints()
            if k == lenx or self._f.getInner(r,d) <= 0:
                d=r
                k=0
            i+=1
            self._doCallback(i, x, fx, gf_new)

        if i >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final delta=%e"%(i,delta))

        self._result=x
        return reason


if __name__=="__main__":
    # Example usage with function 'rosen' (minimum=[1,1,...,1]):
    from scipy.optimize import rosen, rosen_der
    from esys.downunder.costfunctions import CostFunction
    import sys
    N=100
    x0=np.array([4.]*N) # initial guess

    class RosenFunc(CostFunction):
        def _getInner(self, f0, f1):
            return np.dot(f0, f1)
        def _getValue(self, x, *args):
            return rosen(x)
        def _getGradient(self, x, *args):
            return rosen_der(x)
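
    # The optional first command line argument selects the minimizer
    # ('nlcg' or 'bfgs'; default is L-BFGS), e.g.:
    #   python minimizers.py bfgs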

    f=RosenFunc()
    m=None
    if len(sys.argv)>1:
        method=sys.argv[1].lower()
        if method=='nlcg':
            m=MinimizerNLCG(f)
        elif method=='bfgs':
            m=MinimizerBFGS(f)

    if m is None:
        # default
        m=MinimizerLBFGS(f)
        #m.setOptions(historySize=10000)

    logging.basicConfig(format='[%(funcName)s] \033[1;30m%(message)s\033[0m', level=logging.INFO)
    m.setTolerance(1e-5)
    m.setMaxIterations(600)
    m.run(x0)
    m.logSummary()
    print("\tLsup(result)=%.8f"%np.amax(abs(m.getResult())))

    #from scipy.optimize import fmin_cg
    #print("scipy ref=%.8f"%np.amax(abs(fmin_cg(rosen, x0, rosen_der, maxiter=10000))))

