/trunk/downunder/py_src/minimizers.py

Revision 4050
Wed Oct 31 01:01:24 2012 UTC by caltinay
File MIME type: text/x-python
File size: 16562 byte(s)
Changed initial Hessian in magnetic test and removed some debug output.

##############################################################################
#
# Copyright (c) 2003-2012 by University of Queensland
# http://www.uq.edu.au
#
# Primary Business: Queensland, Australia
# Licensed under the Open Software License version 3.0
# http://www.opensource.org/licenses/osl-3.0.php
#
# Development until 2012 by Earth Systems Science Computational Center (ESSCC)
# Development since 2012 by School of Earth Sciences
#
##############################################################################

"""Generic minimization algorithms"""

__copyright__="""Copyright (c) 2003-2012 by University of Queensland
http://www.uq.edu.au
Primary Business: Queensland, Australia"""
__license__="""Licensed under the Open Software License version 3.0
http://www.opensource.org/licenses/osl-3.0.php"""
__url__="https://launchpad.net/escript-finley"

__all__ = ['AbstractMinimizer', 'MinimizerLBFGS', 'MinimizerBFGS', 'MinimizerNLCG']

import logging
import numpy as np
try:
    from esys.escript import Lsup, sqrt, EPSILON
except ImportError:
    Lsup=lambda x: np.amax(abs(x))
    sqrt=np.sqrt
    EPSILON=1e-18
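# Note: the numpy-based fallbacks above are only used when esys.escript cannot
# be imported; they allow the minimizers in this module to operate on plain
# numpy arrays.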

import sys
if sys.version_info[0]>2:
    xrange=range

lslogger=logging.getLogger('inv.minimizer.linesearch')
zoomlogger=logging.getLogger('inv.minimizer.linesearch.zoom')

def _zoom(phi, gradphi, phiargs, alpha_lo, alpha_hi, phi_lo, phi_hi, c1, c2, phi0, gphi0, IMAX=25):
    """
    Helper function for `line_search` below which tries to tighten the range
    alpha_lo...alpha_hi. See Chapter 3 of 'Numerical Optimization' by
    J. Nocedal for an explanation.
    """
    i=0
    while True:
        alpha=alpha_lo+.5*(alpha_hi-alpha_lo) # should use interpolation...
        args_a=phiargs(alpha)
        phi_a=phi(alpha, *args_a)
        zoomlogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
        if phi_a > phi0+c1*alpha*gphi0 or phi_a >= phi_lo:
            alpha_hi=alpha
        else:
            gphi_a=gradphi(alpha, *args_a)
            zoomlogger.debug("grad(phi(alpha))=%e"%(gphi_a))
            if np.abs(gphi_a) <= -c2*gphi0:
                break
            if gphi_a*(alpha_hi-alpha_lo) >= 0:
                alpha_hi = alpha_lo
            alpha_lo=alpha
            phi_lo=phi_a
        i+=1
        if i>IMAX:
            gphi_a=None
            break
    return alpha, phi_a, gphi_a

def line_search(f, x, p, gf, fx, alpha_max=50.0, c1=1e-4, c2=0.9, IMAX=15):
    """
    Line search method that satisfies the strong Wolfe conditions.
    See Chapter 3 of 'Numerical Optimization' by J. Nocedal for an explanation.

    :param f: callable objective function f(x)
    :param x: start value for the line search
    :param p: search direction
    :param gf: value for the gradient of f at x
    :param fx: value of f(x)
    :param alpha_max: algorithm terminates if alpha reaches this value
    :param c1: value for Armijo condition (see reference)
    :param c2: value for curvature condition (see reference)
    :param IMAX: maximum number of iterations to perform
    """
    # this stores the latest gradf(x+a*p) which is returned
    gf_new=[gf]

    def phi(a, *args):
        """ phi(a):=f(x+a*p) """
        return f(x+a*p, *args)
    def gradphi(a, *args):
        gf_new[0]=f.getGradient(x+a*p, *args)
        return f.getInner(gf_new[0], p)
        #return f.getDirectionalDerivative(x+a*p, p, *args)
    def phiargs(a):
        try:
            args=f.getArguments(x+a*p)
        except:
            args=()
        return args

    old_alpha=0.
    # we assume gf is properly scaled so alpha=1 is a reasonable starting value
    alpha=1.
    if fx is None:
        args0=phiargs(0.)
        phi0=phi(0., *args0)
    else:
        phi0=fx
    lslogger.debug("phi(0)=%e"%(phi0))
    gphi0=f.getInner(gf, p) #gradphi(0., *args0)
    lslogger.debug("grad phi(0)=%e"%(gphi0))
    old_phi_a=phi0
    i=1

    while i<IMAX and alpha>0. and alpha<alpha_max:

        args_a=phiargs(alpha)
        phi_a=phi(alpha, *args_a)
        lslogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
        if (phi_a > phi0+c1*alpha*gphi0) or ((phi_a>=old_phi_a) and (i>1)):
            alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, old_alpha, alpha, old_phi_a, phi_a, c1, c2, phi0, gphi0)
            break

        gphi_a=gradphi(alpha, *args_a)
        if np.abs(gphi_a) <= -c2*gphi0:
            break
        if gphi_a >= 0:
            alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, alpha, old_alpha, phi_a, old_phi_a, c1, c2, phi0, gphi0)
            break

        old_alpha=alpha
        # the factor is arbitrary as long as there is sufficient increase
        alpha=2.*alpha
        old_phi_a=phi_a
        i+=1

    return alpha, phi_a, gf_new[0]

##############################################################################
class AbstractMinimizer(object):
    """
    Base class for function minimization methods.
    """

    TOLERANCE_REACHED, MAX_ITERATIONS_REACHED=list(xrange(2))

    def __init__(self, f, tol=1e-5, imax=300):
        """
        Initializes a new minimizer for a given cost function.

        :param f: the cost function to be minimized
        :type f: CostFunction
        :param tol: tolerance for the stopping criterion (see `setTolerance()`)
        :param imax: maximum number of iterations (see `setMaxIterations()`)
        """
        self._f=f
        self._tol = tol
        self._imax = imax
        self._result = None
        self._callback = None
        self.logger = logging.getLogger('inv.%s'%self.__class__.__name__)

    def setTolerance(self, tol):
        """
        Sets the tolerance for the stopping criterion. The minimizer stops when
        an appropriate norm is less than `tol`.
        """
        self._tol = tol

    def setMaxIterations(self, imax):
        """
        Sets the maximum number of iterations before the minimizer terminates.
        """
        self._imax = imax

    def setCallback(self, callback):
        """
        Sets a callback function to be called after every iteration.
        The arguments to the function are: (k, x, fx, gfx), where
        k is the current iteration, x is the current estimate, fx=f(x) and
        gfx=grad f(x).
        """
        if callback is not None and not callable(callback):
            raise TypeError("Callback function not callable.")
        self._callback = callback

    def _doCallback(self, *args):
        if self._callback is not None:
            self._callback(*args)

    def getResult(self):
        """
        Returns the result of the minimization.
        """
        return self._result

    def getOptions(self):
        """
        Returns a dictionary of minimizer-specific options.
        """
        return {}

    def setOptions(self, **opts):
        """
        Sets minimizer-specific options. For a list of possible options see
        `getOptions()`.
        """
        raise NotImplementedError

    def run(self, x0):
        """
        Executes the minimization algorithm for *f* starting with the initial
        guess ``x0``.

        :return: `TOLERANCE_REACHED` or `MAX_ITERATIONS_REACHED`
        """
        raise NotImplementedError

    def logSummary(self):
        """
        Outputs a summary of the completed minimization process to the logger.
        """
        self.logger.warning("Number of function evaluations: %d"%self._f.Value_calls)
        self.logger.warning("Number of gradient evaluations: %d"%self._f.Gradient_calls)
        self.logger.warning("Number of inner product evaluations: %d"%self._f.Inner_calls)
        self.logger.warning("Number of argument evaluations: %d"%self._f.Arguments_calls)


##############################################################################
class MinimizerLBFGS(AbstractMinimizer):
    """
    Minimizer that uses the limited-memory Broyden-Fletcher-Goldfarb-Shanno
    method.
    """

    # History size
    _m = 15

    # Initial Hessian multiplier
    _initial_H = 1

    def getOptions(self):
        return {'historySize':self._m,'initialHessian':self._initial_H}

    def setOptions(self, **opts):
        for o in opts:
            if o=='historySize':
                self._m=opts[o]
            elif o=='initialHessian':
                self._initial_H=opts[o]
            else:
                raise KeyError("Invalid option '%s'"%o)

    def getNorm(self, x):
        return sqrt(self._f.getInner(x, x))

    def run(self, x):
        args=self._f.getArguments(x)
        gf=self._f.getGradient(x, *args)
        fx=self._f(x, *args)
        k=0
        H=self._initial_H
        error=2*self._tol
        s_and_y=[]

        self._doCallback(k, x, fx, gf)

        while error > self._tol and k < self._imax:
            #self.logger.info("\033[1;31miteration %d\033[1;30m, error=%e"%(k,error))
            self.logger.info("iteration %d, error=%e"%(k,error))
            # determine search direction
            p = -self._twoLoop(H, gf, s_and_y)

            self.logger.debug("H = %s"%H)
            self.logger.debug("grad f(x) = %s"%gf)
            self.logger.debug("p = %s"%p)
            self.logger.debug("x = %s"%x)

            # determine step length
            alpha, fx_new, gf_new = line_search(self._f, x, p, gf, fx)
            self.logger.debug("alpha=%e"%(alpha))
            # execute the step
            x_new = x + alpha*p
            if gf_new is None:
                gf_new=self._f.getGradient(x_new)
            delta_x=x_new-x
            delta_g=gf_new-gf
            s_and_y.append((delta_x,delta_g))
            if fx_new==0.:
                error=fx
            else:
                error=abs(fx_new-fx)/abs(fx_new)
            x=x_new
            gf=gf_new
            fx=fx_new
            k+=1
            self._doCallback(k, x, fx, gf)
            if (error<=self._tol): break

            # delete oldest vector pair
            if k>self._m: s_and_y.pop(0)

            # set the new scaling factor (approximation of inverse Hessian)
            gnorm=self.getNorm(gf)
            denom=self.getNorm(delta_g)
            if denom < EPSILON * gnorm:
                H=1.
                self.logger.debug("Break down in H update. Resetting to 1.")
            else:
                H=self._f.getInner(delta_x,delta_g)/denom**2

        if k >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final error=%e"%(k,error))
        self._result=x
        return reason

    def _twoLoop(self, H, gf, s_and_y):
        """
        Helper for the L-BFGS method.
        See 'Numerical Optimization' by J. Nocedal for an explanation.
        """
        q=gf
        alpha=[]
        for s,y in reversed(s_and_y):
            rho=1./(self._f.getInner(s, y))
            a=rho*self._f.getInner(s, q)
            alpha.append(a)
            q=q-a*y

        r=H*q
        for s,y in s_and_y:
            rho=1./(self._f.getInner(s, y))
            beta=rho*self._f.getInner(y,r)
            a=alpha.pop()
            r=r+s*(a-beta)
        return r

##############################################################################
class MinimizerBFGS(AbstractMinimizer):
    """
    Minimizer that uses the Broyden-Fletcher-Goldfarb-Shanno method.
    """

    # Initial Hessian multiplier
    _initial_H = 1

    def getOptions(self):
        return {'initialHessian':self._initial_H}

    def setOptions(self, **opts):
        for o in opts:
            if o=='initialHessian':
                self._initial_H=opts[o]
            else:
                raise KeyError("Invalid option '%s'"%o)

    def run(self, x):
        args=self._f.getArguments(x)
        gf=self._f.getGradient(x, *args)
        fx=self._f(x, *args)
        k=0
        try:
            n=len(x)
        except:
            n=x.getNumberOfDataPoints()
        I=np.eye(n)
        H=self._initial_H*I
        gnorm=Lsup(gf)
        self._doCallback(k, x, fx, gf)

        while gnorm > self._tol and k < self._imax:
            self.logger.info("iteration %d, gnorm=%e"%(k,gnorm))

            # determine search direction
            d=-self._f.getInner(H, gf)

            self.logger.debug("H = %s"%H)
            self.logger.debug("grad f(x) = %s"%gf)
            self.logger.debug("d = %s"%d)
            self.logger.debug("x = %s"%x)

            # determine step length
            alpha, fx, gf_new = line_search(self._f, x, d, gf, fx)
            self.logger.debug("alpha=%e"%alpha)
            # execute the step
            x_new=x+alpha*d
            delta_x=x_new-x
            x=x_new
            if gf_new is None:
                gf_new=self._f.getGradient(x_new)
            delta_g=gf_new-gf
            gf=gf_new
            k+=1
            self._doCallback(k, x, fx, gf)
            gnorm=Lsup(gf)
            if (gnorm<=self._tol): break

            # update Hessian
            denom=self._f.getInner(delta_x, delta_g)
            if denom < EPSILON * gnorm:
                denom=1e-5
                self.logger.debug("Break down in H update. Resetting.")
            rho=1./denom
            self.logger.debug("rho=%e"%rho)
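            # BFGS inverse Hessian update (rank-2):
            #   H <- (I - rho*s*y^T) H (I - rho*y*s^T) + rho*s*s^T
            # with s=delta_x, y=delta_g and rho=1/<s,y>, implemented below via
            # the cost function's inner product.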
            A=I-rho*delta_x[:,None]*delta_g[None,:]
            AT=I-rho*delta_g[:,None]*delta_x[None,:]
            H=self._f.getInner(A, self._f.getInner(H,AT)) + rho*delta_x[:,None]*delta_x[None,:]
        if k >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final gnorm=%e"%(k,gnorm))

        self._result=x
        return reason

##############################################################################
class MinimizerNLCG(AbstractMinimizer):
    """
    Minimizer that uses the nonlinear conjugate gradient method
    (Fletcher-Reeves variant).
    """

    def run(self, x):
        i=0
        k=0
        args=self._f.getArguments(x)
        r=-self._f.getGradient(x, *args)
        fx=self._f(x, *args)
        d=r
        delta=self._f.getInner(r,r)
        delta0=delta
        self._doCallback(i, x, fx, -r)

        while i<self._imax and Lsup(r)>self._tol:
            self.logger.info("iteration %d"%i)
            self.logger.debug("grad f(x) = %s"%(-r))
            self.logger.debug("d = %s"%d)
            self.logger.debug("x = %s"%x)

            alpha, fx, gf_new = line_search(self._f, x, d, -r, fx, c2=0.4)
            self.logger.debug("alpha=%e"%(alpha))
            x=x+alpha*d
            r=-self._f.getGradient(x) if gf_new is None else -gf_new
            delta_o=delta
            delta=self._f.getInner(r,r)
            beta=delta/delta_o
            d=r+beta*d
            k=k+1
            try:
                lenx=len(x)
            except:
                lenx=x.getNumberOfDataPoints()
            if k == lenx or self._f.getInner(r,d) <= 0:
                d=r
                k=0
            i+=1
            self._doCallback(i, x, fx, gf_new)

        if i >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final delta=%e"%(i,delta))

        self._result=x
        return reason


if __name__=="__main__":
    # Example usage with function 'rosen' (minimum=[1,1,...1]):
    from scipy.optimize import rosen, rosen_der
    from esys.downunder.costfunctions import CostFunction
    import sys
    N=100
    x0=np.array([4.]*N) # initial guess

    class RosenFunc(CostFunction):
        def _getInner(self, f0, f1):
            return np.dot(f0, f1)
        def _getValue(self, x, *args):
            return rosen(x)
        def _getGradient(self, x, *args):
            return rosen_der(x)

    f=RosenFunc()
    m=None
    if len(sys.argv)>1:
        method=sys.argv[1].lower()
        if method=='nlcg':
            m=MinimizerNLCG(f)
        elif method=='bfgs':
            m=MinimizerBFGS(f)

    if m is None:
        # default
        m=MinimizerLBFGS(f)
        #m.setOptions(historySize=10000)

    logging.basicConfig(format='[%(funcName)s] \033[1;30m%(message)s\033[0m', level=logging.INFO)
    m.setTolerance(1e-5)
    m.setMaxIterations(600)
    m.run(x0)
    m.logSummary()
    print("\tLsup(result)=%.8f"%np.amax(abs(m.getResult())))

    #from scipy.optimize import fmin_cg
    #print("scipy ref=%.8f"%np.amax(abs(fmin_cg(rosen, x0, rosen_der, maxiter=10000))))
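    # The algorithm can be selected on the command line, e.g.
    #   python minimizers.py nlcg
    #   python minimizers.py bfgs
    # Without an argument the L-BFGS minimizer is used.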
