1 |
|
2 |
############################################################################## |
3 |
# |
4 |
# Copyright (c) 2003-2012 by University of Queensland |
5 |
# http://www.uq.edu.au |
6 |
# |
7 |
# Primary Business: Queensland, Australia |
8 |
# Licensed under the Open Software License version 3.0 |
9 |
# http://www.opensource.org/licenses/osl-3.0.php |
10 |
# |
11 |
# Development until 2012 by Earth Systems Science Computational Center (ESSCC) |
12 |
# Development since 2012 by School of Earth Sciences |
13 |
# |
14 |
############################################################################## |
15 |
|
16 |
"""Generic minimization algorithms""" |
17 |
|
18 |
__copyright__="""Copyright (c) 2003-2012 by University of Queensland |
19 |
http://www.uq.edu.au |
20 |
Primary Business: Queensland, Australia""" |
21 |
__license__="""Licensed under the Open Software License version 3.0 |
22 |
http://www.opensource.org/licenses/osl-3.0.php""" |
23 |
__url__="https://launchpad.net/escript-finley" |
24 |
|
25 |
__all__ = ['AbstractMinimizer', 'MinimizerLBFGS', 'MinimizerBFGS', 'MinimizerNLCG'] |
26 |
|
27 |
import logging |
28 |
import numpy as np |
29 |
|
30 |
try: |
31 |
from esys.escript import Lsup, sqrt, EPSILON |
32 |
except: |
33 |
Lsup=lambda x: np.amax(abs(x)) |
34 |
sqrt=np.sqrt |
35 |
EPSILON=1e-18 |
36 |
|
37 |
import sys |
38 |
if sys.version_info[0]>2: |
39 |
xrange=range |
40 |
|
41 |
lslogger=logging.getLogger('inv.minimizer.linesearch') |
42 |
zoomlogger=logging.getLogger('inv.minimizer.linesearch.zoom') |
43 |
|
44 |
def _zoom(phi, gradphi, phiargs, alpha_lo, alpha_hi, phi_lo, phi_hi, c1, c2, phi0, gphi0, IMAX=25): |
45 |
""" |
46 |
Helper function for `line_search` below which tries to tighten the range |
47 |
alpha_lo...alpha_hi. See Chapter 3 of 'Numerical Optimization' by |
48 |
J. Nocedal for an explanation. |
49 |
""" |
50 |
i=0 |
51 |
while True: |
52 |
alpha=alpha_lo+.5*(alpha_hi-alpha_lo) # should use interpolation... |
53 |
args_a=phiargs(alpha) |
54 |
phi_a=phi(alpha, *args_a) |
55 |
zoomlogger.debug("Zoom.iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a)) |
56 |
if phi_a > phi0+c1*alpha*gphi0 or phi_a >= phi_lo: |
57 |
alpha_hi=alpha |
58 |
else: |
59 |
gphi_a=gradphi(alpha, *args_a) |
60 |
zoomlogger.debug("Zoom.grad(phi(alpha))=%e"%(gphi_a)) |
61 |
if np.abs(gphi_a) <= -c2*gphi0: |
62 |
break |
63 |
if gphi_a*(alpha_hi-alpha_lo) >= 0: |
64 |
alpha_hi = alpha_lo |
65 |
alpha_lo=alpha |
66 |
phi_lo=phi_a |
67 |
i+=1 |
68 |
if i>IMAX: |
69 |
gphi_a=None |
70 |
break |
71 |
return alpha, phi_a, gphi_a |
72 |
|
73 |
def line_search(f, x, p, gf, fx, alpha_max=50.0, c1=1e-4, c2=0.9, IMAX=15): |
74 |
""" |
75 |
Line search method that satisfies the strong Wolfe conditions. |
76 |
See Chapter 3 of 'Numerical Optimization' by J. Nocedal for an explanation. |
77 |
|
78 |
:param f: callable objective function f(x) |
79 |
:param x: start value for the line search |
80 |
:param p: search direction |
81 |
:param gf: value for the gradient of f at x |
82 |
:param fx: value of f(x) |
83 |
:param alpha_max: algorithm terminates if alpha reaches this value |
84 |
:param c1: value for Armijo condition (see reference) |
85 |
:param c2: value for curvature condition (see reference) |
86 |
:param IMAX: maximum number of iterations to perform |
87 |
""" |
88 |
# this stores the latest gradf(x+a*p) which is returned |
89 |
gf_new=[gf] |
90 |
|
91 |
def phi(a, *args): |
92 |
""" phi(a):=f(x+a*p) """ |
93 |
return f(x+a*p, *args) |
94 |
def gradphi(a, *args): |
95 |
gf_new[0]=f.getGradient(x+a*p, *args) |
96 |
return f.getDualProduct(p, gf_new[0]) |
97 |
|
98 |
def phiargs(a): |
99 |
try: |
100 |
args=f.getArguments(x+a*p) |
101 |
except: |
102 |
args=() |
103 |
return args |
104 |
|
105 |
old_alpha=0. |
106 |
# we assume gf is properly scaled so alpha=1 is a reasonable starting value |
107 |
alpha=1. |
108 |
if fx is None: |
109 |
args0=phiargs(0.) |
110 |
phi0=phi(0., *args0) |
111 |
else: |
112 |
phi0=fx |
113 |
lslogger.debug("phi(0)=%e"%(phi0)) |
114 |
gphi0=f.getDualProduct(p, gf) #gradphi(0., *args0) |
115 |
lslogger.debug("grad phi(0)=%e"%(gphi0)) |
116 |
old_phi_a=phi0 |
117 |
i=1 |
118 |
|
119 |
while i<IMAX and alpha>0. and alpha<alpha_max: |
120 |
|
121 |
args_a=phiargs(alpha) |
122 |
phi_a=phi(alpha, *args_a) |
123 |
lslogger.debug("Line Search.iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a)) |
124 |
if (phi_a > phi0+c1*alpha*gphi0) or ((phi_a>=old_phi_a) and (i>1)): |
125 |
alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, old_alpha, alpha, old_phi_a, phi_a, c1, c2, phi0, gphi0) |
126 |
break |
127 |
|
128 |
gphi_a=gradphi(alpha, *args_a) |
129 |
if np.abs(gphi_a) <= -c2*gphi0: |
130 |
break |
131 |
if gphi_a >= 0: |
132 |
alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, alpha, old_alpha, phi_a, old_phi_a, c1, c2, phi0, gphi0) |
133 |
break |
134 |
|
135 |
old_alpha=alpha |
136 |
# the factor is arbitrary as long as there is sufficient increase |
137 |
alpha=2.*alpha |
138 |
old_phi_a=phi_a |
139 |
i+=1 |
140 |
|
141 |
return alpha, phi_a, gf_new[0] |
142 |
|
143 |
############################################################################## |
144 |
class AbstractMinimizer(object): |
145 |
""" |
146 |
Base class for function minimization methods. |
147 |
""" |
148 |
|
149 |
TOLERANCE_REACHED, MAX_ITERATIONS_REACHED=list(xrange(2)) |
150 |
|
151 |
def __init__(self, f, tol=1e-5, imax=300): |
152 |
""" |
153 |
Initializes a new minimizer for a given cost function. |
154 |
|
155 |
:param f: the cost function to be minimized |
156 |
:type f: CostFunction |
157 |
""" |
158 |
self._f=f |
159 |
self._tol = tol |
160 |
self._imax = imax |
161 |
self._result = None |
162 |
self._callback = None |
163 |
self.logger = logging.getLogger('inv.%s'%self.__class__.__name__) |
164 |
|
165 |
def setTolerance(self, tol): |
166 |
""" |
167 |
Sets the tolerance for the stopping criterion. The minimizer stops when |
168 |
an appropriate norm is less than `tol`. |
169 |
""" |
170 |
self._tol = tol |
171 |
|
172 |
def setMaxIterations(self, imax): |
173 |
""" |
174 |
Sets the maximum number of iterations before the minimizer terminates. |
175 |
""" |
176 |
self._imax = imax |
177 |
|
178 |
def setCallback(self, callback): |
179 |
""" |
180 |
Sets a callback function to be called after every iteration. |
181 |
The arguments to the function are: (k, x, fx, gfx), where |
182 |
k is the current iteration, x is the current estimate, fx=f(x) and |
183 |
gfx=grad f(x). |
184 |
""" |
185 |
if callback is not None and not callable(callback): |
186 |
raise TypeError("Callback function not callable.") |
187 |
self._callback = callback |
188 |
|
189 |
def _doCallback(self, *args): |
190 |
if self._callback is not None: |
191 |
self._callback(*args) |
192 |
|
193 |
def getResult(self): |
194 |
""" |
195 |
Returns the result of the minimization. |
196 |
""" |
197 |
return self._result |
198 |
|
199 |
def getOptions(self): |
200 |
""" |
201 |
Returns a dictionary of minimizer-specific options. |
202 |
""" |
203 |
return {} |
204 |
|
205 |
def setOptions(self, **opts): |
206 |
""" |
207 |
Sets minimizer-specific options. For a list of possible options see |
208 |
`getOptions()`. |
209 |
""" |
210 |
raise NotImplementedError |
211 |
|
212 |
def run(self, x0): |
213 |
""" |
214 |
Executes the minimization algorithm for *f* starting with the initial |
215 |
guess ``x0``. |
216 |
|
217 |
:return: `TOLERANCE_REACHED` or `MAX_ITERATIONS_REACHED` |
218 |
""" |
219 |
raise NotImplementedError |
220 |
|
221 |
def logSummary(self): |
222 |
""" |
223 |
Outputs a summary of the completed minimization process to the logger. |
224 |
""" |
225 |
if hasattr(self._f, "Value_calls"): |
226 |
self.logger.warning("Number of function evaluations: %d"%self._f.Value_calls) |
227 |
self.logger.warning("Number of gradient evaluations: %d"%self._f.Gradient_calls) |
228 |
self.logger.warning("Number of inner product evaluations: %d"%self._f.DualProduct_calls) |
229 |
self.logger.warning("Number of argument evaluations: %d"%self._f.Arguments_calls) |
230 |
|
231 |
|
232 |
############################################################################## |
233 |
class MinimizerLBFGS(AbstractMinimizer): |
234 |
""" |
235 |
Minimizer that uses the limited-memory Broyden-Fletcher-Goldfarb-Shanno |
236 |
method. |
237 |
""" |
238 |
|
239 |
# History size |
240 |
_m = 15 |
241 |
|
242 |
# Initial Hessian multiplier |
243 |
_initial_H = 1 |
244 |
|
245 |
def getOptions(self): |
246 |
return {'historySize':self._m,'initialHessian':self._initial_H} |
247 |
|
248 |
def setOptions(self, **opts): |
249 |
for o in opts: |
250 |
if o=='historySize': |
251 |
self._m=opts[o] |
252 |
elif o=='initialHessian': |
253 |
self._initial_H=opts[o] |
254 |
else: |
255 |
raise KeyError("Invalid option '%s'"%o) |
256 |
|
257 |
def run(self, x): |
258 |
args=self._f.getArguments(x) |
259 |
gf=self._f.getGradient(x, *args) |
260 |
fx=self._f(x, *args) |
261 |
if self._f.provides_inverse_Hessian_approximation: |
262 |
self._f.updateHessian() |
263 |
invH_scale = None |
264 |
else: |
265 |
invH_scale = self._initial_H |
266 |
k=0 |
267 |
error=2*self._tol |
268 |
s_and_y=[] |
269 |
|
270 |
self._doCallback(k, x, fx, gf) |
271 |
|
272 |
while error > self._tol and k < self._imax: |
273 |
#self.logger.info("\033[1;31miteration %d\033[1;30m, error=%e"%(k,error)) |
274 |
if k >0: |
275 |
self.logger.info("LBFGS.iteration %d, error=%e"%(k,error)) |
276 |
else: |
277 |
self.logger.info("LBFGS.iteration %d"%k) |
278 |
# determine search direction |
279 |
p = -self._twoLoop(invH_scale, gf, s_and_y, x, *args) |
280 |
|
281 |
if invH_scale: self.logger.debug("H = %s"%invH_scale) |
282 |
self.logger.debug("grad f(x) = %s"%gf) |
283 |
#self.logger.debug("p = %s"%p) |
284 |
#self.logger.debug("x = %s"%x) |
285 |
|
286 |
# determine step length |
287 |
alpha, fx_new, gf_new = line_search(self._f, x, p, gf, fx) |
288 |
self.logger.debug("LBFGS.alpha=%e"%(alpha)) |
289 |
# execute the step |
290 |
x_new = x + alpha*p |
291 |
print gf_new |
292 |
if gf_new is None: |
293 |
args=self._f.getArguments(x_new) |
294 |
gf_new=self._f.getGradient(x_new, args) |
295 |
|
296 |
delta_x=x_new-x |
297 |
delta_g=gf_new-gf |
298 |
rho=self._f.getDualProduct(delta_x, delta_g) |
299 |
print "rho =",rho |
300 |
if abs(rho)>0 : |
301 |
s_and_y.append((delta_x,delta_g, rho )) |
302 |
else: |
303 |
raise ZeroDivisionError("LBFGS break down.") |
304 |
# this needs to be reviewed |
305 |
if fx_new==0.: |
306 |
error=fx |
307 |
else: |
308 |
error=abs(fx_new-fx)/abs(fx_new) |
309 |
|
310 |
self._f.updateHessian() |
311 |
x=x_new |
312 |
gf=gf_new |
313 |
fx=fx_new |
314 |
k+=1 |
315 |
self._doCallback(k, x, fx, gf) |
316 |
if (error<=self._tol): break |
317 |
|
318 |
# delete oldest vector pair |
319 |
if k>self._m: s_and_y.pop(0) |
320 |
|
321 |
if not self._f.provides_inverse_Hessian_approximation: |
322 |
# set the new scaling factor (approximation of inverse Hessian) |
323 |
denom=self._f.getDualProduct(delta_g, delta_g) |
324 |
if denom > 0: |
325 |
invH_scale=self._f.getDualProduct(delta_x,delta_g)/denom |
326 |
else: |
327 |
invH_scale=self._initial_H |
328 |
self.logger.debug("LBFGS.Break down in H update. Resetting to initial value %s."%self._initial_H) |
329 |
|
330 |
if k >= self._imax: |
331 |
reason=self.MAX_ITERATIONS_REACHED |
332 |
self.logger.warning("LBFGS.Maximum number of iterations reached!") |
333 |
else: |
334 |
reason=self.TOLERANCE_REACHED |
335 |
self.logger.warning("LBFGS.Success after %d iterations! Final error=%e"%(k,error)) |
336 |
self._result=x |
337 |
return reason |
338 |
|
339 |
def _twoLoop(self, invH_scale, gf, s_and_y, x, *args): |
340 |
""" |
341 |
Helper for the L-BFGS method. |
342 |
See 'Numerical Optimization' by J. Nocedal for an explanation. |
343 |
""" |
344 |
q=gf |
345 |
alpha=[] |
346 |
for s,y, rho in reversed(s_and_y): |
347 |
a=self._f.getDualProduct(s, q)/rho |
348 |
alpha.append(a) |
349 |
q=q-a*y |
350 |
|
351 |
if self._f.provides_inverse_Hessian_approximation: |
352 |
r= self._f.getInverseHessianApproximation(x, q, *args) |
353 |
else: |
354 |
r= invH_scale * q |
355 |
|
356 |
for s,y,rho in s_and_y: |
357 |
beta=self._f.getDualProduct(r, y)/rho |
358 |
a=alpha.pop() |
359 |
r=r+s*(a-beta) |
360 |
return r |
361 |
|
362 |
############################################################################## |
363 |
class MinimizerBFGS(AbstractMinimizer): |
364 |
""" |
365 |
Minimizer that uses the Broyden-Fletcher-Goldfarb-Shanno method. |
366 |
""" |
367 |
|
368 |
# Initial Hessian multiplier |
369 |
_initial_H = 1 |
370 |
|
371 |
def getOptions(self): |
372 |
return {'initialHessian':self._initial_H} |
373 |
|
374 |
def setOptions(self, **opts): |
375 |
for o in opts: |
376 |
if o=='initialHessian': |
377 |
self._initial_H=opts[o] |
378 |
else: |
379 |
raise KeyError("Invalid option '%s'"%o) |
380 |
|
381 |
def run(self, x): |
382 |
args=self._f.getArguments(x) |
383 |
gf=self._f.getGradient(x, *args) |
384 |
fx=self._f(x, *args) |
385 |
k=0 |
386 |
try: |
387 |
n=len(x) |
388 |
except: |
389 |
n=x.getNumberOfDataPoints() |
390 |
I=np.eye(n) |
391 |
H=self._initial_H*I |
392 |
gnorm=Lsup(gf) |
393 |
self._doCallback(k, x, fx, gf) |
394 |
|
395 |
while gnorm > self._tol and k < self._imax: |
396 |
self.logger.info("iteration %d, gnorm=%e"%(k,gnorm)) |
397 |
|
398 |
# determine search direction |
399 |
d=-self._f.getDualProduct(H, gf) |
400 |
|
401 |
self.logger.debug("H = %s"%H) |
402 |
self.logger.debug("grad f(x) = %s"%gf) |
403 |
self.logger.debug("d = %s"%d) |
404 |
self.logger.debug("x = %s"%x) |
405 |
|
406 |
# determine step length |
407 |
alpha, fx, gf_new = line_search(self._f, x, d, gf, fx) |
408 |
self.logger.debug("alpha=%e"%alpha) |
409 |
# execute the step |
410 |
x_new=x+alpha*d |
411 |
delta_x=x_new-x |
412 |
x=x_new |
413 |
if gf_new is None: |
414 |
gf_new=self._f.getGradient(x_new) |
415 |
delta_g=gf_new-gf |
416 |
gf=gf_new |
417 |
k+=1 |
418 |
self._doCallback(k, x, fx, gf) |
419 |
gnorm=Lsup(gf) |
420 |
if (gnorm<=self._tol): break |
421 |
|
422 |
# update Hessian |
423 |
denom=self._f.getDualProduct(delta_x, delta_g) |
424 |
if denom < EPSILON * gnorm: |
425 |
denom=1e-5 |
426 |
self.logger.debug("Break down in H update. Resetting.") |
427 |
rho=1./denom |
428 |
self.logger.debug("rho=%e"%rho) |
429 |
A=I-rho*delta_x[:,None]*delta_g[None,:] |
430 |
AT=I-rho*delta_g[:,None]*delta_x[None,:] |
431 |
H=self._f.getDualProduct(A, self._f.getDualProduct(H,AT)) + rho*delta_x[:,None]*delta_x[None,:] |
432 |
if k >= self._imax: |
433 |
reason=self.MAX_ITERATIONS_REACHED |
434 |
self.logger.warning("Maximum number of iterations reached!") |
435 |
else: |
436 |
reason=self.TOLERANCE_REACHED |
437 |
self.logger.warning("Success after %d iterations! Final gnorm=%e"%(k,gnorm)) |
438 |
|
439 |
self._result=x |
440 |
return reason |
441 |
|
442 |
############################################################################## |
443 |
class MinimizerNLCG(AbstractMinimizer): |
444 |
""" |
445 |
Minimizer that uses the nonlinear conjugate gradient method |
446 |
(Fletcher-Reeves variant). |
447 |
""" |
448 |
|
449 |
def run(self, x): |
450 |
i=0 |
451 |
k=0 |
452 |
args=self._f.getArguments(x) |
453 |
r=-self._f.getGradient(x, *args) |
454 |
fx=self._f(x, *args) |
455 |
d=r |
456 |
delta=self._f.getDualProduct(r,r) |
457 |
delta0=delta |
458 |
self._doCallback(i, x, fx, -r) |
459 |
|
460 |
while i<self._imax and Lsup(r)>self._tol: |
461 |
self.logger.info("iteration %d"%i) |
462 |
self.logger.debug("grad f(x) = %s"%(-r)) |
463 |
self.logger.debug("d = %s"%d) |
464 |
self.logger.debug("x = %s"%x) |
465 |
|
466 |
alpha, fx, gf_new = line_search(self._f, x, d, -r, fx, c2=0.4) |
467 |
self.logger.debug("alpha=%e"%(alpha)) |
468 |
x=x+alpha*d |
469 |
r=-self._f.getGradient(x) if gf_new is None else -gf_new |
470 |
delta_o=delta |
471 |
delta=self._f.getDualProduct(r,r) |
472 |
beta=delta/delta_o |
473 |
d=r+beta*d |
474 |
k=k+1 |
475 |
try: |
476 |
lenx=len(x) |
477 |
except: |
478 |
lenx=x.getNumberOfDataPoints() |
479 |
if k == lenx or self._f.getDualProduct(r,d) <= 0: |
480 |
d=r |
481 |
k=0 |
482 |
i+=1 |
483 |
self._doCallback(i, x, fx, gf_new) |
484 |
|
485 |
if i >= self._imax: |
486 |
reason=self.MAX_ITERATIONS_REACHED |
487 |
self.logger.warning("Maximum number of iterations reached!") |
488 |
else: |
489 |
reason=self.TOLERANCE_REACHED |
490 |
self.logger.warning("Success after %d iterations! Final delta=%e"%(i,delta)) |
491 |
|
492 |
self._result=x |
493 |
return reason |
494 |
|
495 |
|
496 |
if __name__=="__main__": |
497 |
# Example usage with function 'rosen' (minimum=[1,1,...1]): |
498 |
from scipy.optimize import rosen, rosen_der |
499 |
from esys.downunder import MeteredCostFunction |
500 |
import sys |
501 |
N=10 |
502 |
x0=np.array([4.]*N) # initial guess |
503 |
|
504 |
class RosenFunc(MeteredCostFunction): |
505 |
def __init__(self): |
506 |
super(RosenFunc, self).__init__() |
507 |
self.provides_inverse_Hessian_approximation=False |
508 |
def _getDualProduct(self, f0, f1): |
509 |
return np.dot(f0, f1) |
510 |
def _getValue(self, x, *args): |
511 |
return rosen(x) |
512 |
def _getGradient(self, x, *args): |
513 |
return rosen_der(x) |
514 |
|
515 |
f=RosenFunc() |
516 |
m=None |
517 |
if len(sys.argv)>1: |
518 |
method=sys.argv[1].lower() |
519 |
if method=='nlcg': |
520 |
m=MinimizerNLCG(f) |
521 |
elif method=='bfgs': |
522 |
m=MinimizerBFGS(f) |
523 |
|
524 |
if m is None: |
525 |
# default |
526 |
m=MinimizerLBFGS(f) |
527 |
#m.setOptions(historySize=10000) |
528 |
|
529 |
logging.basicConfig(format='[%(funcName)s] \033[1;30m%(message)s\033[0m', level=logging.DEBUG) |
530 |
m.setTolerance(1e-5) |
531 |
m.setMaxIterations(600) |
532 |
m.run(x0) |
533 |
m.logSummary() |
534 |
print("\tLsup(result)=%.8f"%np.amax(abs(m.getResult()))) |
535 |
|
536 |
#from scipy.optimize import fmin_cg |
537 |
#print("scipy ref=%.8f"%np.amax(abs(fmin_cg(rosen, x0, rosen_der, maxiter=10000)))) |
538 |
|