########################################################
#
# Copyright (c) 2003-2012 by University of Queensland
# Earth Systems Science Computational Center (ESSCC)
# http://www.uq.edu.au/esscc
#
# Primary Business: Queensland, Australia
# Licensed under the Open Software License version 3.0
# http://www.opensource.org/licenses/osl-3.0.php
#
########################################################

__copyright__="""Copyright (c) 2003-2012 by University of Queensland
Earth Systems Science Computational Center (ESSCC)
http://www.uq.edu.au/esscc
Primary Business: Queensland, Australia"""
__license__="""Licensed under the Open Software License version 3.0
http://www.opensource.org/licenses/osl-3.0.php"""
__url__="https://launchpad.net/escript-finley"

__all__ = ['AbstractMinimizer', 'MinimizerLBFGS', 'MinimizerBFGS', 'MinimizerNLCG']

import logging
import numpy as np

try:
    from esys.escript import Lsup, sqrt, EPSILON
except ImportError:
    # escript is not available, fall back to numpy-based replacements
    Lsup=lambda x: np.amax(abs(x))
    sqrt=np.sqrt
    EPSILON=1e-18

lslogger=logging.getLogger('inv.minimizer.linesearch')
zoomlogger=logging.getLogger('inv.minimizer.linesearch.zoom')

def _zoom(phi, gradphi, phiargs, alpha_lo, alpha_hi, phi_lo, phi_hi, c1, c2, phi0, gphi0, IMAX=25):
    """
    Helper function for `line_search` below which tries to tighten the range
    alpha_lo...alpha_hi. See Chapter 3 of 'Numerical Optimization' by
    J. Nocedal for an explanation.
    """
    i=0
    while True:
        alpha=alpha_lo+.5*(alpha_hi-alpha_lo) # should use interpolation...
        args_a=phiargs(alpha)
        phi_a=phi(alpha, *args_a)
        zoomlogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
        if phi_a > phi0+c1*alpha*gphi0 or phi_a >= phi_lo:
            alpha_hi=alpha
        else:
            gphi_a=gradphi(alpha, *args_a)
            zoomlogger.debug("grad(phi(alpha))=%e"%(gphi_a))
            if np.abs(gphi_a) <= -c2*gphi0:
                break
            if gphi_a*(alpha_hi-alpha_lo) >= 0:
                alpha_hi = alpha_lo
            alpha_lo=alpha
            phi_lo=phi_a
        i+=1
        if i>IMAX:
            gphi_a=None
            break
    return alpha, phi_a, gphi_a
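
# The bisection step in _zoom above ("should use interpolation...") could be
# replaced by a safeguarded quadratic interpolation. A minimal sketch follows;
# the helper is hypothetical and is not called anywhere in this module.
def _quadratic_interpolate(alpha_lo, alpha_hi, phi_lo, phi_hi, gphi_lo):
    """
    Returns the minimizer of the quadratic matching the value phi_lo and slope
    gphi_lo at alpha_lo and the value phi_hi at alpha_hi, pushed away from the
    end points so the zoom bracket keeps shrinking.
    """
    d=alpha_hi-alpha_lo
    denom=2.*(phi_hi-phi_lo-gphi_lo*d)
    if abs(denom) < EPSILON:
        # (nearly) degenerate quadratic, fall back to bisection
        return alpha_lo+.5*d
    alpha=alpha_lo-gphi_lo*d*d/denom
    # safeguard: keep the trial point strictly inside the bracket
    lo, hi=min(alpha_lo, alpha_hi), max(alpha_lo, alpha_hi)
    margin=.1*abs(d)
    return min(max(alpha, lo+margin), hi-margin)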

def line_search(f, x, p, gf, fx, alpha_max=50.0, c1=1e-4, c2=0.9, IMAX=15):
    """
    Line search method that satisfies the strong Wolfe conditions.
    See Chapter 3 of 'Numerical Optimization' by J. Nocedal for an explanation.

    :param f: callable objective function f(x)
    :param x: start value for the line search
    :param p: search direction
    :param gf: value for the gradient of f at x
    :param fx: value of f(x)
    :param alpha_max: algorithm terminates if alpha reaches this value
    :param c1: value for Armijo condition (see reference)
    :param c2: value for curvature condition (see reference)
    :param IMAX: maximum number of iterations to perform
    """
    # this stores the latest gradf(x+a*p) which is returned
    gf_new=[gf]

    def phi(a, *args):
        """ phi(a):=f(x+a*p) """
        return f(x+a*p, *args)
    def gradphi(a, *args):
        gf_new[0]=f.getGradient(x+a*p, *args)
        return f.getInner(gf_new[0], p)
        #return f.getDirectionalDerivative(x+a*p, p, *args)
    def phiargs(a):
        try:
            args=f.getArguments(x+a*p)
        except:
            args=()
        return args

    old_alpha=0.
    # we assume gf is properly scaled so alpha=1 is a reasonable starting value
    alpha=1.
    if fx is None:
        args0=phiargs(0.)
        phi0=phi(0., *args0)
    else:
        phi0=fx
    lslogger.debug("phi(0)=%e"%(phi0))
    gphi0=f.getInner(gf, p) #gradphi(0., *args0)
    lslogger.debug("grad phi(0)=%e"%(gphi0))
    old_phi_a=phi0
    i=1

    while i<IMAX and alpha>0. and alpha<alpha_max:
        args_a=phiargs(alpha)
        phi_a=phi(alpha, *args_a)
        lslogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
        if (phi_a > phi0+c1*alpha*gphi0) or ((phi_a>=old_phi_a) and (i>1)):
            alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, old_alpha, alpha, old_phi_a, phi_a, c1, c2, phi0, gphi0)
            break
        gphi_a=gradphi(alpha, *args_a)
        if np.abs(gphi_a) <= -c2*gphi0:
            break
        if gphi_a >= 0:
            alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, alpha, old_alpha, phi_a, old_phi_a, c1, c2, phi0, gphi0)
            break

        old_alpha=alpha
        # the factor is arbitrary as long as there is sufficient increase
        alpha=2.*alpha
        old_phi_a=phi_a
        i+=1

    return alpha, phi_a, gf_new[0]
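
# For reference, line_search above targets the strong Wolfe conditions
# (Nocedal & Wright, Chapter 3):
#   sufficient decrease:  f(x+alpha*p) <= f(x) + c1*alpha*<grad f(x), p>
#   curvature:            |<grad f(x+alpha*p), p>| <= c2*|<grad f(x), p>|
# with 0 < c1 < c2 < 1. The defaults c1=1e-4 and c2=0.9 are the values
# recommended there for Newton and quasi-Newton directions.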

##############################################################################
class AbstractMinimizer(object):
    """
    Base class for function minimization methods.
    """

    TOLERANCE_REACHED, MAX_ITERATIONS_REACHED=range(2)

    def __init__(self, f, tol=1e-5, imax=300):
        """
        Initializes a new minimizer for a given cost function.

        :param f: the cost function to be minimized
        :type f: CostFunction
        :param tol: tolerance for the stopping criterion
        :param imax: maximum number of iterations
        """
        self._f=f
        self._tol = tol
        self._imax = imax
        self._result = None
        self._callback = None
        self.logger = logging.getLogger('inv.%s'%self.__class__.__name__)

    def setTolerance(self, tol):
        """
        Sets the tolerance for the stopping criterion. The minimizer stops when
        an appropriate norm is less than `tol`.
        """
        self._tol = tol

    def setMaxIterations(self, imax):
        """
        Sets the maximum number of iterations before the minimizer terminates.
        """
        self._imax = imax

    def setCallback(self, callback):
        """
        Sets a callback function to be called after every iteration.
        The arguments to the function are: (k, x, fx, gfx), where
        k is the current iteration, x is the current estimate, fx=f(x) and
        gfx=grad f(x).
        """
        if not callable(callback):
            raise TypeError("Callback function not callable.")
        self._callback = callback

    def _doCallback(self, *args):
        if self._callback is not None:
            self._callback(*args)

    def getResult(self):
        """
        Returns the result of the minimization.
        """
        return self._result

    def getOptions(self):
        """
        Returns a dictionary of minimizer-specific options.
        """
        return {}

    def setOptions(self, **opts):
        """
        Sets minimizer-specific options. For a list of possible options see
        `getOptions()`.
        """
        raise NotImplementedError

    def run(self, x0):
        """
        Executes the minimization algorithm for `f` starting with the initial
        guess `x0`.

        :return: TOLERANCE_REACHED or MAX_ITERATIONS_REACHED
        """
        raise NotImplementedError

    def logSummary(self):
        """
        Outputs a summary of the completed minimization process to the logger.
        """
        self.logger.warning("Number of function evaluations: %d"%self._f.Value_calls)
        self.logger.warning("Number of gradient evaluations: %d"%self._f.Gradient_calls)
        self.logger.warning("Number of inner product evaluations: %d"%self._f.Inner_calls)
        self.logger.warning("Number of argument evaluations: %d"%self._f.Arguments_calls)
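
# Usage sketch (hypothetical; `f` stands for any CostFunction instance, e.g.
# the RosenFunc example at the bottom of this module, and `x0` for an initial
# guess):
#
#   def report(k, x, fx, gfx):
#       print("iteration %d: f(x)=%e"%(k, fx))
#
#   m=MinimizerLBFGS(f, tol=1e-6, imax=200)
#   m.setCallback(report)
#   m.run(x0)
#   x_opt=m.getResult()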


##############################################################################
class MinimizerLBFGS(AbstractMinimizer):
    """
    Minimizer that uses the limited-memory Broyden-Fletcher-Goldfarb-Shanno
    method.
    """

    # History size
    _m = 15

    # Initial Hessian multiplier
    _initial_H = 1

    def getOptions(self):
        return {'historySize':self._m,'initialHessian':self._initial_H}

    def setOptions(self, **opts):
        for o in opts:
            if o=='historySize':
                self._m=opts[o]
            elif o=='initialHessian':
                self._initial_H=opts[o]
            else:
                raise KeyError("Invalid option '%s'"%o)

    def getNorm(self, x):
        return sqrt(self._f.getInner(x, x))

    def run(self, x):
        args=self._f.getArguments(x)
        gf=self._f.getGradient(x, *args)
        fx=self._f(x, *args)
        k=0
        H=self._initial_H
        error=2*self._tol
        s_and_y=[]

        self._doCallback(k, x, fx, gf)

        while error > self._tol and k < self._imax:
            self.logger.info("\033[1;31miteration %d\033[1;30m, error=%e"%(k,error))
            # determine search direction
            p = -self._twoLoop(H, gf, s_and_y)

            self.logger.debug("H = %s"%H)
            self.logger.debug("grad f(x) = %s"%gf)
            self.logger.debug("p = %s"%p)
            self.logger.debug("x = %s"%x)

            # determine step length
            alpha, fx_new, gf_new = line_search(self._f, x, p, gf, fx)
            self.logger.debug("alpha=%e"%(alpha))
            # execute the step
            x_new = x + alpha*p
            if gf_new is None:
                gf_new=self._f.getGradient(x_new)
            delta_x=x_new-x
            delta_g=gf_new-gf
            s_and_y.append((delta_x,delta_g))
            error=abs(fx_new-fx)/abs(fx_new)
            x=x_new
            gf=gf_new
            fx=fx_new
            k+=1
            self._doCallback(k, x, fx, gf)
            if (error<=self._tol): break

            # delete oldest vector pair
            if k>self._m: s_and_y.pop(0)

            # set the new scaling factor (approximation of inverse Hessian)
            gnorm=self.getNorm(gf)
            denom=self.getNorm(delta_g)
            if denom < EPSILON * gnorm:
                H=1.
                self.logger.debug("Break down in H update. Resetting to 1.")
            else:
                H=self._f.getInner(delta_x,delta_g)/denom**2

        if k >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final error=%e"%(k,error))
        self._result=x
        return reason
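
    # Note: the scalar H computed at the end of each iteration above equals
    # <delta_x, delta_g>/<delta_g, delta_g>, the gamma_k scaling of
    # Nocedal & Wright (eq. 7.20); it serves as the initial inverse Hessian
    # approximation H0 in the next call to _twoLoop().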

    def _twoLoop(self, H, gf, s_and_y):
        """
        Helper for the L-BFGS method.
        See 'Numerical Optimization' by J. Nocedal for an explanation.
        """
        q=gf
        alpha=[]
        for s,y in reversed(s_and_y):
            rho=1./(self._f.getInner(s, y))
            a=rho*self._f.getInner(s, q)
            alpha.append(a)
            q=q-a*y

        r=H*q
        for s,y in s_and_y:
            rho=1./(self._f.getInner(s, y))
            beta=rho*self._f.getInner(y,r)
            a=alpha.pop()
            r=r+s*(a-beta)
        return r
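
    # For reference, _twoLoop() implements the standard L-BFGS two-loop
    # recursion (Nocedal & Wright, Algorithm 7.4), returning r = H_k*grad f(x)
    # where H_k is the current inverse Hessian approximation:
    #   q <- grad f(x)
    #   for i = k-1,...,k-m:  a_i = rho_i*<s_i, q>;  q <- q - a_i*y_i
    #   r <- H0*q
    #   for i = k-m,...,k-1:  b = rho_i*<y_i, r>;    r <- r + s_i*(a_i - b)
    # with rho_i = 1/<s_i, y_i> and H0 the scalar initial Hessian factor.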

##############################################################################
class MinimizerBFGS(AbstractMinimizer):
    """
    Minimizer that uses the Broyden-Fletcher-Goldfarb-Shanno method.
    """

    def run(self, x):
        gf=self._f.getGradient(x)
        fx=None
        k=0
        try:
            n=len(x)
        except:
            n=x.getNumberOfDataPoints()
        I=np.eye(n)
        H=I
        gnorm=Lsup(gf)
        while gnorm > self._tol and k < self._imax:
            self.logger.info("iteration %d, gnorm=%e"%(k,gnorm))

            # determine search direction
            d=-self._f.getInner(H, gf)
            # determine step length
            alpha, fx, gf_new = line_search(self._f, x, d, gf, fx)
            self.logger.debug("alpha=%e"%alpha)
            # execute the step
            x_new=x+alpha*d
            delta_x=x_new-x
            x=x_new
            if gf_new is None:
                gf_new=self._f.getGradient(x_new)
            delta_g=gf_new-gf
            gf=gf_new
            k+=1
            gnorm=Lsup(gf)
            if (gnorm<=self._tol): break

            # update inverse Hessian approximation
            rho=1./(self._f.getInner(delta_x, delta_g))
            self.logger.debug("rho=%e"%rho)
            A=I-rho*delta_x[:,None]*delta_g[None,:]
            AT=I-rho*delta_g[:,None]*delta_x[None,:]
            H=self._f.getInner(A, self._f.getInner(H,AT)) + rho*delta_x[:,None]*delta_x[None,:]
        if k >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final gnorm=%e"%(k,gnorm))

        self._result=x
        return reason
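
    # For reference, the update in run() above is the BFGS formula for the
    # inverse Hessian approximation (Nocedal & Wright, eq. 6.17):
    #   H <- (I - rho*s*y^T) H (I - rho*y*s^T) + rho*s*s^T,  rho = 1/<s, y>
    # with s = delta_x and y = delta_g. It stores a dense n x n matrix and is
    # therefore only practical for moderately sized problems.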

##############################################################################
class MinimizerNLCG(AbstractMinimizer):
    """
    Minimizer that uses the nonlinear conjugate gradient method.
    """

    def run(self, x):
        i=0
        k=0
        r=-self._f.getGradient(x)
        fx=None
        d=r
        delta=self._f.getInner(r,r)
        delta0=delta
        while i<self._imax and delta>self._tol*self._tol*delta0:
            self.logger.info("iteration %d, delta=%e"%(i,delta))
            delta_d=self._f.getInner(d,d)
            # this line search may not be ideal here since the initial step
            # alpha=1 need not suit the conjugate direction d
            alpha, fx, gf_new = line_search(self._f, x, d, -r, fx)
            self.logger.debug("alpha=%e"%(alpha))
            x=x+alpha*d
            r=-self._f.getGradient(x) if gf_new is None else -gf_new
            delta_o=delta
            delta=self._f.getInner(r,r)
            beta=delta/delta_o
            d=r+beta*d
            k=k+1
            try:
                lenx=len(x)
            except:
                lenx=x.getNumberOfDataPoints()
            if k == lenx or self._f.getInner(r,d) <= 0:
                d=r
                k=0
            i+=1

        if i >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final delta=%e"%(i,delta))

        self._result=x
        return reason
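
    # For reference, the direction update above is the Fletcher-Reeves variant
    # of nonlinear CG: with r = -grad f(x),
    #   beta = <r_new, r_new>/<r_old, r_old>,  d_new = r_new + beta*d_old
    # restarting from steepest descent (d=r, k=0) after lenx iterations or
    # whenever <r, d> <= 0, i.e. when d stops being a descent direction.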


if __name__=="__main__":
    # Example usage with function 'rosen' (minimum=[1,1,...1]):
    from scipy.optimize import rosen, rosen_der
    from esys.downunder.costfunctions import CostFunction
    import sys
    N=100
    x0=np.array([4.]*N) # initial guess

    class RosenFunc(CostFunction):
        def _getInner(self, f0, f1):
            return np.dot(f0, f1)
        def _getValue(self, x, *args):
            return rosen(x)
        def _getGradient(self, x, *args):
            return rosen_der(x)

    f=RosenFunc()
    m=None
    if len(sys.argv)>1:
        method=sys.argv[1].lower()
        if method=='nlcg':
            m=MinimizerNLCG(f)
        elif method=='bfgs':
            m=MinimizerBFGS(f)

    if m is None:
        # default
        m=MinimizerLBFGS(f)
        #m.setOptions(historySize=10000)

    logging.basicConfig(format='[%(funcName)s] \033[1;30m%(message)s\033[0m', level=logging.DEBUG)
    m.setTolerance(1e-5)
    m.setMaxIterations(10000)
    m.run(x0)
    m.logSummary()
    print("\tLsup(result)=%.8f"%np.amax(abs(m.getResult())))

    #from scipy.optimize import fmin_cg
    #print("scipy ref=%.8f"%np.amax(abs(fmin_cg(rosen, x0, rosen_der, maxiter=10000))))
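
    # Example invocation (file name assumed; requires scipy and esys.downunder):
    #   python minimizers.py        -> MinimizerLBFGS (default)
    #   python minimizers.py bfgs   -> MinimizerBFGS
    #   python minimizers.py nlcg   -> MinimizerNLCG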