/[escript]/trunk/downunder/py_src/minimizers.py

Revision 3947 - Wed Aug 22 23:19:10 2012 UTC by caltinay
File MIME type: text/x-python
File size: 15261 byte(s)
Compiling and installing downunder module now. Adjusted import statements
accordingly. Added a gravity test run.


########################################################
#
# Copyright (c) 2003-2012 by University of Queensland
# Earth Systems Science Computational Center (ESSCC)
# http://www.uq.edu.au/esscc
#
# Primary Business: Queensland, Australia
# Licensed under the Open Software License version 3.0
# http://www.opensource.org/licenses/osl-3.0.php
#
########################################################

__copyright__="""Copyright (c) 2003-2012 by University of Queensland
Earth Systems Science Computational Center (ESSCC)
http://www.uq.edu.au/esscc
Primary Business: Queensland, Australia"""
__license__="""Licensed under the Open Software License version 3.0
http://www.opensource.org/licenses/osl-3.0.php"""
__url__="https://launchpad.net/escript-finley"

__all__ = ['AbstractMinimizer', 'MinimizerLBFGS', 'MinimizerBFGS', 'MinimizerNLCG']

import logging
import numpy as np
try:
    from esys.escript import Lsup, sqrt, EPSILON
except ImportError:
    # escript is not available, fall back to numpy equivalents
    Lsup=lambda x: np.amax(abs(x))
    sqrt=np.sqrt
    EPSILON=1e-18

lslogger=logging.getLogger('inv.minimizer.linesearch')
zoomlogger=logging.getLogger('inv.minimizer.linesearch.zoom')

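# Note on the cost function interface assumed below (a summary of how the
# object `f` is used in this module, not an authoritative API description):
# the minimizers expect `f` to be callable as f(x, *args) and to provide
# getGradient(x, *args), getInner(a, b) and getArguments(x); logSummary()
# additionally reads the counters Value_calls, Gradient_calls, Inner_calls
# and Arguments_calls. A minimal duck-typed sketch:
#
#   class MyCostFunction(object):
#       def __call__(self, x, *args):    return ...  # value f(x)
#       def getGradient(self, x, *args): return ...  # gradient of f at x
#       def getInner(self, a, b):        return ...  # inner product <a, b>
#       def getArguments(self, x):       return ()   # precomputed arguments
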
def _zoom(phi, gradphi, phiargs, alpha_lo, alpha_hi, phi_lo, phi_hi, c1, c2, phi0, gphi0, IMAX=25):
    """
    Helper function for `line_search` below which tries to tighten the range
    alpha_lo...alpha_hi. See Chapter 3 of 'Numerical Optimization' by
    J. Nocedal for an explanation.
    """
    i=0
    while True:
        alpha=alpha_lo+.5*(alpha_hi-alpha_lo) # should use interpolation...
        args_a=phiargs(alpha)
        phi_a=phi(alpha, *args_a)
        zoomlogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
        if phi_a > phi0+c1*alpha*gphi0 or phi_a >= phi_lo:
            alpha_hi=alpha
        else:
            gphi_a=gradphi(alpha, *args_a)
            zoomlogger.debug("grad(phi(alpha))=%e"%(gphi_a))
            if np.abs(gphi_a) <= -c2*gphi0:
                break
            if gphi_a*(alpha_hi-alpha_lo) >= 0:
                alpha_hi = alpha_lo
            alpha_lo=alpha
            phi_lo=phi_a
        i+=1
        if i>IMAX:
            gphi_a=None
            break
    return alpha, phi_a, gphi_a
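
# The bisection step above is the simplest choice; the "should use
# interpolation" note refers to fitting a low-order polynomial to phi as
# suggested in Nocedal & Wright. A hedged sketch of the quadratic variant
# (illustration only, not used by _zoom(); it would also require tracking
# gphi_lo, the derivative of phi at alpha_lo):
#
#   d = alpha_hi - alpha_lo
#   # minimizer of the quadratic through (alpha_lo, phi_lo) with slope
#   # gphi_lo at alpha_lo and value phi_hi at alpha_hi:
#   alpha = alpha_lo - 0.5*gphi_lo*d**2 / (phi_hi - phi_lo - gphi_lo*d)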

def line_search(f, x, p, gf, fx, alpha_max=50.0, c1=1e-4, c2=0.9, IMAX=15):
    """
    Line search method that satisfies the strong Wolfe conditions.
    See Chapter 3 of 'Numerical Optimization' by J. Nocedal for an explanation.

    :param f: callable objective function f(x)
    :param x: start value for the line search
    :param p: search direction
    :param gf: value for the gradient of f at x
    :param fx: value of f(x)
    :param alpha_max: algorithm terminates if alpha reaches this value
    :param c1: value for Armijo condition (see reference)
    :param c2: value for curvature condition (see reference)
    :param IMAX: maximum number of iterations to perform
    """
    # this stores the latest gradf(x+a*p) which is returned
    gf_new=[gf]

    def phi(a, *args):
        """ phi(a):=f(x+a*p) """
        return f(x+a*p, *args)
    def gradphi(a, *args):
        gf_new[0]=f.getGradient(x+a*p, *args)
        return f.getInner(gf_new[0], p)
        #return f.getDirectionalDerivative(x+a*p, p, *args)
    def phiargs(a):
        try:
            args=f.getArguments(x+a*p)
        except Exception:
            args=()
        return args

    old_alpha=0.
    # we assume gf is properly scaled so alpha=1 is a reasonable starting value
    alpha=1.
    if fx is None:
        args0=phiargs(0.)
        phi0=phi(0., *args0)
    else:
        phi0=fx
    lslogger.debug("phi(0)=%e"%(phi0))
    gphi0=f.getInner(gf, p) #gradphi(0., *args0)
    lslogger.debug("grad phi(0)=%e"%(gphi0))
    old_phi_a=phi0
    i=1

    while i<IMAX and alpha>0. and alpha<alpha_max:
        args_a=phiargs(alpha)
        phi_a=phi(alpha, *args_a)
        lslogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
        if (phi_a > phi0+c1*alpha*gphi0) or ((phi_a>=old_phi_a) and (i>1)):
            alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, old_alpha, alpha, old_phi_a, phi_a, c1, c2, phi0, gphi0)
            break
        gphi_a=gradphi(alpha, *args_a)
        if np.abs(gphi_a) <= -c2*gphi0:
            break
        if gphi_a >= 0:
            alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, alpha, old_alpha, phi_a, old_phi_a, c1, c2, phi0, gphi0)
            break

        old_alpha=alpha
        # the factor is arbitrary as long as there is sufficient increase
        alpha=2.*alpha
        old_phi_a=phi_a
        i+=1

    return alpha, phi_a, gf_new[0]
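
# A minimal usage sketch for line_search() (illustration only, not part of
# the module API). It assumes a cost function object `func` following the
# interface sketched above, with `x` the current estimate. The returned
# alpha is intended to satisfy the strong Wolfe conditions
#     f(x+alpha*p) <= f(x) + c1*alpha*<grad f(x), p>        (Armijo)
#     |<grad f(x+alpha*p), p>| <= c2*|<grad f(x), p>|       (curvature)
#
#   gx = func.getGradient(x)      # gradient at x
#   p = -gx                       # steepest-descent direction
#   alpha, fx_new, gx_new = line_search(func, x, p, gx, func(x))
#   x = x + alpha*p               # accept the step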

##############################################################################
class AbstractMinimizer(object):
    """
    Base class for function minimization methods.
    """

    TOLERANCE_REACHED, MAX_ITERATIONS_REACHED = range(2)

    def __init__(self, f, tol=1e-5, imax=300):
        """
        Initializes a new minimizer for a given cost function.

        :param f: the cost function to be minimized
        :type f: CostFunction
        :param tol: tolerance for the stopping criterion (see `setTolerance`)
        :param imax: maximum number of iterations (see `setMaxIterations`)
        """
        self._f=f
        self._tol = tol
        self._imax = imax
        self._result = None
        self._callback = None
        self.logger = logging.getLogger('inv.%s'%self.__class__.__name__)

    def setTolerance(self, tol):
        """
        Sets the tolerance for the stopping criterion. The minimizer stops
        when an appropriate norm is less than `tol`.
        """
        self._tol = tol

    def setMaxIterations(self, imax):
        """
        Sets the maximum number of iterations before the minimizer terminates.
        """
        self._imax = imax

    def setCallback(self, callback):
        """
        Sets a callback function to be called after every iteration.
        The arguments to the function are: (k, x, fx, gfx), where
        k is the current iteration, x is the current estimate, fx=f(x) and
        gfx=grad f(x).
        """
        if not callable(callback):
            raise TypeError("Callback function not callable.")
        self._callback = callback
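
    # A hedged example of a callback matching the signature documented above
    # (illustration only; `minimizer` stands for any AbstractMinimizer
    # subclass instance and Lsup is imported at the top of this module):
    #
    #   def report_progress(k, x, fx, gfx):
    #       print("iteration %d: f(x)=%e, Lsup(grad f)=%e"%(k, fx, Lsup(gfx)))
    #
    #   minimizer.setCallback(report_progress)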

    def _doCallback(self, *args):
        if self._callback is not None:
            self._callback(*args)

    def getResult(self):
        """
        Returns the result of the minimization.
        """
        return self._result

    def getOptions(self):
        """
        Returns a dictionary of minimizer-specific options.
        """
        return {}

    def setOptions(self, **opts):
        """
        Sets minimizer-specific options. For a list of possible options see
        `getOptions()`.
        """
        raise NotImplementedError

    def run(self, x0):
        """
        Executes the minimization algorithm for `f` starting with the initial
        guess `x0`.

        :return: TOLERANCE_REACHED or MAX_ITERATIONS_REACHED
        """
        raise NotImplementedError

    def logSummary(self):
        """
        Outputs a summary of the completed minimization process to the logger.
        """
        self.logger.warning("Number of function evaluations: %d"%self._f.Value_calls)
        self.logger.warning("Number of gradient evaluations: %d"%self._f.Gradient_calls)
        self.logger.warning("Number of inner product evaluations: %d"%self._f.Inner_calls)
        self.logger.warning("Number of argument evaluations: %d"%self._f.Arguments_calls)


##############################################################################
class MinimizerLBFGS(AbstractMinimizer):
    """
    Minimizer that uses the limited-memory Broyden-Fletcher-Goldfarb-Shanno
    method.
    """

    # History size
    _m = 15

    # Initial Hessian multiplier
    _initial_H = 1

    def getOptions(self):
        return {'historySize':self._m,'initialHessian':self._initial_H}

    def setOptions(self, **opts):
        for o in opts:
            if o=='historySize':
                self._m=opts[o]
            elif o=='initialHessian':
                self._initial_H=opts[o]
            else:
                raise KeyError("Invalid option '%s'"%o)
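
    # Illustration only: how the two options reported by getOptions() might
    # be set before calling run(); the values shown are arbitrary examples,
    # not recommendations:
    #
    #   m = MinimizerLBFGS(f)
    #   m.setOptions(historySize=30, initialHessian=1)
    #   m.run(x0)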

    def getNorm(self, x):
        return sqrt(self._f.getInner(x, x))

    def run(self, x):
        args=self._f.getArguments(x)
        gf=self._f.getGradient(x, *args)
        fx=self._f(x, *args)
        k=0
        H=self._initial_H
        error=2*self._tol
        s_and_y=[]

        self._doCallback(k, x, fx, gf)

        while error > self._tol and k < self._imax:
            self.logger.info("\033[1;31miteration %d\033[1;30m, error=%e"%(k,error))
            # determine search direction
            p = -self._twoLoop(H, gf, s_and_y)

            self.logger.debug("H = %s"%H)
            self.logger.debug("grad f(x) = %s"%gf)
            self.logger.debug("p = %s"%p)
            self.logger.debug("x = %s"%x)

            # determine step length
            alpha, fx_new, gf_new = line_search(self._f, x, p, gf, fx)
            self.logger.debug("alpha=%e"%(alpha))
            # execute the step
            x_new = x + alpha*p
            if gf_new is None:
                gf_new=self._f.getGradient(x_new)
            delta_x=x_new-x
            delta_g=gf_new-gf
            s_and_y.append((delta_x,delta_g))
            error=abs(fx_new-fx)/abs(fx_new)
            x=x_new
            gf=gf_new
            fx=fx_new
            k+=1
            self._doCallback(k, x, fx, gf)
            if (error<=self._tol): break

            # delete oldest vector pair
            if k>self._m: s_and_y.pop(0)

            # set the new scaling factor (approximation of inverse Hessian)
            gnorm=self.getNorm(gf)
            denom=self.getNorm(delta_g)
            if denom < EPSILON * gnorm:
                H=1.
                self.logger.debug("Break down in H update. Resetting to 1.")
            else:
                H=self._f.getInner(delta_x,delta_g)/denom**2

        if k >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final error=%e"%(k,error))
        self._result=x
        return reason

    def _twoLoop(self, H, gf, s_and_y):
        """
        Helper for the L-BFGS method.
        See 'Numerical Optimization' by J. Nocedal for an explanation.
        """
        q=gf
        alpha=[]
        for s,y in reversed(s_and_y):
            rho=1./(self._f.getInner(s, y))
            a=rho*self._f.getInner(s, q)
            alpha.append(a)
            q=q-a*y

        r=H*q
        for s,y in s_and_y:
            rho=1./(self._f.getInner(s, y))
            beta=rho*self._f.getInner(y,r)
            a=alpha.pop()
            r=r+s*(a-beta)
        return r

##############################################################################
class MinimizerBFGS(AbstractMinimizer):
    """
    Minimizer that uses the Broyden-Fletcher-Goldfarb-Shanno method.
    """

    def run(self, x):
        gf=self._f.getGradient(x)
        fx=None
        k=0
        try:
            n=len(x)
        except Exception:
            n=x.getNumberOfDataPoints()
        I=np.eye(n)
        H=I
        gnorm=Lsup(gf)
        while gnorm > self._tol and k < self._imax:
            self.logger.info("iteration %d, gnorm=%e"%(k,gnorm))

            # determine search direction
            d=-self._f.getInner(H, gf)
            # determine step length
            alpha, fx, gf_new = line_search(self._f, x, d, gf, fx)
            self.logger.debug("alpha=%e"%alpha)
            # execute the step
            x_new=x+alpha*d
            delta_x=x_new-x
            x=x_new
            if gf_new is None:
                gf_new=self._f.getGradient(x_new)
            delta_g=gf_new-gf
            gf=gf_new
            k+=1
            gnorm=Lsup(gf)
            if (gnorm<=self._tol): break

            # update the approximate inverse Hessian using the BFGS formula
            # H <- (I - rho*s*y^T) H (I - rho*y*s^T) + rho*s*s^T
            # with s=delta_x, y=delta_g and rho=1/<s,y>
            rho=1./(self._f.getInner(delta_x, delta_g))
            self.logger.debug("rho=%e"%rho)
            A=I-rho*delta_x[:,None]*delta_g[None,:]
            AT=I-rho*delta_g[:,None]*delta_x[None,:]
            H=self._f.getInner(A, self._f.getInner(H,AT)) + rho*delta_x[:,None]*delta_x[None,:]
        if k >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final gnorm=%e"%(k,gnorm))

        self._result=x
        return reason

##############################################################################
class MinimizerNLCG(AbstractMinimizer):
    """
    Minimizer that uses the nonlinear conjugate gradient method
    (Fletcher-Reeves variant with restarts).
    """

    def run(self, x):
        i=0
        k=0
        r=-self._f.getGradient(x)
        fx=None
        d=r
        delta=self._f.getInner(r,r)
        delta0=delta
        while i<self._imax and delta>self._tol*self._tol*delta0:
            self.logger.info("iteration %d, delta=%e"%(i,delta))
            delta_d=self._f.getInner(d,d)
            # this line search may not be appropriate here since it starts
            # from alpha=1, which assumes a properly scaled search direction d
            alpha, fx, gf_new = line_search(self._f, x, d, -r, fx)
            self.logger.debug("alpha=%e"%(alpha))
            x=x+alpha*d
            r=-self._f.getGradient(x) if gf_new is None else -gf_new
            delta_o=delta
            delta=self._f.getInner(r,r)
            beta=delta/delta_o
            d=r+beta*d
            k=k+1
            try:
                lenx=len(x)
            except Exception:
                lenx=x.getNumberOfDataPoints()
            if k == lenx or self._f.getInner(r,d) <= 0:
                d=r
                k=0
            i+=1

        if i >= self._imax:
            reason=self.MAX_ITERATIONS_REACHED
            self.logger.warning("Maximum number of iterations reached!")
        else:
            reason=self.TOLERANCE_REACHED
            self.logger.warning("Success after %d iterations! Final delta=%e"%(i,delta))

        self._result=x
        return reason


if __name__=="__main__":
    # Example usage with function 'rosen' (minimum=[1,1,...1]):
    from scipy.optimize import rosen, rosen_der
    from esys.downunder.costfunctions import CostFunction
    import sys
    N=100
    x0=np.array([4.]*N) # initial guess

    class RosenFunc(CostFunction):
        def _getInner(self, f0, f1):
            return np.dot(f0, f1)
        def _getValue(self, x, *args):
            return rosen(x)
        def _getGradient(self, x, *args):
            return rosen_der(x)

    f=RosenFunc()
    m=None
    if len(sys.argv)>1:
        method=sys.argv[1].lower()
        if method=='nlcg':
            m=MinimizerNLCG(f)
        elif method=='bfgs':
            m=MinimizerBFGS(f)

    if m is None:
        # default
        m=MinimizerLBFGS(f)
        #m.setOptions(historySize=10000)

    logging.basicConfig(format='[%(funcName)s] \033[1;30m%(message)s\033[0m', level=logging.DEBUG)
    m.setTolerance(1e-5)
    m.setMaxIterations(10000)
    m.run(x0)
    m.logSummary()
    print("\tLsup(result)=%.8f"%np.amax(abs(m.getResult())))

    #from scipy.optimize import fmin_cg
    #print("scipy ref=%.8f"%np.amax(abs(fmin_cg(rosen, x0, rosen_der, maxiter=10000))))

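    # To try the other minimizers, pass the method name as the first command
    # line argument (anything else falls back to L-BFGS), e.g.
    #   python minimizers.py bfgs
    #   python minimizers.py nlcg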
