
Contents of /trunk/downunder/py_src/minimizers.py



Revision 4434
Mon Jun 3 00:43:29 2013 UTC by caltinay
File MIME type: text/x-python
File size: 22503 byte(s)
tab-fixes and an indexing fix.

1
2 ##############################################################################
3 #
4 # Copyright (c) 2003-2013 by University of Queensland
5 # http://www.uq.edu.au
6 #
7 # Primary Business: Queensland, Australia
8 # Licensed under the Open Software License version 3.0
9 # http://www.opensource.org/licenses/osl-3.0.php
10 #
11 # Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 # Development since 2012 by School of Earth Sciences
13 #
14 ##############################################################################
15
16 """Generic minimization algorithms"""
17
18 __copyright__="""Copyright (c) 2003-2013 by University of Queensland
19 http://www.uq.edu.au
20 Primary Business: Queensland, Australia"""
21 __license__="""Licensed under the Open Software License version 3.0
22 http://www.opensource.org/licenses/osl-3.0.php"""
23 __url__="https://launchpad.net/escript-finley"
24
25 __all__ = ['MinimizerException', 'MinimizerIterationIncurableBreakDown', 'MinimizerMaxIterReached', 'AbstractMinimizer', 'MinimizerLBFGS', 'MinimizerBFGS', 'MinimizerNLCG']
26
27 import logging
28 import numpy as np
29
30 try:
31 from esys.escript import Lsup, sqrt, EPSILON
32 except ImportError:
33 Lsup=lambda x: np.amax(abs(x))
34 sqrt=np.sqrt
35 EPSILON=1e-18
36
37 lslogger=logging.getLogger('inv.minimizer.linesearch')
38 zoomlogger=logging.getLogger('inv.minimizer.linesearch.zoom')
39
40 class MinimizerException(Exception):
41 """
42 This is a generic exception thrown by a minimizer.
43 """
44 pass
45
46 class MinimizerMaxIterReached(MinimizerException):
47 """
48 Exception thrown if the maximum number of iteration steps is reached.
49 """
50 pass
51
52 class MinimizerIterationIncurableBreakDown(MinimizerException):
53 """
54 Exception thrown if the iteration scheme encountered an incurable
55 breakdown.
56 """
57 pass
58
59
60 def _zoom(phi, gradphi, phiargs, alpha_lo, alpha_hi, phi_lo, phi_hi, c1, c2, phi0, gphi0, IMAX=25):
61 """
62 Helper function for `line_search` below which tries to tighten the range
63 alpha_lo...alpha_hi. See Chapter 3 of 'Numerical Optimization' by
64 J. Nocedal for an explanation.
65 """
66 i=0
67 while True:
68 alpha=alpha_lo+.5*(alpha_hi-alpha_lo) # bisection; an interpolated trial step (see the sketch after this function) would typically converge faster
69 args_a=phiargs(alpha)
70 phi_a=phi(alpha, *args_a)
71 zoomlogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
72 if phi_a > phi0+c1*alpha*gphi0 or phi_a >= phi_lo:
73 alpha_hi=alpha
74 else:
75 gphi_a=gradphi(alpha, *args_a)
76 zoomlogger.debug("\tgrad(phi(alpha))=%e"%(gphi_a))
77 if np.abs(gphi_a) <= -c2*gphi0:
78 break
79 if gphi_a*(alpha_hi-alpha_lo) >= 0:
80 alpha_hi = alpha_lo
81 alpha_lo=alpha
82 phi_lo=phi_a
83 i+=1
84 if i>IMAX:
85 gphi_a=None
86 break
87 return alpha, phi_a, gphi_a
88
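# The bisection step in _zoom() above could be replaced by a quadratic fit.
# The helper below is only an illustrative sketch (it is not called anywhere
# in this module, and its name and signature are hypothetical): it fits a
# quadratic through (alpha_lo, phi_lo) with slope gphi_lo and through
# (alpha_hi, phi_hi), falling back to bisection when the fit is unusable.
def _quadratic_trial_step(alpha_lo, alpha_hi, phi_lo, phi_hi, gphi_lo):
    d = alpha_hi - alpha_lo
    denom = 2.*(phi_hi - phi_lo - gphi_lo*d)
    if abs(denom) < EPSILON:
        return alpha_lo + .5*d
    # minimizer of the interpolating quadratic
    alpha = alpha_lo - gphi_lo*d*d/denom
    # keep the trial point safely inside the bracket, otherwise bisect
    lo, hi = min(alpha_lo, alpha_hi), max(alpha_lo, alpha_hi)
    if not (lo + .1*abs(d) <= alpha <= hi - .1*abs(d)):
        return alpha_lo + .5*d
    return alpha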
89 def line_search(f, x, p, g_Jx, Jx, alpha_max=50.0, c1=1e-4, c2=0.9, IMAX=15):
90 """
91 Line search method that satisfies the strong Wolfe conditions.
92 See Chapter 3 of 'Numerical Optimization' by J. Nocedal for an explanation.
93
94 :param f: cost function to minimize; it must be callable and provide getGradient(), getDualProduct() and getArguments()
95 :param x: start value for the line search
96 :param p: search direction
97 :param g_Jx: value for the gradient of f at x
98 :param Jx: value of f(x)
99 :param alpha_max: algorithm terminates if alpha reaches this value
100 :param c1: value for Armijo condition (see reference)
101 :param c2: value for curvature condition (see reference)
102 :param IMAX: maximum number of iterations to perform
103 """
104 # this stores the latest gradf(x+a*p) which is returned
105 g_Jx_new=[g_Jx]
107 def phi(a, *args):
108 """ phi(a):=f(x+a*p) """
109 return f(x+a*p, *args)
110 def gradphi(a, *args):
111 g_Jx_new[0]=f.getGradient(x+a*p, *args)
112 return f.getDualProduct(p, g_Jx_new[0])
113 def phiargs(a):
114 try:
115 args=f.getArguments(x+a*p)
116 except:
117 args=()
118 return args
120 old_alpha=0.
121 # we assume g_Jx is properly scaled so alpha=1 is a reasonable starting value
122 alpha=1.
123 if Jx is None:
124 args0=phiargs(0.)
125 phi0=phi(0., *args0)
126 else:
127 phi0=Jx
128 lslogger.debug("phi(0)=%e"%(phi0))
130 gphi0=f.getDualProduct(p, g_Jx) #gradphi(0., *args0)
132 lslogger.debug("grad phi(0)=%e"%(gphi0))
133 old_phi_a=phi0
134 i=1
135
136 while i<IMAX and alpha>0. and alpha<alpha_max:
137 args_a=phiargs(alpha)
138 phi_a=phi(alpha, *args_a)
139 lslogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
140 if (phi_a > phi0+c1*alpha*gphi0) or ((phi_a>=old_phi_a) and (i>1)):
141 alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, old_alpha, alpha, old_phi_a, phi_a, c1, c2, phi0, gphi0)
142 break
143
144 gphi_a=gradphi(alpha, *args_a)
145 if np.abs(gphi_a) <= -c2*gphi0:
146 break
147 if gphi_a >= 0:
148 alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, alpha, old_alpha, phi_a, old_phi_a, c1, c2, phi0, gphi0)
149 break
150
151 old_alpha=alpha
152 # the factor is arbitrary as long as there is sufficient increase
153 alpha=2.*alpha
154 old_phi_a=phi_a
155 i+=1
157 return alpha, phi_a, g_Jx_new[0]
158
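# A minimal NumPy-based sketch of an object satisfying the interface that
# line_search() expects (callable, plus getGradient, getDualProduct and
# getArguments). The class and function names here are illustrative only and
# are not used elsewhere in this module.
class _QuadraticExample(object):
    """f(x) = 0.5*||x - 1||^2, minimized at x = 1."""
    def __call__(self, x, *args):
        return 0.5*np.dot(x - 1., x - 1.)
    def getGradient(self, x, *args):
        return x - 1.
    def getDualProduct(self, p, g):
        return np.dot(p, g)
    def getArguments(self, x):
        return ()

def _line_search_example():
    """Runs line_search on the quadratic above; the returned alpha should be close to 1."""
    f = _QuadraticExample()
    x = np.zeros(3)
    g = f.getGradient(x)
    return line_search(f, x, -g, g, f(x))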
159
160 ##############################################################################
161 class AbstractMinimizer(object):
162 """
163 Base class for function minimization methods.
164 """
165
166 def __init__(self, J=None, m_tol=1e-4, J_tol=None, imax=300):
167 """
168 Initializes a new minimizer for a given cost function.
169
170 :param J: the cost function to be minimized
171 :type J: `CostFunction`
172 """
173 self.setCostFunction(J)
174 self._m_tol = m_tol
175 self._J_tol = J_tol
176 self._imax = imax
177 self._result = None
178 self._callback = None
179 self.logger = logging.getLogger('inv.%s'%self.__class__.__name__)
180 self.setTolerance(m_tol, J_tol)
181
182 def setCostFunction(self, J):
183 """
184 set the cost function to be minimized
185
186 :param J: the cost function to be minimized
187 :type J: `CostFunction`
188 """
189 self.__J=J
190
191 def getCostFunction(self):
192 """
193 return the cost function to be minimized
194
195 :rtype: `CostFunction`
196 """
197 return self.__J
198
199 def setTolerance(self, m_tol=1e-4, J_tol=None):
200 """
201 Sets the tolerances for the stopping criterion. The minimizer stops when an
202 appropriate norm is less than `m_tol` or, if `J_tol` is set, when the relative change of the cost function falls below `J_tol`.
203 """
204 self._m_tol = m_tol
205 self._J_tol = J_tol
206
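# In MinimizerLBFGS.run() these tolerances translate into the checks
#   norm(x_new - x) <= m_tol * norm(x_new)                (using getNorm())
#   |J(x_new) - J(x)| <= J_tol * |J(x_new) - J(x_0)|      (only if J_tol is set)
# while MinimizerBFGS and MinimizerNLCG compare the sup-norm of the gradient
# (respectively the residual) against m_tol and do not use J_tol.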
207 def setMaxIterations(self, imax):
208 """
209 Sets the maximum number of iterations before the minimizer terminates.
210 """
211 self._imax = imax
212
213 def setCallback(self, callback):
214 """
215 Sets a callback function to be called after every iteration.
216 The arguments to the function are: (k, x, Jx, g_Jx), where
217 k is the current iteration, x is the current estimate, Jx=f(x) and
218 g_Jx=grad f(x).
219 """
220 if callback is not None and not callable(callback):
221 raise TypeError("Callback function not callable.")
222 self._callback = callback
223
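# Any callable with the signature (k, x, Jx, g_Jx) can be used, for example
# (the names below are purely illustrative):
#     def progress(k, x, Jx, g_Jx):
#         print("iteration %d: J(x) = %e"%(k, Jx))
#     minimizer.setCallback(progress)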
224 def _doCallback(self, *args):
225 if self._callback is not None:
226 self._callback(*args)
227
228 def getResult(self):
229 """
230 Returns the result of the minimization.
231 """
232 return self._result
233
234 def getOptions(self):
235 """
236 Returns a dictionary of minimizer-specific options.
237 """
238 return {}
239
240 def setOptions(self, **opts):
241 """
242 Sets minimizer-specific options. For a list of possible options see
243 `getOptions()`.
244 """
245 raise NotImplementedError
246
247 def run(self, x0):
248 """
249 Executes the minimization algorithm for *f* starting with the initial
250 guess ``x0``.
251
252 :return: the result of the minimization
253 """
254 raise NotImplementedError
255
256 def logSummary(self):
257 """
258 Outputs a summary of the completed minimization process to the logger.
259 """
260 if hasattr(self.getCostFunction(), "Value_calls"):
261 self.logger.info("Number of cost function evaluations: %d"%self.getCostFunction().Value_calls)
262 self.logger.info("Number of gradient evaluations: %d"%self.getCostFunction().Gradient_calls)
263 self.logger.info("Number of inner product evaluations: %d"%self.getCostFunction().DualProduct_calls)
264 self.logger.info("Number of argument evaluations: %d"%self.getCostFunction().Arguments_calls)
265 self.logger.info("Number of norm evaluations: %d"%self.getCostFunction().Norm_calls)
266
267 ##############################################################################
268 class MinimizerLBFGS(AbstractMinimizer):
269 """
270 Minimizer that uses the limited-memory Broyden-Fletcher-Goldfarb-Shanno
271 method.
272 """
273
274 # History size
275 _truncation = 30
276
277 # Initial Hessian multiplier
278 _initial_H = 1
279
280 # Restart after this many iteration steps
281 _restart = 60
282
283 def getOptions(self):
284 return {'truncation':self._truncation,'initialHessian':self._initial_H, 'restart':self._restart}
285
286 def setOptions(self, **opts):
287 for o in opts:
288 if o=='historySize' or o=='truncation':
289 self._truncation=opts[o]
290 elif o=='initialHessian':
291 self._initial_H=opts[o]
292 elif o=='restart':
293 self._restart=opts[o]
294 else:
295 raise KeyError("Invalid option '%s'"%o)
296
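# A hypothetical instance could, for example, be tuned with
#     m = MinimizerLBFGS(J)
#     m.setOptions(truncation=40, initialHessian=1, restart=50)
# where 'historySize' is accepted as an alias for 'truncation'.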
297 def run(self, x):
298 """
299 :param x: Level set function representing our initial guess
300 :type x: `Data`
301 :return: Level set function representing the solution
302 :rtype: `Data`
303 """
304 if self.getCostFunction().provides_inverse_Hessian_approximation:
305 self.getCostFunction().updateHessian()
306 invH_scale = None
307 else:
308 invH_scale = self._initial_H
309
310 # start the iteration:
311 n_iter = 0
312 n_last_break_down=-1
313 non_curable_break_down = False
314 converged = False
315 args=self.getCostFunction().getArguments(x)
316 g_Jx=self.getCostFunction().getGradient(x, *args)
317 Jx=self.getCostFunction()(x, *args) # equivalent to getValue() for Downunder CostFunctions
318 Jx_0=Jx
319
320 while not converged and not non_curable_break_down and n_iter < self._imax:
321 k=0
322 break_down = False
323 s_and_y=[]
324 self._doCallback(n_iter, x, Jx, g_Jx)
325
326 while not converged and not break_down and k < self._restart and n_iter < self._imax:
327 #self.logger.info("\033[1;31miteration %d\033[1;30m"%n_iter)
328 if n_iter%10==0:
329 self.logger.info("********** iteration %3d **********"%n_iter)
330 else:
331 self.logger.debug("********** iteration %3d **********"%n_iter)
332 # determine search direction
333 self.logger.debug("\tJ(x) = %s"%Jx)
334 self.logger.debug("\tgrad f(x) = %s"%g_Jx)
335 if invH_scale: self.logger.debug("\tH = %s"%invH_scale)
336
337 p = -self._twoLoop(invH_scale, g_Jx, s_and_y, x, *args)
338 # determine step length
339 alpha, Jx_new, g_Jx_new = line_search(self.getCostFunction(), x, p, g_Jx, Jx)
340 # this function returns a scaling alpha for the search
341 # direction as well as the cost function evaluation and
342 # gradient for the new solution approximation x_new=x+alpha*p
343 self.logger.debug("\tSearch direction scaling alpha=%e"%alpha)
344
345 # execute the step
346 delta_x = alpha*p
347 x_new = x + delta_x
348 self.logger.debug("\tJ(x) = %s"%Jx_new)
349
350 converged = True
351 if self._J_tol:
352 flag=abs(Jx_new-Jx) <= self._J_tol * abs(Jx_new-Jx_0)
353 if flag:
354 self.logger.debug("Cost function has converged: dJ, J*J_tol = %e, %e"%(Jx-Jx_new,abs(Jx_new-Jx_0)*self._J_tol))
355 else:
356 self.logger.debug("Cost function checked: dJ, J*J_tol = %e, %e"%(Jx-Jx_new,abs(Jx_new-Jx_0)*self._J_tol))
357
358 converged = converged and flag
359 if self._m_tol:
360 norm_x=self.getCostFunction().getNorm(x_new)
361 norm_dx=self.getCostFunction().getNorm(delta_x)
362 flag= norm_dx <= self._m_tol * norm_x
363 if flag:
364 self.logger.debug("Solution has converged: dx, x*m_tol = %e, %e"%(norm_dx,norm_x*self._m_tol))
365 else:
366 self.logger.debug("Solution checked: dx, x*m_tol = %e, %e"%(norm_dx,norm_x*self._m_tol))
367 converged = converged and flag
368
369 x=x_new
370 if converged:
371 break
372
373 # unfortunately there is more work to do!
374 if g_Jx_new is None:
375 args=self.getCostFunction().getArguments(x_new)
376 g_Jx_new=self.getCostFunction().getGradient(x_new, *args)
377 delta_g=g_Jx_new-g_Jx
378
379 rho=self.getCostFunction().getDualProduct(delta_x, delta_g)
380 if abs(rho)>0:
381 s_and_y.append((delta_x,delta_g, rho ))
382 else:
383 break_down=True
384
385 self.getCostFunction().updateHessian()
386 g_Jx=g_Jx_new
387 Jx=Jx_new
388
389 k+=1
390 n_iter+=1
391 self._doCallback(n_iter, x, Jx, g_Jx)
392
393 # delete oldest vector pair
394 if k>self._truncation: s_and_y.pop(0)
395
396 if not self.getCostFunction().provides_inverse_Hessian_approximation and not break_down:
397 # set the new scaling factor (approximation of inverse Hessian)
398 denom=self.getCostFunction().getDualProduct(delta_g, delta_g)
399 if denom > 0:
400 invH_scale=self.getCostFunction().getDualProduct(delta_x,delta_g)/denom
401 else:
402 invH_scale=self._initial_H
403 self.logger.debug("** Break down in H update. Resetting to initial value %s."%self._initial_H)
404 # case handling for inner iteration:
405 if break_down:
406 if n_iter == n_last_break_down+1:
407 non_curable_break_down = True
408 self.logger.debug("** Incurable break down detected in step %d."%n_iter)
409 else:
410 n_last_break_down = n_iter
411 self.logger.debug("** Break down detected in step %d. Iteration is restarted."%n_iter)
412 if not k < self._restart:
413 self.logger.debug("Iteration is restarted after %d steps."%n_iter)
414
415 # wrap up after the outer iteration:
416 self._result=x
417 if n_iter >= self._imax:
418 self.logger.warn(">>>>>>>>>> Maximum number of iterations reached! <<<<<<<<<<")
419 raise MinimizerMaxIterReached("Gave up after %d steps."%n_iter)
420 elif non_curable_break_down:
421 self.logger.warn(">>>>>>>>>> Incurable breakdown! <<<<<<<<<<")
422 raise MinimizerIterationIncurableBreakDown("Gave up after %d steps."%n_iter)
423
424 self.logger.info("Success after %d iterations!"%n_iter)
425 return self._result
426
427 def _twoLoop(self, invH_scale, g_Jx, s_and_y, x, *args):
428 """
429 Helper for the L-BFGS method.
430 See 'Numerical Optimization' by J. Nocedal for an explanation.
431 """
432 q=g_Jx
433 alpha=[]
434 for s,y, rho in reversed(s_and_y):
435 a=self.getCostFunction().getDualProduct(s, q)/rho
436 alpha.append(a)
437 q=q-a*y
438
439 if self.getCostFunction().provides_inverse_Hessian_approximation:
440 r = self.getCostFunction().getInverseHessianApproximation(x, q, *args)
441 else:
442 r = invH_scale * q
443
444 for s,y,rho in s_and_y:
445 beta = self.getCostFunction().getDualProduct(r, y)/rho
446 a = alpha.pop()
447 r = r + s * (a-beta)
448 return r
449
450 ##############################################################################
451 class MinimizerBFGS(AbstractMinimizer):
452 """
453 Minimizer that uses the Broyden-Fletcher-Goldfarb-Shanno method.
454 """
455
456 # Initial Hessian multiplier
457 _initial_H = 1
458
459 def getOptions(self):
460 return {'initialHessian':self._initial_H}
461
462 def setOptions(self, **opts):
463 for o in opts:
464 if o=='initialHessian':
465 self._initial_H=opts[o]
466 else:
467 raise KeyError("Invalid option '%s'"%o)
468
469 def run(self, x):
470 args=self.getCostFunction().getArguments(x)
471 g_Jx=self.getCostFunction().getGradient(x, *args)
472 Jx=self.getCostFunction()(x, *args)
473 k=0
474 try:
475 n=len(x)
476 except:
477 n=x.getNumberOfDataPoints()
478 I=np.eye(n)
479 H=self._initial_H*I
480 gnorm=Lsup(g_Jx)
481 self._doCallback(k, x, Jx, g_Jx)
482
483 while gnorm > self._m_tol and k < self._imax:
484 self.logger.debug("iteration %d, gnorm=%e"%(k,gnorm))
485
486 # determine search direction
487 d=-self.getCostFunction().getDualProduct(H, g_Jx)
488
489 self.logger.debug("H = %s"%H)
490 self.logger.debug("grad f(x) = %s"%g_Jx)
491 self.logger.debug("d = %s"%d)
492 self.logger.debug("x = %s"%x)
493
494 # determine step length
495 alpha, Jx, g_Jx_new = line_search(self.getCostFunction(), x, d, g_Jx, Jx)
496 self.logger.debug("alpha=%e"%alpha)
497 # execute the step
498 x_new=x+alpha*d
499 delta_x=x_new-x
500 x=x_new
501 if g_Jx_new is None:
502 g_Jx_new=self.getCostFunction().getGradient(x_new)
503 delta_g=g_Jx_new-g_Jx
504 g_Jx=g_Jx_new
505 k+=1
506 self._doCallback(k, x, Jx, g_Jx)
507 gnorm=Lsup(g_Jx)
508 self._result=x
509 if (gnorm<=self._m_tol): break
510
511 # update Hessian
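# This is the BFGS update of the inverse Hessian approximation,
#   H <- (I - rho*s*y^T) H (I - rho*y*s^T) + rho*s*s^T,  rho = 1/(y.s),
# with s = delta_x and y = delta_g.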
512 denom=self.getCostFunction().getDualProduct(delta_x, delta_g)
513 if denom < EPSILON * gnorm:
514 denom=1e-5
515 self.logger.debug("Break down in H update. Resetting.")
516 rho=1./denom
517 self.logger.debug("rho=%e"%rho)
518 A=I-rho*delta_x[:,None]*delta_g[None,:]
519 AT=I-rho*delta_g[:,None]*delta_x[None,:]
520 H=self.getCostFunction().getDualProduct(A, self.getCostFunction().getDualProduct(H,AT)) + rho*delta_x[:,None]*delta_x[None,:]
521
522 if k >= self._imax:
523 self.logger.warn(">>>>>>>>>> Maximum number of iterations reached! <<<<<<<<<<")
524 raise MinimizerMaxIterReached("Gave up after %d steps."%k)
525
526 self.logger.info("Success after %d iterations! Final gnorm=%e"%(k,gnorm))
527 return self._result
528
529 ##############################################################################
530 class MinimizerNLCG(AbstractMinimizer):
531 """
532 Minimizer that uses the nonlinear conjugate gradient method
533 (Fletcher-Reeves variant).
534 """
535
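# The Fletcher-Reeves update used in run() below, with residual r = -grad J(x):
#   beta = <r_new, r_new> / <r_old, r_old>     (delta/delta_o in the code)
#   d_new = r_new + beta*d
# The direction is reset to d = r whenever k reaches the problem size or
# <r, d> <= 0, so that d remains a descent direction.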
536 def run(self, x):
537 i=0
538 k=0
539 args=self.getCostFunction().getArguments(x)
540 r=-self.getCostFunction().getGradient(x, *args)
541 Jx=self.getCostFunction()(x, *args)
542 d=r
543 delta=self.getCostFunction().getDualProduct(r,r)
544 delta0=delta
545 self._doCallback(i, x, Jx, -r)
546
547 while i<self._imax and Lsup(r)>self._m_tol:
548 self.logger.debug("iteration %d"%i)
549 self.logger.debug("grad f(x) = %s"%(-r))
550 self.logger.debug("d = %s"%d)
551 self.logger.debug("x = %s"%x)
552
553 alpha, Jx, g_Jx_new = line_search(self.getCostFunction(), x, d, -r, Jx, c2=0.4)
554 self.logger.debug("alpha=%e"%(alpha))
555 x=x+alpha*d
556 r=-self.getCostFunction().getGradient(x) if g_Jx_new is None else -g_Jx_new
557 delta_o=delta
558 delta=self.getCostFunction().getDualProduct(r,r)
559 beta=delta/delta_o
560 d=r+beta*d
561 k=k+1
562 try:
563 lenx=len(x)
564 except:
565 lenx=x.getNumberOfDataPoints()
566 if k == lenx or self.getCostFunction().getDualProduct(r,d) <= 0:
567 d=r
568 k=0
569 i+=1
570 self._doCallback(i, x, Jx, -r)
571 self._result=x
572
573 if i >= self._imax:
574 self.logger.warn(">>>>>>>>>> Maximum number of iterations reached! <<<<<<<<<<")
575 raise MinimizerMaxIterReached("Gave up after %d steps."%i)
576
577
578 self.logger.info("Success after %d iterations! Final delta=%e"%(i,delta))
579 return self._result
580
581
582 if __name__=="__main__":
583 # Example usage with function 'rosen' (minimum=[1,1,...1]):
584 from scipy.optimize import rosen, rosen_der
585 from esys.downunder import MeteredCostFunction
586 import sys
587 N=10
588 x0=np.array([4.]*N) # initial guess
589
590 class RosenFunc(MeteredCostFunction):
591 def __init__(self):
592 super(RosenFunc, self).__init__()
593 self.provides_inverse_Hessian_approximation=False
594 def _getDualProduct(self, f0, f1):
595 return np.dot(f0, f1)
596 def _getValue(self, x, *args):
597 return rosen(x)
598 def _getGradient(self, x, *args):
599 return rosen_der(x)
600 def _getNorm(self,x):
601 return Lsup(x)
602
603 f=RosenFunc()
604 m=None
605 if len(sys.argv)>1:
606 method=sys.argv[1].lower()
607 if method=='nlcg':
608 m=MinimizerNLCG(f)
609 elif method=='bfgs':
610 m=MinimizerBFGS(f)
611
612 if m is None:
613 # default
614 m=MinimizerLBFGS(f)
615 #m.setOptions(historySize=10000)
616
617 logging.basicConfig(format='[%(funcName)s] \033[1;30m%(message)s\033[0m', level=logging.DEBUG)
618 m.setTolerance(m_tol=1e-5)
619 m.setMaxIterations(600)
620 m.run(x0)
621 m.logSummary()
622 print("\tLsup(result)=%.8f"%np.amax(abs(m.getResult())))
623
624 #from scipy.optimize import fmin_cg
625 #print("scipy ref=%.8f"%np.amax(abs(fmin_cg(rosen, x0, rosen_der, maxiter=10000))))
626
