# ******NOTICE***************
# optimize.py module by Travis E. Oliphant
#
# You may copy and use this module as you see fit with no
# guarantee implied provided you keep this notice in all copies.
# *****END NOTICE************

# A collection of optimization algorithms.  Version 0.3.1

# Minimization routines
"""optimize.py

A collection of general-purpose optimization routines using numarray

fmin       ---  Nelder-Mead Simplex algorithm (uses only function calls)
fminBFGS   ---  Quasi-Newton method (uses function and gradient)
fminNCG    ---  Line-search Newton Conjugate Gradient (uses function, gradient
                and hessian, if it is provided)

"""
import numarray
__version__="0.3.1"

def para(x):
    return (x[0]-2.)**2

def para_der(x):
    return 2*(x-2.)

def para_hess(x):
    return numarray.ones((1,1))*2.

def para_hess_p(x,p):
    return 2*p

def rosen(x):  # the Rosenbrock function
    return numarray.sum(100.0*(x[1:]-x[:-1]**2.0)**2.0 + (1-x[:-1])**2.0)

def rosen_der(x):
    xm = x[1:-1]
    xm_m1 = x[:-2]
    xm_p1 = x[2:]
    der = numarray.zeros(x.shape,x.typecode())
    der[1:-1] = 200*(xm-xm_m1**2) - 400*(xm_p1 - xm**2)*xm - 2*(1-xm)
    der[0] = -400*x[0]*(x[1]-x[0]**2) - 2*(1-x[0])
    der[-1] = 200*(x[-1]-x[-2]**2)
    return der

def rosen3_hess_p(x,p):
    assert(len(x)==3)
    assert(len(p)==3)
    hessp = numarray.zeros((3,),x.typecode())
    hessp[0] = (2 + 800*x[0]**2 - 400*(-x[0]**2 + x[1])) * p[0] \
               - 400*x[0]*p[1] \
               + 0
    hessp[1] = - 400*x[0]*p[0] \
               + (202 + 800*x[1]**2 - 400*(-x[1]**2 + x[2]))*p[1] \
               - 400*x[1] * p[2]
    hessp[2] = 0 \
               - 400*x[1] * p[1] \
               + 200 * p[2]

    return hessp

def rosen3_hess(x):
    assert(len(x)==3)
    hessp = numarray.zeros((3,3),x.typecode())
    hessp[0,:] = [2 + 800*x[0]**2 -400*(-x[0]**2 + x[1]), -400*x[0], 0]
    hessp[1,:] = [-400*x[0], 202+800*x[1]**2 -400*(-x[1]**2 + x[2]), -400*x[1]]
    hessp[2,:] = [0,-400*x[1], 200]
    return hessp
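
# Example (illustrative sketch): for any 3-vector p, the hessian-vector
# product above should agree with applying the full hessian to p, e.g.
#
#   >>> x = numarray.array([0.8,1.2,0.7]); p = numarray.array([1.,0.,0.])
#   >>> rosen3_hess_p(x,p)                          # should match ...
#   >>> numarray.matrixmultiply(rosen3_hess(x),p)   # ... this product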


def fmin(func, x0, args=(), xtol=1e-4, ftol=1e-4, maxiter=None, maxfun=None, fulloutput=0, printmessg=1):
    """xopt,{fval,warnflag} = fmin(function, x0, args=(), xtol=1e-4, ftol=1e-4,
    maxiter=200*len(x0), maxfun=200*len(x0), fulloutput=0, printmessg=1)

    Uses a Nelder-Mead Simplex algorithm to find the minimum of a function
    of one or more variables.
    """
    x0 = numarray.asarray(x0)
    assert (len(x0.shape)==1)
    N = len(x0)
    if maxiter is None:
        maxiter = N * 200
    if maxfun is None:
        maxfun = N * 200

    # standard Nelder-Mead coefficients: reflection, expansion,
    # contraction and shrinkage
    rho = 1; chi = 2; psi = 0.5; sigma = 0.5;
    one2np1 = range(1,N+1)

    sim = numarray.zeros((N+1,N),x0.typecode())
    fsim = numarray.zeros((N+1,),'d')
    sim[0] = x0
    fsim[0] = apply(func,(x0,)+args)
    nonzdelt = 0.05
    zdelt = 0.00025
    for k in range(0,N):
        y = numarray.array(x0,copy=1)
        if y[k] != 0:
            y[k] = (1+nonzdelt)*y[k]
        else:
            y[k] = zdelt

        sim[k+1] = y
        f = apply(func,(y,)+args)
        fsim[k+1] = f

    ind = numarray.argsort(fsim)
    fsim = numarray.take(fsim,ind)  # sort so sim[0,:] has the lowest function value
    sim = numarray.take(sim,ind,0)

    iterations = 1
    funcalls = N+1

    while (funcalls < maxfun and iterations < maxiter):
        if (max(numarray.ravel(numarray.absolute(sim[1:]-sim[0]))) <= xtol \
            and max(numarray.absolute(fsim[0]-fsim[1:])) <= ftol):
            break

        xbar = numarray.add.reduce(sim[:-1],0) / N
        xr = (1+rho)*xbar - rho*sim[-1]
        fxr = apply(func,(xr,)+args)
        funcalls = funcalls + 1
        doshrink = 0

        if fxr < fsim[0]:
            xe = (1+rho*chi)*xbar - rho*chi*sim[-1]
            fxe = apply(func,(xe,)+args)
            funcalls = funcalls + 1

            if fxe < fxr:
                sim[-1] = xe
                fsim[-1] = fxe
            else:
                sim[-1] = xr
                fsim[-1] = fxr
        else: # fsim[0] <= fxr
            if fxr < fsim[-2]:
                sim[-1] = xr
                fsim[-1] = fxr
            else: # fxr >= fsim[-2]
                # Perform contraction
                if fxr < fsim[-1]:
                    xc = (1+psi*rho)*xbar - psi*rho*sim[-1]
                    fxc = apply(func,(xc,)+args)
                    funcalls = funcalls + 1

                    if fxc <= fxr:
                        sim[-1] = xc
                        fsim[-1] = fxc
                    else:
                        doshrink=1
                else:
                    # Perform an inside contraction
                    xcc = (1-psi)*xbar + psi*sim[-1]
                    fxcc = apply(func,(xcc,)+args)
                    funcalls = funcalls + 1

                    if fxcc < fsim[-1]:
                        sim[-1] = xcc
                        fsim[-1] = fxcc
                    else:
                        doshrink = 1

                if doshrink:
                    for j in one2np1:
                        sim[j] = sim[0] + sigma*(sim[j] - sim[0])
                        fsim[j] = apply(func,(sim[j],)+args)
                    funcalls = funcalls + N

        ind = numarray.argsort(fsim)
        sim = numarray.take(sim,ind,0)
        fsim = numarray.take(fsim,ind)
        iterations = iterations + 1

    x = sim[0]
    fval = min(fsim)
    warnflag = 0

    if funcalls >= maxfun:
        warnflag = 1
        if printmessg:
            print "Warning: Maximum number of function evaluations has been exceeded."
    elif iterations >= maxiter:
        warnflag = 2
        if printmessg:
            print "Warning: Maximum number of iterations has been exceeded"
    else:
        if printmessg:
            print "Optimization terminated successfully."
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % iterations
            print "         Function evaluations: %d" % funcalls

    if fulloutput:
        return x, fval, warnflag
    else:
        return x
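
# Example (illustrative sketch): the simplex method needs only function
# values, so it can be used when no gradient is available, e.g.
#
#   >>> xopt = fmin(rosen, [0.8,1.2,0.7], printmessg=0)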


def zoom(a_lo, a_hi):
    # Placeholder for the "zoom" stage of the Wright and Nocedal line
    # search (their Algorithm 3.3); it is not implemented in this version,
    # so line_search below cannot refine a bracketing interval.
    pass



def line_search(f, fprime, xk, pk, gfk, args=(), c1=1e-4, c2=0.9, amax=50):
    """alpha, fc, gc = line_search(f, fprime, xk, pk, gfk, args=(), c1=1e-4, c2=0.9, amax=50)

    minimize the function f(xk+alpha pk) using the line search algorithm of
    Wright and Nocedal in 'Numerical Optimization', 1999, pg. 59-60
    """

    fc = 0
    gc = 0
    alpha0 = 1.0
    phi0 = apply(f,(xk,)+args)
    phi_a0 = apply(f,(xk+alpha0*pk,)+args)
    fc = fc + 2
    derphi0 = numarray.dot(gfk,pk)
    derphi_a0 = numarray.dot(apply(fprime,(xk+alpha0*pk,)+args),pk)
    gc = gc + 1

    # check to see if alpha0 = 1 satisfies the strong Wolfe conditions:
    #   f(xk+alpha pk) <= f(xk) + c1*alpha*gfk.pk    (sufficient decrease)
    #   |f'(xk+alpha pk).pk| <= c2*|gfk.pk|          (curvature)
    if (phi_a0 <= phi0 + c1*alpha0*derphi0) \
       and (numarray.absolute(derphi_a0) <= c2*numarray.absolute(derphi0)):
        return alpha0, fc, gc

    alpha0 = 0
    alpha1 = 1
    phi_a1 = phi_a0
    phi_a0 = phi0

    i = 1
    while 1:
        if (phi_a1 > phi0 + c1*alpha1*derphi0) or \
           ((phi_a1 >= phi_a0) and (i > 1)):
            return zoom(alpha0, alpha1)   # note: zoom is still a stub

        derphi_a1 = numarray.dot(apply(fprime,(xk+alpha1*pk,)+args),pk)
        gc = gc + 1
        if (numarray.absolute(derphi_a1) <= -c2*derphi0):
            return alpha1, fc, gc

        if (derphi_a1 >= 0):
            return zoom(alpha1, alpha0)   # note: zoom is still a stub

        alpha2 = (amax-alpha1)*0.25 + alpha1
        i = i + 1
        alpha0 = alpha1
        alpha1 = alpha2
        phi_a0 = phi_a1
        phi_a1 = apply(f,(xk+alpha1*pk,)+args)
        fc = fc + 1



def line_search_BFGS(f, xk, pk, gfk, args=(), c1=1e-4, alpha0=1):
    """alpha, fc, gc = line_search_BFGS(f, xk, pk, gfk, args=(), c1=1e-4, alpha0=1)

    minimize over alpha, the function f(xk+alpha pk) using the interpolation
    algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pg. 56-57
    """

    fc = 0
    phi0 = apply(f,(xk,)+args)               # compute f(xk)
    phi_a0 = apply(f,(xk+alpha0*pk,)+args)   # compute f(xk+alpha0 pk)
    fc = fc + 2
    derphi0 = numarray.dot(gfk,pk)

    if (phi_a0 <= phi0 + c1*alpha0*derphi0):
        return alpha0, fc, 0

    # Otherwise compute the minimizer of a quadratic interpolant:

    alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
    phi_a1 = apply(f,(xk+alpha1*pk,)+args)
    fc = fc + 1

    if (phi_a1 <= phi0 + c1*alpha1*derphi0):
        return alpha1, fc, 0

    # Otherwise loop with cubic interpolation until we find an alpha which
    # satisfies the first Wolfe condition (since we are backtracking, we will
    # assume that the value of alpha is not too small and satisfies the second
    # condition).

    while 1:        # we are assuming pk is a descent direction
        factor = alpha0**2 * alpha1**2 * (alpha1-alpha0)
        a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \
            alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0)
        a = a / factor
        b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \
            alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0)
        b = b / factor

        alpha2 = (-b + numarray.sqrt(numarray.absolute(b**2 - 3 * a * derphi0))) / (3.0*a)
        phi_a2 = apply(f,(xk+alpha2*pk,)+args)
        fc = fc + 1

        if (phi_a2 <= phi0 + c1*alpha2*derphi0):
            return alpha2, fc, 0

        if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96:
            alpha2 = alpha1 / 2.0

        alpha0 = alpha1
        alpha1 = alpha2
        phi_a0 = phi_a1
        phi_a1 = phi_a2
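
# Example (illustrative sketch): a backtracking step length along the steepest
# descent direction at a trial point, using the analytic Rosenbrock gradient
# defined above, e.g.
#
#   >>> xk = numarray.array([0.8,1.2,0.7]); pk = -rosen_der(xk)
#   >>> alpha, fc, gc = line_search_BFGS(rosen, xk, pk, rosen_der(xk))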

epsilon = 1e-8     # step size for the forward-difference approximations below

def approx_fprime(xk,f,*args):
    # approximate the gradient by forward differences:
    #   grad[k] ~ (f(xk + epsilon*e_k) - f(xk)) / epsilon
    f0 = apply(f,(xk,)+args)
    grad = numarray.zeros((len(xk),),'d')
    ei = numarray.zeros((len(xk),),'d')
    for k in range(len(xk)):
        ei[k] = 1.0
        grad[k] = (apply(f,(xk+epsilon*ei,)+args) - f0)/epsilon
        ei[k] = 0.0
    return grad

def approx_fhess_p(x0,p,fprime,*args):
    # approximate the hessian-vector product by a forward difference of the
    # gradient along p:  H(x0) p ~ (f'(x0 + epsilon*p) - f'(x0)) / epsilon
    f2 = apply(fprime,(x0+epsilon*p,)+args)
    f1 = apply(fprime,(x0,)+args)
    return (f2 - f1)/epsilon
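
# Example (illustrative sketch): the finite-difference gradient should be
# close to the analytic one, e.g.
#
#   >>> x = numarray.array([0.8,1.2,0.7])
#   >>> approx_fprime(x, rosen) - rosen_der(x)   # roughly zero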


def fminBFGS(f, x0, fprime=None, args=(), avegtol=1e-5, maxiter=None, fulloutput=0, printmessg=1):
    """xopt = fminBFGS(f, x0, fprime=None, args=(), avegtol=1e-5, maxiter=None, fulloutput=0, printmessg=1)

    Optimize the function, f, whose gradient is given by fprime using the
    quasi-Newton method of Broyden, Fletcher, Goldfarb, and Shanno (BFGS)
    See Wright, and Nocedal 'Numerical Optimization', 1999, pg. 198.
    """

    app_fprime = 0
    if fprime is None:
        app_fprime = 1

    x0 = numarray.asarray(x0)
    if maxiter is None:
        maxiter = len(x0)*200
    func_calls = 0
    grad_calls = 0
    k = 0
    N = len(x0)
    gtol = N*avegtol
    I = numarray.identity(N)
    Hk = I

    if app_fprime:
        gfk = apply(approx_fprime,(x0,f)+args)
        func_calls = func_calls + len(x0) + 1
    else:
        gfk = apply(fprime,(x0,)+args)
        grad_calls = grad_calls + 1
    xk = x0
    sk = [2*gtol]
    while (numarray.add.reduce(numarray.absolute(gfk)) > gtol) and (k < maxiter):
        pk = -numarray.dot(Hk,gfk)
        alpha_k, fc, gc = line_search_BFGS(f,xk,pk,gfk,args)
        func_calls = func_calls + fc
        xkp1 = xk + alpha_k * pk
        sk = xkp1 - xk
        xk = xkp1
        if app_fprime:
            gfkp1 = apply(approx_fprime,(xkp1,f)+args)
            func_calls = func_calls + gc + len(x0) + 1
        else:
            gfkp1 = apply(fprime,(xkp1,)+args)
            grad_calls = grad_calls + gc + 1

        yk = gfkp1 - gfk
        k = k + 1

        # BFGS update of the inverse hessian approximation:
        #   Hk+1 = (I - rhok sk yk^T) Hk (I - rhok yk sk^T) + rhok sk sk^T
        rhok = 1 / numarray.dot(yk,sk)
        A1 = I - sk[:,numarray.NewAxis] * yk[numarray.NewAxis,:] * rhok
        A2 = I - yk[:,numarray.NewAxis] * sk[numarray.NewAxis,:] * rhok
        Hk = numarray.dot(A1,numarray.dot(Hk,A2)) + rhok * sk[:,numarray.NewAxis] * sk[numarray.NewAxis,:]
        gfk = gfkp1


    if printmessg or fulloutput:
        fval = apply(f,(xk,)+args)
    if k >= maxiter:
        warnflag = 1
        if printmessg:
            print "Warning: Maximum number of iterations has been exceeded"
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls
            print "         Gradient evaluations: %d" % grad_calls
    else:
        warnflag = 0
        if printmessg:
            print "Optimization terminated successfully."
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls
            print "         Gradient evaluations: %d" % grad_calls

    if fulloutput:
        return xk, fval, func_calls, grad_calls, warnflag
    else:
        return xk
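
# Example (illustrative sketch): with fprime omitted the gradient is
# approximated by approx_fprime at the cost of extra function calls, e.g.
#
#   >>> xopt = fminBFGS(rosen, [0.8,1.2,0.7], fprime=rosen_der, printmessg=0)
#   >>> xopt = fminBFGS(rosen, [0.8,1.2,0.7], printmessg=0)   # gradient-free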


def fminNCG(f, x0, fprime, fhess_p=None, fhess=None, args=(), avextol=1e-5, maxiter=None, fulloutput=0, printmessg=1):
    """xopt = fminNCG(f, x0, fprime, fhess_p=None, fhess=None, args=(), avextol=1e-5, maxiter=None, fulloutput=0, printmessg=1)

    Optimize the function, f, whose gradient is given by fprime using the
    Newton-CG method.  fhess_p must compute the hessian times an arbitrary
    vector.  If it is not given, finite-differences on fprime are used to
    compute it.  See Wright, and Nocedal 'Numerical Optimization', 1999,
    pg. 140.
    """

    x0 = numarray.asarray(x0)
    if maxiter is None:
        maxiter = len(x0)*200
    fcalls = 0
    gcalls = 0
    hcalls = 0
    approx_hessp = 0
    if fhess_p is None and fhess is None:    # Define hessian product
        approx_hessp = 1

    xtol = len(x0)*avextol
    update = [2*xtol]
    xk = x0
    k = 0
    while (numarray.add.reduce(numarray.absolute(update)) > xtol) and (k < maxiter):
        # Compute a search direction pk by applying the CG method to
        #   del2 f(xk) p = - grad f(xk)
        # starting from 0.
        b = -apply(fprime,(xk,)+args)
        gcalls = gcalls + 1
        maggrad = numarray.add.reduce(numarray.absolute(b))
        eta = min([0.5,numarray.sqrt(maggrad)])
        termcond = eta * maggrad
        xsupi = numarray.zeros((len(x0),))
        ri = -b
        psupi = -ri
        i = 0
        dri0 = numarray.dot(ri,ri)

        if fhess is not None:    # you want to compute hessian once.
            A = apply(fhess,(xk,)+args)
            hcalls = hcalls + 1

        while numarray.add.reduce(numarray.absolute(ri)) > termcond:
            if fhess is None:
                if approx_hessp:
                    Ap = apply(approx_fhess_p,(xk,psupi,fprime)+args)
                    gcalls = gcalls + 2
                else:
                    Ap = apply(fhess_p,(xk,psupi)+args)
                    hcalls = hcalls + 1
            else:
                # Ap = numarray.dot(A,psupi)
                Ap = numarray.matrixmultiply(A,psupi)
            # check curvature
            curv = numarray.dot(psupi,Ap)
            if (curv <= 0):
                if (i > 0):
                    break
                else:
                    xsupi = xsupi + dri0/curv * psupi
                    break
            alphai = dri0 / curv
            xsupi = xsupi + alphai * psupi
            ri = ri + alphai * Ap
            dri1 = numarray.dot(ri,ri)
            betai = dri1 / dri0
            psupi = -ri + betai * psupi
            i = i + 1
            dri0 = dri1          # update numarray.dot(ri,ri) for next time.

        pk = xsupi               # search direction is solution to system.
        gfk = -b                 # gradient at xk
        alphak, fc, gc = line_search_BFGS(f,xk,pk,gfk,args)
        fcalls = fcalls + fc
        gcalls = gcalls + gc

        update = alphak * pk
        xk = xk + update
        k = k + 1

    if printmessg or fulloutput:
        fval = apply(f,(xk,)+args)
    if k >= maxiter:
        warnflag = 1
        if printmessg:
            print "Warning: Maximum number of iterations has been exceeded"
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % fcalls
            print "         Gradient evaluations: %d" % gcalls
            print "         Hessian evaluations: %d" % hcalls
    else:
        warnflag = 0
        if printmessg:
            print "Optimization terminated successfully."
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % fcalls
            print "         Gradient evaluations: %d" % gcalls
            print "         Hessian evaluations: %d" % hcalls

    if fulloutput:
        return xk, fval, fcalls, gcalls, hcalls, warnflag
    else:
        return xk
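
# Example (illustrative sketch): the hessian can be supplied either as a
# hessian-vector product (fhess_p) or as the full matrix (fhess), e.g.
#
#   >>> xopt = fminNCG(rosen, [0.8,1.2,0.7], rosen_der,
#   ...                fhess_p=rosen3_hess_p, printmessg=0)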



if __name__ == "__main__":
    import time

    times = []
    algor = []
    x0 = [0.8,1.2,0.7]
    start = time.time()
    x = fmin(rosen,x0)
    print x
    times.append(time.time() - start)
    algor.append('Nelder-Mead Simplex\t')

    start = time.time()
    x = fminBFGS(rosen, x0, fprime=rosen_der, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('BFGS Quasi-Newton\t')

    start = time.time()
    x = fminBFGS(rosen, x0, avegtol=1e-4, maxiter=100)
    print x
    times.append(time.time() - start)
    algor.append('BFGS without gradient\t')

    start = time.time()
    x = fminNCG(rosen, x0, rosen_der, fhess_p=rosen3_hess_p, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('Newton-CG with hessian product')

    start = time.time()
    x = fminNCG(rosen, x0, rosen_der, fhess=rosen3_hess, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('Newton-CG with full hessian')

    print "\nMinimizing the Rosenbrock function of order 3\n"
    print " Algorithm \t\t\t Seconds"
    print "===========\t\t\t ========="
    for k in range(len(algor)):
        print algor[k], "\t -- ", times[k]

    times = []
    algor = []
    x0 = [1.,]
    start = time.time()
    x = fmin(para,x0)
    print x
    times.append(time.time() - start)
    algor.append('Nelder-Mead Simplex\t')

    start = time.time()
    x = fminBFGS(para, x0, fprime=para_der, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('BFGS Quasi-Newton\t')

    start = time.time()
    x = fminBFGS(para, x0, avegtol=1e-4, maxiter=100)
    print x
    times.append(time.time() - start)
    algor.append('BFGS without gradient\t')

    start = time.time()
    x = fminNCG(para, x0, para_der, fhess_p=para_hess_p, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('Newton-CG with hessian product')

    start = time.time()
    x = fminNCG(para, x0, para_der, fhess=para_hess, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('Newton-CG with full hessian')

    print "\nMinimizing (x-2)**2\n"
    print " Algorithm \t\t\t Seconds"
    print "===========\t\t\t ========="
    for k in range(len(algor)):
        print algor[k], "\t -- ", times[k]
