
Contents of /trunk/escript/py_src/minimize.py



Revision 865
Fri Oct 6 10:02:18 2006 UTC by gross
File MIME type: text/x-python
File size: 19223 byte(s)
Some functions for minimizing a function added. There is still some work required, but it has been applied successfully in an application.
# ******NOTICE***************
# optimize.py module by Travis E. Oliphant
#
# You may copy and use this module as you see fit with no
# guarantee implied provided you keep this notice in all copies.
# *****END NOTICE************

# A collection of optimization algorithms. Version 0.3.1

# Minimization routines
"""optimize.py

A collection of general-purpose optimization routines using numarray

fmin     --- Nelder-Mead Simplex algorithm (uses only function calls)
fminBFGS --- Quasi-Newton method (uses function and gradient)
fminNCG  --- Line-search Newton Conjugate Gradient (uses function, gradient
             and hessian (if it's provided))

"""
import numarray
__version__ = "0.3.1"

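# A minimal usage sketch (illustrative only; the quadratic test function and
# starting point below are assumptions, not part of this module):
#
#     def bowl(x):
#         return numarray.sum((numarray.asarray(x)-3.)**2)
#     def bowl_der(x):
#         return 2.*(numarray.asarray(x)-3.)
#
#     xopt = fmin(bowl, [0., 0.])                       # derivative-free simplex
#     xopt = fminBFGS(bowl, [0., 0.], fprime=bowl_der)  # quasi-Newton with gradient
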
# 1-d test problem: a parabola with its minimum at x=2.
def para(x):
    return (x[0]-2.)**2

def para_der(x):
    return 2*(x-2.)

def para_hess(x):
    return numarray.ones((1,1))*2.

def para_hess_p(x,p):
    return 2*p

def rosen(x):  # The Rosenbrock function (minimum at x = [1,...,1])
    return numarray.sum(100.0*(x[1:]-x[:-1]**2.0)**2.0 + (1-x[:-1])**2.0)

def rosen_der(x):
    xm = x[1:-1]
    xm_m1 = x[:-2]
    xm_p1 = x[2:]
    der = numarray.zeros(x.shape,x.typecode())
    der[1:-1] = 200*(xm-xm_m1**2) - 400*(xm_p1 - xm**2)*xm - 2*(1-xm)
    der[0] = -400*x[0]*(x[1]-x[0]**2) - 2*(1-x[0])
    der[-1] = 200*(x[-1]-x[-2]**2)
    return der

def rosen3_hess_p(x,p):
    # Hessian of the 3-d Rosenbrock function applied to a vector p.
    assert(len(x)==3)
    assert(len(p)==3)
    hessp = numarray.zeros((3,),x.typecode())
    hessp[0] = (2 + 800*x[0]**2 - 400*(-x[0]**2 + x[1])) * p[0] \
               - 400*x[0]*p[1] \
               + 0
    hessp[1] = - 400*x[0]*p[0] \
               + (202 + 800*x[1]**2 - 400*(-x[1]**2 + x[2]))*p[1] \
               - 400*x[1] * p[2]
    hessp[2] = 0 \
               - 400*x[1] * p[1] \
               + 200 * p[2]

    return hessp

def rosen3_hess(x):
    # Full Hessian of the 3-d Rosenbrock function.
    assert(len(x)==3)
    hessp = numarray.zeros((3,3),x.typecode())
    hessp[0,:] = [2 + 800*x[0]**2 -400*(-x[0]**2 + x[1]), -400*x[0], 0]
    hessp[1,:] = [-400*x[0], 202+800*x[1]**2 -400*(-x[1]**2 + x[2]), -400*x[1]]
    hessp[2,:] = [0,-400*x[1], 200]
    return hessp

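# A quick consistency check of the analytic Hessian-vector product against the
# finite-difference helper defined further below, sketched as a comment only
# (the test point and tolerance are assumptions):
#
#     x = numarray.array([0.8, 1.2, 0.7]); p = numarray.array([1., 0., 0.])
#     diff = rosen3_hess_p(x, p) - approx_fhess_p(x, p, rosen_der)
#     assert max(numarray.absolute(diff)) < 1e-4
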

def fmin(func, x0, args=(), xtol=1e-4, ftol=1e-4, maxiter=None, maxfun=None, fulloutput=0, printmessg=1):
    """xopt,{fval,warnflag} = fmin(function, x0, args=(), xtol=1e-4, ftol=1e-4,
                                   maxiter=200*len(x0), maxfun=200*len(x0), fulloutput=0, printmessg=1)

    Uses a Nelder-Mead Simplex algorithm to find the minimum of a function
    of one or more variables.
    """
    x0 = numarray.asarray(x0)
    assert (len(x0.shape)==1)
    N = len(x0)
    if maxiter is None:
        maxiter = N * 200
    if maxfun is None:
        maxfun = N * 200

    rho = 1; chi = 2; psi = 0.5; sigma = 0.5;
    one2np1 = range(1,N+1)

    sim = numarray.zeros((N+1,N),x0.typecode())
    fsim = numarray.zeros((N+1,),'d')
    sim[0] = x0
    fsim[0] = apply(func,(x0,)+args)
    nonzdelt = 0.05
    zdelt = 0.00025
    for k in range(0,N):
        y = numarray.array(x0,copy=1)
        if y[k] != 0:
            y[k] = (1+nonzdelt)*y[k]
        else:
            y[k] = zdelt

        sim[k+1] = y
        f = apply(func,(y,)+args)
        fsim[k+1] = f

    ind = numarray.argsort(fsim)
    fsim = numarray.take(fsim,ind)     # sort so sim[0,:] has the lowest function value
    sim = numarray.take(sim,ind,0)

    iterations = 1
    funcalls = N+1

    while (funcalls < maxfun and iterations < maxiter):
        if (max(numarray.ravel(numarray.absolute(sim[1:]-sim[0]))) <= xtol \
            and max(numarray.absolute(fsim[0]-fsim[1:])) <= ftol):
            break

        xbar = numarray.add.reduce(sim[:-1],0) / N
        xr = (1+rho)*xbar - rho*sim[-1]
        fxr = apply(func,(xr,)+args)
        funcalls = funcalls + 1
        doshrink = 0

        if fxr < fsim[0]:
            xe = (1+rho*chi)*xbar - rho*chi*sim[-1]
            fxe = apply(func,(xe,)+args)
            funcalls = funcalls + 1

            if fxe < fxr:
                sim[-1] = xe
                fsim[-1] = fxe
            else:
                sim[-1] = xr
                fsim[-1] = fxr
        else: # fsim[0] <= fxr
            if fxr < fsim[-2]:
                sim[-1] = xr
                fsim[-1] = fxr
            else: # fxr >= fsim[-2]
                # Perform contraction
                if fxr < fsim[-1]:
                    xc = (1+psi*rho)*xbar - psi*rho*sim[-1]
                    fxc = apply(func,(xc,)+args)
                    funcalls = funcalls + 1

                    if fxc <= fxr:
                        sim[-1] = xc
                        fsim[-1] = fxc
                    else:
                        doshrink=1
                else:
                    # Perform an inside contraction
                    xcc = (1-psi)*xbar + psi*sim[-1]
                    fxcc = apply(func,(xcc,)+args)
                    funcalls = funcalls + 1

                    if fxcc < fsim[-1]:
                        sim[-1] = xcc
                        fsim[-1] = fxcc
                    else:
                        doshrink = 1

                if doshrink:
                    for j in one2np1:
                        sim[j] = sim[0] + sigma*(sim[j] - sim[0])
                        fsim[j] = apply(func,(sim[j],)+args)
                    funcalls = funcalls + N

        ind = numarray.argsort(fsim)
        sim = numarray.take(sim,ind,0)
        fsim = numarray.take(fsim,ind)
        iterations = iterations + 1

    x = sim[0]
    fval = min(fsim)
    warnflag = 0

    if funcalls >= maxfun:
        warnflag = 1
        if printmessg:
            print "Warning: Maximum number of function evaluations has been exceeded."
    elif iterations >= maxiter:
        warnflag = 2
        if printmessg:
            print "Warning: Maximum number of iterations has been exceeded"
    else:
        if printmessg:
            print "Optimization terminated successfully."
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % iterations
            print "         Function evaluations: %d" % funcalls

    if fulloutput:
        return x, fval, warnflag
    else:
        return x

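# Example call (mirrors the self-test at the bottom of this file); with
# fulloutput=1 the warnflag reports why the iteration stopped:
#
#     xopt, fval, warnflag = fmin(rosen, [0.8, 1.2, 0.7], fulloutput=1)
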
def zoom(a_lo, a_hi):
    # Placeholder: the zoom stage of the Wolfe line search is not yet
    # implemented; line_search falls through to this stub and gets None back.
    pass


def line_search(f, fprime, xk, pk, gfk, args=(), c1=1e-4, c2=0.9, amax=50):
    """alpha, fc, gc = line_search(f, fprime, xk, pk, gfk,
                                   args=(), c1=1e-4, c2=0.9, amax=50)

    minimize the function f(xk+alpha pk) using the line search algorithm of
    Wright and Nocedal in 'Numerical Optimization', 1999, pg. 59-60
    """

    fc = 0
    gc = 0
    alpha0 = 1.0
    phi0 = apply(f,(xk,)+args)
    phi_a0 = apply(f,(xk+alpha0*pk,)+args)
    fc = fc + 2
    derphi0 = numarray.dot(gfk,pk)
    derphi_a0 = numarray.dot(apply(fprime,(xk+alpha0*pk,)+args),pk)
    gc = gc + 1

    # check to see if alpha0 = 1 satisfies Strong Wolfe conditions.
    if (phi_a0 <= phi0 + c1*alpha0*derphi0) \
       and (numarray.absolute(derphi_a0) <= c2*numarray.absolute(derphi0)):
        return alpha0, fc, gc

    alpha0 = 0
    alpha1 = 1
    phi_a1 = phi_a0
    phi_a0 = phi0

    i = 1
    while 1:
        if (phi_a1 > phi0 + c1*alpha1*derphi0) or \
           ((phi_a1 >= phi_a0) and (i > 1)):
            return zoom(alpha0, alpha1)

        derphi_a1 = numarray.dot(apply(fprime,(xk+alpha1*pk,)+args),pk)
        gc = gc + 1
        if (numarray.absolute(derphi_a1) <= -c2*derphi0):
            return alpha1

        if (derphi_a1 >= 0):
            return zoom(alpha1, alpha0)

        alpha2 = (amax-alpha1)*0.25 + alpha1
        i = i + 1
        alpha0 = alpha1
        alpha1 = alpha2
        phi_a0 = phi_a1
        phi_a1 = apply(f,(xk+alpha1*pk,)+args)



def line_search_BFGS(f, xk, pk, gfk, args=(), c1=1e-4, alpha0=1):
    """alpha, fc, gc = line_search_BFGS(f, xk, pk, gfk,
                                        args=(), c1=1e-4, alpha0=1)

    minimize over alpha, the function f(xk+alpha pk) using the interpolation
    algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pg. 56-57
    """

    fc = 0
    phi0 = apply(f,(xk,)+args)                # compute f(xk)
    phi_a0 = apply(f,(xk+alpha0*pk,)+args)    # compute f
    fc = fc + 2
    derphi0 = numarray.dot(gfk,pk)

    if (phi_a0 <= phi0 + c1*alpha0*derphi0):
        return alpha0, fc, 0

    # Otherwise compute the minimizer of a quadratic interpolant:

    alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
    phi_a1 = apply(f,(xk+alpha1*pk,)+args)
    fc = fc + 1

    if (phi_a1 <= phi0 + c1*alpha1*derphi0):
        return alpha1, fc, 0

    # Otherwise loop with cubic interpolation until we find an alpha which satisfies
    # the first Wolfe condition (since we are backtracking, we will assume that
    # the value of alpha is not too small and satisfies the second condition).

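    # Each pass fits the cubic model phi(t) ~ a*t**3 + b*t**2 + derphi0*t + phi0
    # to phi0, derphi0 and the two most recent trial points (alpha0, phi_a0)
    # and (alpha1, phi_a1); alpha2 below is the minimizer of that model.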
    while 1:       # we are assuming pk is a descent direction
        factor = alpha0**2 * alpha1**2 * (alpha1-alpha0)
        a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \
            alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0)
        a = a / factor
        b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \
            alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0)
        b = b / factor

        alpha2 = (-b + numarray.sqrt(numarray.absolute(b**2 - 3 * a * derphi0))) / (3.0*a)
        phi_a2 = apply(f,(xk+alpha2*pk,)+args)
        fc = fc + 1

        if (phi_a2 <= phi0 + c1*alpha2*derphi0):
            return alpha2, fc, 0

        if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96:
            alpha2 = alpha1 / 2.0

        alpha0 = alpha1
        alpha1 = alpha2
        phi_a0 = phi_a1
        phi_a1 = phi_a2

epsilon = 1e-8

def approx_fprime(xk,f,*args):
    f0 = apply(f,(xk,)+args)
    grad = numarray.zeros((len(xk),),'d')
    ei = numarray.zeros((len(xk),),'d')
    for k in range(len(xk)):
        ei[k] = 1.0
        grad[k] = (apply(f,(xk+epsilon*ei,)+args) - f0)/epsilon
        ei[k] = 0.0
    return grad

def approx_fhess_p(x0,p,fprime,*args):
    f2 = apply(fprime,(x0+epsilon*p,)+args)
    f1 = apply(fprime,(x0,)+args)
    return (f2 - f1)/epsilon
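
# Both helpers above use one-sided forward differences with the fixed step
# epsilon = 1e-8, so they are first-order accurate in epsilon; one call to
# approx_fprime costs len(xk)+1 evaluations of f.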


def fminBFGS(f, x0, fprime=None, args=(), avegtol=1e-5, maxiter=None, fulloutput=0, printmessg=1):
    """xopt = fminBFGS(f, x0, fprime=None, args=(), avegtol=1e-5,
                       maxiter=None, fulloutput=0, printmessg=1)

    Optimize the function, f, whose gradient is given by fprime using the
    quasi-Newton method of Broyden, Fletcher, Goldfarb, and Shanno (BFGS).
    See Wright and Nocedal, 'Numerical Optimization', 1999, pg. 198.
    """

    app_fprime = 0
    if fprime is None:
        app_fprime = 1

    x0 = numarray.asarray(x0)
    if maxiter is None:
        maxiter = len(x0)*200
    func_calls = 0
    grad_calls = 0
    k = 0
    N = len(x0)
    gtol = N*avegtol
    I = numarray.identity(N)
    Hk = I

    if app_fprime:
        gfk = apply(approx_fprime,(x0,f)+args)
        func_calls = func_calls + len(x0) + 1
    else:
        gfk = apply(fprime,(x0,)+args)
        grad_calls = grad_calls + 1
    xk = x0
    sk = [2*gtol]
    while (numarray.add.reduce(numarray.absolute(gfk)) > gtol) and (k < maxiter):
        pk = -numarray.dot(Hk,gfk)
        alpha_k, fc, gc = line_search_BFGS(f,xk,pk,gfk,args)
        func_calls = func_calls + fc
        xkp1 = xk + alpha_k * pk
        sk = xkp1 - xk
        xk = xkp1
        if app_fprime:
            gfkp1 = apply(approx_fprime,(xkp1,f)+args)
            func_calls = func_calls + gc + len(x0) + 1
        else:
            gfkp1 = apply(fprime,(xkp1,)+args)
            grad_calls = grad_calls + gc + 1

        yk = gfkp1 - gfk
        k = k + 1

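        # BFGS update of the inverse Hessian approximation Hk
        # (see Nocedal and Wright, 'Numerical Optimization'):
        #   H_{k+1} = (I - rho*sk*yk^T) Hk (I - rho*yk*sk^T) + rho*sk*sk^T
        # with rho = 1/(yk^T sk).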
        rhok = 1 / numarray.dot(yk,sk)
        A1 = I - sk[:,numarray.NewAxis] * yk[numarray.NewAxis,:] * rhok
        A2 = I - yk[:,numarray.NewAxis] * sk[numarray.NewAxis,:] * rhok
        Hk = numarray.dot(A1,numarray.dot(Hk,A2)) + rhok * sk[:,numarray.NewAxis] * sk[numarray.NewAxis,:]
        gfk = gfkp1


    if printmessg or fulloutput:
        fval = apply(f,(xk,)+args)
    if k >= maxiter:
        warnflag = 1
        if printmessg:
            print "Warning: Maximum number of iterations has been exceeded"
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls
            print "         Gradient evaluations: %d" % grad_calls
    else:
        warnflag = 0
        if printmessg:
            print "Optimization terminated successfully."
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls
            print "         Gradient evaluations: %d" % grad_calls

    if fulloutput:
        return xk, fval, func_calls, grad_calls, warnflag
    else:
        return xk


def fminNCG(f, x0, fprime, fhess_p=None, fhess=None, args=(), avextol=1e-5, maxiter=None, fulloutput=0, printmessg=1):
    """xopt = fminNCG(f, x0, fprime, fhess_p=None, fhess=None, args=(), avextol=1e-5,
                      maxiter=None, fulloutput=0, printmessg=1)

    Optimize the function, f, whose gradient is given by fprime using the
    Newton-CG method.  fhess_p must compute the hessian times an arbitrary
    vector.  If it is not given, finite-differences on fprime are used to
    compute it.  See Wright and Nocedal, 'Numerical Optimization', 1999,
    pg. 140.
    """

    x0 = numarray.asarray(x0)
    if maxiter is None:        # default limit, as in fminBFGS; "k < maxiter" below
        maxiter = len(x0)*200  # never holds for maxiter=None, so the loop would not run
    fcalls = 0
    gcalls = 0
    hcalls = 0
    approx_hessp = 0
    if fhess_p is None and fhess is None:    # Define hessian product
        approx_hessp = 1

    xtol = len(x0)*avextol
    update = [2*xtol]
    xk = x0
    k = 0
    while (numarray.add.reduce(numarray.absolute(update)) > xtol) and (k < maxiter):
        # Compute a search direction pk by applying the CG method to
        #   del2 f(xk) p = - grad f(xk)  starting from 0.
        b = -apply(fprime,(xk,)+args)
        gcalls = gcalls + 1
        maggrad = numarray.add.reduce(numarray.absolute(b))
        eta = min([0.5,numarray.sqrt(maggrad)])
        termcond = eta * maggrad
        xsupi = numarray.zeros((len(x0),))
        ri = -b
        psupi = -ri
        i = 0
        dri0 = numarray.dot(ri,ri)

        if fhess is not None:            # you want to compute hessian once.
            A = apply(fhess,(xk,)+args)
            hcalls = hcalls + 1

        while numarray.add.reduce(numarray.absolute(ri)) > termcond:
            if fhess is None:
                if approx_hessp:
                    Ap = apply(approx_fhess_p,(xk,psupi,fprime)+args)
                    gcalls = gcalls + 2
                else:
                    Ap = apply(fhess_p,(xk,psupi)+args)
                    hcalls = hcalls + 1
            else:
                # Ap = numarray.dot(A,psupi)
                Ap = numarray.matrixmultiply(A,psupi)
            # check curvature
            curv = numarray.dot(psupi,Ap)
            if (curv <= 0):
                if (i > 0):
                    break
                else:
                    xsupi = xsupi + dri0/curv * psupi
                    break
            alphai = dri0 / curv
            xsupi = xsupi + alphai * psupi
            ri = ri + alphai * Ap
            dri1 = numarray.dot(ri,ri)
            betai = dri1 / dri0
            psupi = -ri + betai * psupi
            i = i + 1
            dri0 = dri1          # update numarray.dot(ri,ri) for next time.

        pk = xsupi               # search direction is solution to system.
        gfk = -b                 # gradient at xk
        alphak, fc, gc = line_search_BFGS(f,xk,pk,gfk,args)
        fcalls = fcalls + fc
        gcalls = gcalls + gc

        update = alphak * pk
        xk = xk + update
        k = k + 1

    if printmessg or fulloutput:
        fval = apply(f,(xk,)+args)
    if k >= maxiter:
        warnflag = 1
        if printmessg:
            print "Warning: Maximum number of iterations has been exceeded"
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % fcalls
            print "         Gradient evaluations: %d" % gcalls
            print "         Hessian evaluations: %d" % hcalls
    else:
        warnflag = 0
        if printmessg:
            print "Optimization terminated successfully."
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % fcalls
            print "         Gradient evaluations: %d" % gcalls
            print "         Hessian evaluations: %d" % hcalls

    if fulloutput:
        return xk, fval, fcalls, gcalls, hcalls, warnflag
    else:
        return xk


if __name__ == "__main__":
    import string
    import time


    times = []
    algor = []
    x0 = [0.8,1.2,0.7]
    start = time.time()
    x = fmin(rosen,x0)
    print x
    times.append(time.time() - start)
    algor.append('Nelder-Mead Simplex\t')

    start = time.time()
    x = fminBFGS(rosen, x0, fprime=rosen_der, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('BFGS Quasi-Newton\t')

    start = time.time()
    x = fminBFGS(rosen, x0, avegtol=1e-4, maxiter=100)
    print x
    times.append(time.time() - start)
    algor.append('BFGS without gradient\t')


    start = time.time()
    x = fminNCG(rosen, x0, rosen_der, fhess_p=rosen3_hess_p, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('Newton-CG with hessian product')


    start = time.time()
    x = fminNCG(rosen, x0, rosen_der, fhess=rosen3_hess, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('Newton-CG with full hessian')

    print "\nMinimizing the Rosenbrock function of order 3\n"
    print " Algorithm \t\t\t Seconds"
    print "===========\t\t\t ========="
    for k in range(len(algor)):
        print algor[k], "\t -- ", times[k]

    times = []
    algor = []
    x0 = [1.,]
    start = time.time()
    x = fmin(para,x0)
    print x
    times.append(time.time() - start)
    algor.append('Nelder-Mead Simplex\t')

    start = time.time()
    x = fminBFGS(para, x0, fprime=para_der, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('BFGS Quasi-Newton\t')

    start = time.time()
    x = fminBFGS(para, x0, avegtol=1e-4, maxiter=100)
    print x
    times.append(time.time() - start)
    algor.append('BFGS without gradient\t')


    start = time.time()
    x = fminNCG(para, x0, para_der, fhess_p=para_hess_p, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('Newton-CG with hessian product')


    start = time.time()
    x = fminNCG(para, x0, para_der, fhess=para_hess, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('Newton-CG with full hessian')

    print "\nMinimizing the parabola (x-2.)**2\n"
    print " Algorithm \t\t\t Seconds"
    print "===========\t\t\t ========="
    for k in range(len(algor)):
        print algor[k], "\t -- ", times[k]
