# /trunk/escript/py_src/minimize.py
# ******NOTICE***************
# optimize.py module by Travis E. Oliphant
#
# You may copy and use this module as you see fit with no
# guarantee implied provided you keep this notice in all copies.
# *****END NOTICE************

# A collection of optimization algorithms. Version 0.3.1

# Minimization routines
"""optimize.py

A collection of general-purpose optimization routines using numarray

fmin     --- Nelder-Mead Simplex algorithm (uses only function calls)
fminBFGS --- Quasi-Newton method (uses function and gradient)
fminNCG  --- Line-search Newton Conjugate Gradient (uses function, gradient
             and Hessian, if provided)

"""
import numarray
__version__ = "0.3.1"
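# Illustrative usage sketch (added commentary, not part of the original
# module). The starting point and the module name 'minimize' are assumptions
# made for the example:
#
#   from minimize import fmin, fminBFGS, fminNCG, rosen, rosen_der
#   x0 = [0.8, 1.2, 0.7]
#   xopt = fmin(rosen, x0)                        # derivative-free simplex
#   xopt = fminBFGS(rosen, x0, fprime=rosen_der)  # quasi-Newton, needs gradient
#   xopt = fminNCG(rosen, x0, rosen_der)          # Newton-CG, FD Hessian product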
def para(x):
    # Simple quadratic test function (x[0]-2)**2, minimized at x[0] = 2.
    return (x[0]-2.)**2

def para_der(x):
    # Gradient of para.
    return 2*(x-2.)

def para_hess(x):
    # Hessian of para (a constant 1x1 matrix).
    return numarray.ones((1,1))*2.

def para_hess_p(x,p):
    # Hessian of para applied to a vector p.
    return 2*p
def rosen(x):  # The Rosenbrock function
    return numarray.sum(100.0*(x[1:]-x[:-1]**2.0)**2.0 + (1-x[:-1])**2.0)

def rosen_der(x):
    # Analytic gradient of the Rosenbrock function.
    xm = x[1:-1]
    xm_m1 = x[:-2]
    xm_p1 = x[2:]
    der = numarray.zeros(x.shape,x.typecode())
    der[1:-1] = 200*(xm-xm_m1**2) - 400*(xm_p1 - xm**2)*xm - 2*(1-xm)
    der[0] = -400*x[0]*(x[1]-x[0]**2) - 2*(1-x[0])
    der[-1] = 200*(x[-1]-x[-2]**2)
    return der

def rosen3_hess_p(x,p):
    # Hessian of the order-3 Rosenbrock function applied to a vector p.
    assert(len(x)==3)
    assert(len(p)==3)
    hessp = numarray.zeros((3,),x.typecode())
    hessp[0] = (2 + 800*x[0]**2 - 400*(-x[0]**2 + x[1])) * p[0] \
               - 400*x[0]*p[1] \
               + 0
    hessp[1] = - 400*x[0]*p[0] \
               + (202 + 800*x[1]**2 - 400*(-x[1]**2 + x[2]))*p[1] \
               - 400*x[1] * p[2]
    hessp[2] = 0 \
               - 400*x[1] * p[1] \
               + 200 * p[2]
    return hessp

def rosen3_hess(x):
    # Full 3x3 Hessian of the order-3 Rosenbrock function.
    assert(len(x)==3)
    hessp = numarray.zeros((3,3),x.typecode())
    hessp[0,:] = [2 + 800*x[0]**2 - 400*(-x[0]**2 + x[1]), -400*x[0], 0]
    hessp[1,:] = [-400*x[0], 202 + 800*x[1]**2 - 400*(-x[1]**2 + x[2]), -400*x[1]]
    hessp[2,:] = [0, -400*x[1], 200]
    return hessp

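# Illustrative sanity check (added, not part of the original module): the
# analytic gradient above can be compared against the forward-difference
# estimate produced by approx_fprime, which is defined further down in this
# file. The tolerance 1e-4 is an arbitrary example value; x must be a
# numarray array.
def _check_rosen_der(x):
    g_exact = rosen_der(x)
    g_approx = approx_fprime(x, rosen)
    return max(numarray.absolute(g_exact - g_approx)) < 1e-4
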
def fmin(func, x0, args=(), xtol=1e-4, ftol=1e-4, maxiter=None, maxfun=None, fulloutput=0, printmessg=1):
    """xopt,{fval,warnflag} = fmin(function, x0, args=(), xtol=1e-4, ftol=1e-4,
                                   maxiter=200*len(x0), maxfun=200*len(x0), fulloutput=0, printmessg=1)

    Uses a Nelder-Mead Simplex algorithm to find the minimum of a function
    of one or more variables.
    """
    x0 = numarray.asarray(x0)
    assert (len(x0.shape)==1)
    N = len(x0)
    if maxiter is None:
        maxiter = N * 200
    if maxfun is None:
        maxfun = N * 200

    # Standard Nelder-Mead coefficients: reflection, expansion,
    # contraction and shrink.
    rho = 1; chi = 2; psi = 0.5; sigma = 0.5;
    one2np1 = range(1,N+1)

    sim = numarray.zeros((N+1,N),x0.typecode())
    fsim = numarray.zeros((N+1,),'d')
    sim[0] = x0
    fsim[0] = apply(func,(x0,)+args)
    nonzdelt = 0.05
    zdelt = 0.00025
    for k in range(0,N):
        y = numarray.array(x0,copy=1)
        if y[k] != 0:
            y[k] = (1+nonzdelt)*y[k]
        else:
            y[k] = zdelt
        sim[k+1] = y
        f = apply(func,(y,)+args)
        fsim[k+1] = f

    ind = numarray.argsort(fsim)
    fsim = numarray.take(fsim,ind)  # sort so sim[0,:] has the lowest function value
    sim = numarray.take(sim,ind,0)

    iterations = 1
    funcalls = N+1

    while (funcalls < maxfun and iterations < maxiter):
        if (max(numarray.ravel(numarray.absolute(sim[1:]-sim[0]))) <= xtol \
            and max(numarray.absolute(fsim[0]-fsim[1:])) <= ftol):
            break

        xbar = numarray.add.reduce(sim[:-1],0) / N
        xr = (1+rho)*xbar - rho*sim[-1]
        fxr = apply(func,(xr,)+args)
        funcalls = funcalls + 1
        doshrink = 0

        if fxr < fsim[0]:
            xe = (1+rho*chi)*xbar - rho*chi*sim[-1]
            fxe = apply(func,(xe,)+args)
            funcalls = funcalls + 1

            if fxe < fxr:
                sim[-1] = xe
                fsim[-1] = fxe
            else:
                sim[-1] = xr
                fsim[-1] = fxr
        else:  # fsim[0] <= fxr
            if fxr < fsim[-2]:
                sim[-1] = xr
                fsim[-1] = fxr
            else:  # fxr >= fsim[-2]
                # Perform contraction
                if fxr < fsim[-1]:
                    xc = (1+psi*rho)*xbar - psi*rho*sim[-1]
                    fxc = apply(func,(xc,)+args)
                    funcalls = funcalls + 1

                    if fxc <= fxr:
                        sim[-1] = xc
                        fsim[-1] = fxc
                    else:
                        doshrink = 1
                else:
                    # Perform an inside contraction
                    xcc = (1-psi)*xbar + psi*sim[-1]
                    fxcc = apply(func,(xcc,)+args)
                    funcalls = funcalls + 1

                    if fxcc < fsim[-1]:
                        sim[-1] = xcc
                        fsim[-1] = fxcc
                    else:
                        doshrink = 1

        if doshrink:
            for j in one2np1:
                sim[j] = sim[0] + sigma*(sim[j] - sim[0])
                fsim[j] = apply(func,(sim[j],)+args)
            funcalls = funcalls + N

        ind = numarray.argsort(fsim)
        sim = numarray.take(sim,ind,0)
        fsim = numarray.take(fsim,ind)
        iterations = iterations + 1

    x = sim[0]
    fval = min(fsim)
    warnflag = 0

    if funcalls >= maxfun:
        warnflag = 1
        if printmessg:
            print "Warning: Maximum number of function evaluations has been exceeded."
    elif iterations >= maxiter:
        warnflag = 2
        if printmessg:
            print "Warning: Maximum number of iterations has been exceeded"
    else:
        if printmessg:
            print "Optimization terminated successfully."
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % iterations
            print "         Function evaluations: %d" % funcalls

    if fulloutput:
        return x, fval, warnflag
    else:
        return x

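# Illustrative usage (added, not part of the original module): with a
# starting point near the solution, fmin should converge to roughly
# [1., 1., 1.], the minimizer of the Rosenbrock function. The tolerances
# are arbitrary example values:
#
#   xopt = fmin(rosen, [0.8, 1.2, 0.7], xtol=1e-6, ftol=1e-6)
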
def zoom(a_lo, a_hi):
    # NOTE: placeholder only. The interval-zoom phase of the Wolfe line
    # search (Wright and Nocedal, 1999) is not implemented here, so
    # line_search (which is not called anywhere in this module) returns
    # None whenever it reaches this branch.
    pass

def line_search(f, fprime, xk, pk, gfk, args=(), c1=1e-4, c2=0.9, amax=50):
    """alpha, fc, gc = line_search(f, fprime, xk, pk, gfk, args=(), c1=1e-4, c2=0.9, amax=50)

    minimize the function f(xk+alpha pk) using the line search algorithm of
    Wright and Nocedal in 'Numerical Optimization', 1999, pg. 59-60
    """

    fc = 0
    gc = 0
    alpha0 = 1.0
    phi0 = apply(f,(xk,)+args)
    phi_a0 = apply(f,(xk+alpha0*pk,)+args)
    fc = fc + 2
    derphi0 = numarray.dot(gfk,pk)
    derphi_a0 = numarray.dot(apply(fprime,(xk+alpha0*pk,)+args),pk)
    gc = gc + 1

    # check to see if alpha0 = 1 satisfies the strong Wolfe conditions.
    if (phi_a0 <= phi0 + c1*alpha0*derphi0) \
       and (numarray.absolute(derphi_a0) <= c2*numarray.absolute(derphi0)):
        return alpha0, fc, gc

    alpha0 = 0
    alpha1 = 1
    phi_a1 = phi_a0
    phi_a0 = phi0

    i = 1
    while 1:
        if (phi_a1 > phi0 + c1*alpha1*derphi0) or \
           ((phi_a1 >= phi_a0) and (i > 1)):
            return zoom(alpha0, alpha1)

        derphi_a1 = numarray.dot(apply(fprime,(xk+alpha1*pk,)+args),pk)
        gc = gc + 1
        if (numarray.absolute(derphi_a1) <= -c2*derphi0):
            return alpha1, fc, gc

        if (derphi_a1 >= 0):
            return zoom(alpha1, alpha0)

        alpha2 = (amax-alpha1)*0.25 + alpha1
        i = i + 1
        alpha0 = alpha1
        alpha1 = alpha2
        phi_a0 = phi_a1
        phi_a1 = apply(f,(xk+alpha1*pk,)+args)
        fc = fc + 1      # count this extra function evaluation

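# For reference (added commentary, not in the original module): the strong
# Wolfe conditions tested above are, for phi(alpha) = f(xk + alpha*pk),
#
#   sufficient decrease:  phi(alpha) <= phi(0) + c1*alpha*phi'(0)
#   curvature:            |phi'(alpha)| <= c2*|phi'(0)|
#
# with 0 < c1 < c2 < 1; the defaults c1=1e-4 and c2=0.9 are the values
# Wright and Nocedal recommend for Newton-type methods.
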
def line_search_BFGS(f, xk, pk, gfk, args=(), c1=1e-4, alpha0=1):
    """alpha, fc, gc = line_search_BFGS(f, xk, pk, gfk, args=(), c1=1e-4, alpha0=1)

    minimize over alpha, the function f(xk+alpha pk) using the interpolation
    algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pg. 56-57
    """

    fc = 0
    phi0 = apply(f,(xk,)+args)               # compute f(xk)
    phi_a0 = apply(f,(xk+alpha0*pk,)+args)   # compute f(xk+alpha0 pk)
    fc = fc + 2
    derphi0 = numarray.dot(gfk,pk)

    if (phi_a0 <= phi0 + c1*alpha0*derphi0):
        return alpha0, fc, 0

    # Otherwise compute the minimizer of a quadratic interpolant:

    alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
    phi_a1 = apply(f,(xk+alpha1*pk,)+args)
    fc = fc + 1

    if (phi_a1 <= phi0 + c1*alpha1*derphi0):
        return alpha1, fc, 0

    # Otherwise loop with cubic interpolation until we find an alpha which
    # satisfies the first Wolfe condition (since we are backtracking, we
    # assume that the value of alpha is not too small and satisfies the
    # second condition).

    while 1:        # we are assuming pk is a descent direction
        factor = alpha0**2 * alpha1**2 * (alpha1-alpha0)
        a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \
            alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0)
        a = a / factor
        b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \
            alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0)
        b = b / factor

        alpha2 = (-b + numarray.sqrt(numarray.absolute(b**2 - 3 * a * derphi0))) / (3.0*a)
        phi_a2 = apply(f,(xk+alpha2*pk,)+args)
        fc = fc + 1

        if (phi_a2 <= phi0 + c1*alpha2*derphi0):
            return alpha2, fc, 0

        if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96:
            alpha2 = alpha1 / 2.0

        alpha0 = alpha1
        alpha1 = alpha2
        phi_a0 = phi_a1
        phi_a1 = phi_a2

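# Added commentary (not in the original module): the quadratic step above
# comes from interpolating phi(0), phi'(0) and phi(alpha0) with
# q(a) = phi(0) + phi'(0)*a + c*a**2, where
# c = (phi(alpha0) - phi(0) - phi'(0)*alpha0) / alpha0**2, and then
# minimizing q:
#
#   alpha1 = -phi'(0) * alpha0**2 / (2*(phi(alpha0) - phi(0) - phi'(0)*alpha0))
#
# which is exactly the expression computed in the code.
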
epsilon = 1e-8

def approx_fprime(xk,f,*args):
    # Forward-difference approximation of the gradient of f at xk.
    f0 = apply(f,(xk,)+args)
    grad = numarray.zeros((len(xk),),'d')
    ei = numarray.zeros((len(xk),),'d')
    for k in range(len(xk)):
        ei[k] = 1.0
        grad[k] = (apply(f,(xk+epsilon*ei,)+args) - f0)/epsilon
        ei[k] = 0.0
    return grad

def approx_fhess_p(x0,p,fprime,*args):
    # Forward-difference approximation of the Hessian-vector product
    # H(x0)*p, using two gradient evaluations.
    f2 = apply(fprime,(x0+epsilon*p,)+args)
    f1 = apply(fprime,(x0,)+args)
    return (f2 - f1)/epsilon

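# Illustrative check (added, not part of the original module): for the
# order-3 Rosenbrock function the finite-difference Hessian-vector product
# should agree with the analytic one to within roughly sqrt(epsilon):
#
#   x = numarray.array([0.8, 1.2, 0.7])
#   p = numarray.array([1.0, 0.0, 0.0])
#   approx_fhess_p(x, p, rosen_der)   # compare against rosen3_hess_p(x, p)
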
def fminBFGS(f, x0, fprime=None, args=(), avegtol=1e-5, maxiter=None, fulloutput=0, printmessg=1):
    """xopt = fminBFGS(f, x0, fprime=None, args=(), avegtol=1e-5, maxiter=None, fulloutput=0, printmessg=1)

    Optimize the function, f, whose gradient is given by fprime using the
    quasi-Newton method of Broyden, Fletcher, Goldfarb, and Shanno (BFGS).
    See Wright and Nocedal, 'Numerical Optimization', 1999, pg. 198.
    """

    app_fprime = 0
    if fprime is None:
        app_fprime = 1

    x0 = numarray.asarray(x0)
    if maxiter is None:
        maxiter = len(x0)*200
    func_calls = 0
    grad_calls = 0
    k = 0
    N = len(x0)
    gtol = N*avegtol
    I = numarray.identity(N)
    Hk = I

    if app_fprime:
        gfk = apply(approx_fprime,(x0,f)+args)
        func_calls = func_calls + len(x0) + 1
    else:
        gfk = apply(fprime,(x0,)+args)
        grad_calls = grad_calls + 1
    xk = x0
    sk = [2*gtol]
    while (numarray.add.reduce(numarray.absolute(gfk)) > gtol) and (k < maxiter):
        pk = -numarray.dot(Hk,gfk)
        alpha_k, fc, gc = line_search_BFGS(f,xk,pk,gfk,args)
        func_calls = func_calls + fc
        xkp1 = xk + alpha_k * pk
        sk = xkp1 - xk
        xk = xkp1
        if app_fprime:
            gfkp1 = apply(approx_fprime,(xkp1,f)+args)
            func_calls = func_calls + gc + len(x0) + 1
        else:
            gfkp1 = apply(fprime,(xkp1,)+args)
            grad_calls = grad_calls + gc + 1

        yk = gfkp1 - gfk
        k = k + 1

        # BFGS update of the inverse Hessian approximation Hk.
        rhok = 1 / numarray.dot(yk,sk)
        A1 = I - sk[:,numarray.NewAxis] * yk[numarray.NewAxis,:] * rhok
        A2 = I - yk[:,numarray.NewAxis] * sk[numarray.NewAxis,:] * rhok
        Hk = numarray.dot(A1,numarray.dot(Hk,A2)) + rhok * sk[:,numarray.NewAxis] * sk[numarray.NewAxis,:]
        gfk = gfkp1

    if printmessg or fulloutput:
        fval = apply(f,(xk,)+args)
    if k >= maxiter:
        warnflag = 1
        if printmessg:
            print "Warning: Maximum number of iterations has been exceeded"
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls
            print "         Gradient evaluations: %d" % grad_calls
    else:
        warnflag = 0
        if printmessg:
            print "Optimization terminated successfully."
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls
            print "         Gradient evaluations: %d" % grad_calls

    if fulloutput:
        return xk, fval, func_calls, grad_calls, warnflag
    else:
        return xk

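# Added commentary (not in the original module): the update in fminBFGS is
# the standard BFGS formula for the inverse Hessian approximation,
#
#   H_{k+1} = (I - rho_k s_k y_k^T) H_k (I - rho_k y_k s_k^T)
#             + rho_k s_k s_k^T,   with rho_k = 1 / (y_k^T s_k),
#
# where s_k = x_{k+1} - x_k and y_k = grad f(x_{k+1}) - grad f(x_k). It
# keeps Hk symmetric positive definite as long as y_k^T s_k > 0, which the
# line search is meant to guarantee.
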
def fminNCG(f, x0, fprime, fhess_p=None, fhess=None, args=(), avextol=1e-5, maxiter=None, fulloutput=0, printmessg=1):
    """xopt = fminNCG(f, x0, fprime, fhess_p=None, fhess=None, args=(), avextol=1e-5, maxiter=None, fulloutput=0, printmessg=1)

    Optimize the function, f, whose gradient is given by fprime using the
    Newton-CG method. fhess_p must compute the Hessian times an arbitrary
    vector. If it is not given, finite-differences on fprime are used to
    compute it. See Wright and Nocedal, 'Numerical Optimization', 1999,
    pg. 140.
    """

    x0 = numarray.asarray(x0)
    if maxiter is None:           # guard against comparing k < None below
        maxiter = len(x0)*200
    fcalls = 0
    gcalls = 0
    hcalls = 0
    approx_hessp = 0
    if fhess_p is None and fhess is None:    # Define hessian product
        approx_hessp = 1

    xtol = len(x0)*avextol
    update = [2*xtol]
    xk = x0
    k = 0
    while (numarray.add.reduce(numarray.absolute(update)) > xtol) and (k < maxiter):
        # Compute a search direction pk by applying the CG method to
        # del2 f(xk) p = - grad f(xk) starting from 0.
        b = -apply(fprime,(xk,)+args)
        gcalls = gcalls + 1
        maggrad = numarray.add.reduce(numarray.absolute(b))
        eta = min([0.5,numarray.sqrt(maggrad)])
        termcond = eta * maggrad
        xsupi = numarray.zeros((len(x0),))
        ri = -b
        psupi = -ri
        i = 0
        dri0 = numarray.dot(ri,ri)

        if fhess is not None:     # you want to compute the Hessian once
            A = apply(fhess,(xk,)+args)
            hcalls = hcalls + 1

        while numarray.add.reduce(numarray.absolute(ri)) > termcond:
            if fhess is None:
                if approx_hessp:
                    Ap = apply(approx_fhess_p,(xk,psupi,fprime)+args)
                    gcalls = gcalls + 2
                else:
                    Ap = apply(fhess_p,(xk,psupi)+args)
                    hcalls = hcalls + 1
            else:
                # Ap = numarray.dot(A,psupi)
                Ap = numarray.matrixmultiply(A,psupi)
            # check curvature
            curv = numarray.dot(psupi,Ap)
            if (curv <= 0):
                if (i > 0):
                    break
                else:
                    xsupi = xsupi + dri0/curv * psupi
                    break
            alphai = dri0 / curv
            xsupi = xsupi + alphai * psupi
            ri = ri + alphai * Ap
            dri1 = numarray.dot(ri,ri)
            betai = dri1 / dri0
            psupi = -ri + betai * psupi
            i = i + 1
            dri0 = dri1           # update numarray.dot(ri,ri) for next time

        pk = xsupi                # search direction is solution to system
        gfk = -b                  # gradient at xk
        alphak, fc, gc = line_search_BFGS(f,xk,pk,gfk,args)
        fcalls = fcalls + fc
        gcalls = gcalls + gc

        update = alphak * pk
        xk = xk + update
        k = k + 1

    if printmessg or fulloutput:
        fval = apply(f,(xk,)+args)
    if k >= maxiter:
        warnflag = 1
        if printmessg:
            print "Warning: Maximum number of iterations has been exceeded"
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % fcalls
            print "         Gradient evaluations: %d" % gcalls
            print "         Hessian evaluations: %d" % hcalls
    else:
        warnflag = 0
        if printmessg:
            print "Optimization terminated successfully."
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % fcalls
            print "         Gradient evaluations: %d" % gcalls
            print "         Hessian evaluations: %d" % hcalls

    if fulloutput:
        return xk, fval, fcalls, gcalls, hcalls, warnflag
    else:
        return xk

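# Added commentary (not in the original module): the inner loop of fminNCG
# is a truncated (inexact) Newton method. The CG iteration stops once the
# residual satisfies |r| <= eta*|grad f(xk)| with
# eta = min(0.5, sqrt(|grad f(xk)|)), so the Newton system is solved only
# loosely far from the solution and increasingly accurately near it; the
# curvature test abandons CG as soon as a direction of non-positive
# curvature is encountered.
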
if __name__ == "__main__":
    import time

    times = []
    algor = []
    x0 = [0.8,1.2,0.7]
    start = time.time()
    x = fmin(rosen,x0)
    print x
    times.append(time.time() - start)
    algor.append('Nelder-Mead Simplex\t')

    start = time.time()
    x = fminBFGS(rosen, x0, fprime=rosen_der, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('BFGS Quasi-Newton\t')

    start = time.time()
    x = fminBFGS(rosen, x0, avegtol=1e-4, maxiter=100)
    print x
    times.append(time.time() - start)
    algor.append('BFGS without gradient\t')

    start = time.time()
    x = fminNCG(rosen, x0, rosen_der, fhess_p=rosen3_hess_p, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('Newton-CG with hessian product')

    start = time.time()
    x = fminNCG(rosen, x0, rosen_der, fhess=rosen3_hess, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('Newton-CG with full hessian')

    print "\nMinimizing the Rosenbrock function of order 3\n"
    print " Algorithm \t\t\t Seconds"
    print "===========\t\t\t ========="
    for k in range(len(algor)):
        print algor[k], "\t -- ", times[k]

    times = []
    algor = []
    x0 = [1.,]
    start = time.time()
    x = fmin(para,x0)
    print x
    times.append(time.time() - start)
    algor.append('Nelder-Mead Simplex\t')

    start = time.time()
    x = fminBFGS(para, x0, fprime=para_der, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('BFGS Quasi-Newton\t')

    start = time.time()
    x = fminBFGS(para, x0, avegtol=1e-4, maxiter=100)
    print x
    times.append(time.time() - start)
    algor.append('BFGS without gradient\t')

    start = time.time()
    x = fminNCG(para, x0, para_der, fhess_p=para_hess_p, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('Newton-CG with hessian product')

    start = time.time()
    x = fminNCG(para, x0, para_der, fhess=para_hess, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('Newton-CG with full hessian')

    print "\nMinimizing (x[0]-2)**2\n"
    print " Algorithm \t\t\t Seconds"
    print "===========\t\t\t ========="
    for k in range(len(algor)):
        print algor[k], "\t -- ", times[k]
