/[escript]/trunk/escript/py_src/minimize.py
Revision 865 - Fri Oct 6 10:02:18 2006 UTC by gross
File MIME type: text/x-python
File size: 19223 byte(s)
Some functions for minimizing a function added. There is still some work required, but the routines have been applied successfully in an application.
# ******NOTICE***************
# optimize.py module by Travis E. Oliphant
#
# You may copy and use this module as you see fit with no
# guarantee implied provided you keep this notice in all copies.
# *****END NOTICE************

# A collection of optimization algorithms. Version 0.3.1

# Minimization routines
"""optimize.py

A collection of general-purpose optimization routines using numarray.

fmin     --- Nelder-Mead Simplex algorithm (uses only function calls)
fminBFGS --- Quasi-Newton method (uses function and gradient)
fminNCG  --- Line-search Newton Conjugate Gradient (uses function, gradient
             and hessian (if it's provided))

"""
import numarray
__version__ = "0.3.1"

# Test problems: a shifted 1-d parabola and the Rosenbrock function,
# with their gradients and (for the 3-d Rosenbrock case) hessians.

def para(x):
    return (x[0]-2.)**2

def para_der(x):
    return 2*(x-2.)

def para_hess(x):
    return numarray.ones((1,1))*2.

def para_hess_p(x,p):
    return 2*p

def rosen(x):  # The Rosenbrock function
    return numarray.sum(100.0*(x[1:]-x[:-1]**2.0)**2.0 + (1-x[:-1])**2.0)

def rosen_der(x):
    xm = x[1:-1]
    xm_m1 = x[:-2]
    xm_p1 = x[2:]
    der = numarray.zeros(x.shape,x.typecode())
    der[1:-1] = 200*(xm-xm_m1**2) - 400*(xm_p1 - xm**2)*xm - 2*(1-xm)
    der[0] = -400*x[0]*(x[1]-x[0]**2) - 2*(1-x[0])
    der[-1] = 200*(x[-1]-x[-2]**2)
    return der

def rosen3_hess_p(x,p):
    # hessian of the 3-d Rosenbrock function applied to a vector p
    assert(len(x)==3)
    assert(len(p)==3)
    hessp = numarray.zeros((3,),x.typecode())
    hessp[0] = (2 + 800*x[0]**2 - 400*(-x[0]**2 + x[1])) * p[0] \
               - 400*x[0]*p[1]
    hessp[1] = - 400*x[0]*p[0] \
               + (202 + 800*x[1]**2 - 400*(-x[1]**2 + x[2]))*p[1] \
               - 400*x[1]*p[2]
    hessp[2] = - 400*x[1]*p[1] \
               + 200*p[2]
    return hessp

def rosen3_hess(x):
    # full hessian of the 3-d Rosenbrock function
    assert(len(x)==3)
    hessp = numarray.zeros((3,3),x.typecode())
    hessp[0,:] = [2 + 800*x[0]**2 -400*(-x[0]**2 + x[1]), -400*x[0], 0]
    hessp[1,:] = [-400*x[0], 202+800*x[1]**2 -400*(-x[1]**2 + x[2]), -400*x[1]]
    hessp[2,:] = [0,-400*x[1], 200]
    return hessp
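# Illustrative sanity check (added; not part of the original module): for
# any 3-vector p, multiplying the full hessian into p should reproduce
# rosen3_hess_p(x,p).  A minimal sketch, assuming x and p are numarray
# arrays of length 3:
def _check_rosen3_hess(x,p):
    full = numarray.matrixmultiply(rosen3_hess(x),p)   # H(x) p
    direct = rosen3_hess_p(x,p)                        # explicit product
    return max(numarray.absolute(full - direct))       # should be ~0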

def fmin(func, x0, args=(), xtol=1e-4, ftol=1e-4, maxiter=None, maxfun=None, fulloutput=0, printmessg=1):
    """xopt,{fval,warnflag} = fmin(function, x0, args=(), xtol=1e-4, ftol=1e-4,
    maxiter=200*len(x0), maxfun=200*len(x0), fulloutput=0, printmessg=1)

    Uses a Nelder-Mead Simplex algorithm to find the minimum of a function
    of one or more variables.
    """
    x0 = numarray.asarray(x0)
    assert (len(x0.shape)==1)
    N = len(x0)
    if maxiter is None:
        maxiter = N * 200
    if maxfun is None:
        maxfun = N * 200

    # standard Nelder-Mead coefficients: reflection, expansion,
    # contraction and shrinkage
    rho = 1; chi = 2; psi = 0.5; sigma = 0.5
    one2np1 = range(1,N+1)

    # build the initial simplex by perturbing x0 one coordinate at a time
    sim = numarray.zeros((N+1,N),x0.typecode())
    fsim = numarray.zeros((N+1,),'d')
    sim[0] = x0
    fsim[0] = apply(func,(x0,)+args)
    nonzdelt = 0.05
    zdelt = 0.00025
    for k in range(0,N):
        y = numarray.array(x0,copy=1)
        if y[k] != 0:
            y[k] = (1+nonzdelt)*y[k]
        else:
            y[k] = zdelt
        sim[k+1] = y
        f = apply(func,(y,)+args)
        fsim[k+1] = f

    ind = numarray.argsort(fsim)
    fsim = numarray.take(fsim,ind)   # sort so sim[0,:] has the lowest function value
    sim = numarray.take(sim,ind,0)

    iterations = 1
    funcalls = N+1

    while (funcalls < maxfun and iterations < maxiter):
        if (max(numarray.ravel(numarray.absolute(sim[1:]-sim[0]))) <= xtol \
            and max(numarray.absolute(fsim[0]-fsim[1:])) <= ftol):
            break

        xbar = numarray.add.reduce(sim[:-1],0) / N
        xr = (1+rho)*xbar - rho*sim[-1]
        fxr = apply(func,(xr,)+args)
        funcalls = funcalls + 1
        doshrink = 0

        if fxr < fsim[0]:
            xe = (1+rho*chi)*xbar - rho*chi*sim[-1]
            fxe = apply(func,(xe,)+args)
            funcalls = funcalls + 1

            if fxe < fxr:
                sim[-1] = xe
                fsim[-1] = fxe
            else:
                sim[-1] = xr
                fsim[-1] = fxr
        else:  # fsim[0] <= fxr
            if fxr < fsim[-2]:
                sim[-1] = xr
                fsim[-1] = fxr
            else:  # fxr >= fsim[-2]
                # Perform contraction
                if fxr < fsim[-1]:
                    xc = (1+psi*rho)*xbar - psi*rho*sim[-1]
                    fxc = apply(func,(xc,)+args)
                    funcalls = funcalls + 1

                    if fxc <= fxr:
                        sim[-1] = xc
                        fsim[-1] = fxc
                    else:
                        doshrink = 1
                else:
                    # Perform an inside contraction
                    xcc = (1-psi)*xbar + psi*sim[-1]
                    fxcc = apply(func,(xcc,)+args)
                    funcalls = funcalls + 1

                    if fxcc < fsim[-1]:
                        sim[-1] = xcc
                        fsim[-1] = fxcc
                    else:
                        doshrink = 1

        if doshrink:
            for j in one2np1:
                sim[j] = sim[0] + sigma*(sim[j] - sim[0])
                fsim[j] = apply(func,(sim[j],)+args)
            funcalls = funcalls + N

        ind = numarray.argsort(fsim)
        sim = numarray.take(sim,ind,0)
        fsim = numarray.take(fsim,ind)
        iterations = iterations + 1

    x = sim[0]
    fval = min(fsim)
    warnflag = 0

    if funcalls >= maxfun:
        warnflag = 1
        if printmessg:
            print "Warning: Maximum number of function evaluations has been exceeded."
    elif iterations >= maxiter:
        warnflag = 2
        if printmessg:
            print "Warning: Maximum number of iterations has been exceeded"
    else:
        if printmessg:
            print "Optimization terminated successfully."
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % iterations
            print "         Function evaluations: %d" % funcalls

    if fulloutput:
        return x, fval, warnflag
    else:
        return x
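# Usage sketch for fmin (added for illustration; mirrors the demo in
# __main__ below).  With fulloutput=1 the minimizer also returns the
# function value and a warning flag:
def _example_fmin():
    xopt, fval, warnflag = fmin(rosen, [0.8,1.2,0.7], fulloutput=1, printmessg=0)
    return xopt, fval, warnflag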

def zoom(a_lo, a_hi):
    # Placeholder: the 'zoom' stage of the Wright and Nocedal line search
    # is not implemented in this revision, so callers that reach it get
    # None back as the step length.
    pass


def line_search(f, fprime, xk, pk, gfk, args=(), c1=1e-4, c2=0.9, amax=50):
    """alpha, fc, gc = line_search(f, fprime, xk, pk, gfk,
                                   args=(), c1=1e-4, c2=0.9, amax=50)

    minimize the function f(xk+alpha pk) using the line search algorithm of
    Wright and Nocedal in 'Numerical Optimization', 1999, pg. 59-60
    """

    fc = 0
    gc = 0
    alpha0 = 1.0
    phi0 = apply(f,(xk,)+args)
    phi_a0 = apply(f,(xk+alpha0*pk,)+args)
    fc = fc + 2
    derphi0 = numarray.dot(gfk,pk)
    derphi_a0 = numarray.dot(apply(fprime,(xk+alpha0*pk,)+args),pk)
    gc = gc + 1

    # check to see if alpha0 = 1 satisfies Strong Wolfe conditions.
    if (phi_a0 <= phi0 + c1*alpha0*derphi0) \
       and (numarray.absolute(derphi_a0) <= c2*numarray.absolute(derphi0)):
        return alpha0, fc, gc

    alpha0 = 0
    alpha1 = 1
    phi_a1 = phi_a0
    phi_a0 = phi0

    i = 1
    while 1:
        if (phi_a1 > phi0 + c1*alpha1*derphi0) or \
           ((phi_a1 >= phi_a0) and (i > 1)):
            return zoom(alpha0, alpha1), fc, gc

        derphi_a1 = numarray.dot(apply(fprime,(xk+alpha1*pk,)+args),pk)
        gc = gc + 1
        if (numarray.absolute(derphi_a1) <= -c2*derphi0):
            return alpha1, fc, gc

        if (derphi_a1 >= 0):
            return zoom(alpha1, alpha0), fc, gc

        alpha2 = (amax-alpha1)*0.25 + alpha1
        i = i + 1
        alpha0 = alpha1
        alpha1 = alpha2
        phi_a0 = phi_a1
        phi_a1 = apply(f,(xk+alpha1*pk,)+args)
        fc = fc + 1


def line_search_BFGS(f, xk, pk, gfk, args=(), c1=1e-4, alpha0=1):
    """alpha, fc, gc = line_search_BFGS(f, xk, pk, gfk,
                                        args=(), c1=1e-4, alpha0=1)

    minimize over alpha, the function f(xk+alpha pk) using the interpolation
    algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pg. 56-57
    """

    fc = 0
    phi0 = apply(f,(xk,)+args)               # compute f(xk)
    phi_a0 = apply(f,(xk+alpha0*pk,)+args)   # compute f(xk+alpha0 pk)
    fc = fc + 2
    derphi0 = numarray.dot(gfk,pk)

    if (phi_a0 <= phi0 + c1*alpha0*derphi0):
        return alpha0, fc, 0

    # Otherwise compute the minimizer of a quadratic interpolant:

    alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
    phi_a1 = apply(f,(xk+alpha1*pk,)+args)
    fc = fc + 1

    if (phi_a1 <= phi0 + c1*alpha1*derphi0):
        return alpha1, fc, 0

    # Otherwise loop with cubic interpolation until we find an alpha which
    # satisfies the first Wolfe condition (since we are backtracking, we will
    # assume that the value of alpha is not too small and satisfies the second
    # condition).

    while 1:        # we are assuming pk is a descent direction
        factor = alpha0**2 * alpha1**2 * (alpha1-alpha0)
        a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \
            alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0)
        a = a / factor
        b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \
            alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0)
        b = b / factor

        alpha2 = (-b + numarray.sqrt(numarray.absolute(b**2 - 3 * a * derphi0))) / (3.0*a)
        phi_a2 = apply(f,(xk+alpha2*pk,)+args)
        fc = fc + 1

        if (phi_a2 <= phi0 + c1*alpha2*derphi0):
            return alpha2, fc, 0

        if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96:
            alpha2 = alpha1 / 2.0

        alpha0 = alpha1
        alpha1 = alpha2
        phi_a0 = phi_a1
        phi_a1 = phi_a2
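# Illustrative single-step example (added): take the steepest-descent
# direction at a point xk and ask line_search_BFGS for a step length that
# satisfies the first Wolfe condition.  xk is assumed to be a numarray
# array, e.g. numarray.array([0.8,1.2,0.7]).
def _example_line_search(xk):
    gfk = rosen_der(xk)
    pk = -gfk                                   # descent direction
    alpha, fc, gc = line_search_BFGS(rosen, xk, pk, gfk)
    return alpha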

epsilon = 1e-8

def approx_fprime(xk,f,*args):
    # forward-difference approximation of the gradient of f at xk
    f0 = apply(f,(xk,)+args)
    grad = numarray.zeros((len(xk),),'d')
    ei = numarray.zeros((len(xk),),'d')
    for k in range(len(xk)):
        ei[k] = 1.0
        grad[k] = (apply(f,(xk+epsilon*ei,)+args) - f0)/epsilon
        ei[k] = 0.0
    return grad

def approx_fhess_p(x0,p,fprime,*args):
    # forward-difference approximation of the hessian of f at x0 applied to p
    f2 = apply(fprime,(x0+epsilon*p,)+args)
    f1 = apply(fprime,(x0,)+args)
    return (f2 - f1)/epsilon
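# Quick finite-difference check (added for illustration): approx_fprime
# should agree with the analytic gradient up to discretisation error.
# x is assumed to be a numarray array.
def _check_gradient(x):
    return max(numarray.absolute(approx_fprime(x,rosen) - rosen_der(x)))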


def fminBFGS(f, x0, fprime=None, args=(), avegtol=1e-5, maxiter=None, fulloutput=0, printmessg=1):
    """xopt = fminBFGS(f, x0, fprime=None, args=(), avegtol=1e-5,
                       maxiter=None, fulloutput=0, printmessg=1)

    Optimize the function, f, whose gradient is given by fprime using the
    quasi-Newton method of Broyden, Fletcher, Goldfarb, and Shanno (BFGS).
    See Wright, and Nocedal 'Numerical Optimization', 1999, pg. 198.
    """

    app_fprime = 0
    if fprime is None:
        app_fprime = 1

    x0 = numarray.asarray(x0)
    if maxiter is None:
        maxiter = len(x0)*200
    func_calls = 0
    grad_calls = 0
    k = 0
    N = len(x0)
    gtol = N*avegtol
    I = numarray.identity(N)
    Hk = I

    if app_fprime:
        gfk = apply(approx_fprime,(x0,f)+args)
        func_calls = func_calls + len(x0) + 1
    else:
        gfk = apply(fprime,(x0,)+args)
        grad_calls = grad_calls + 1
    xk = x0
    sk = [2*gtol]
    while (numarray.add.reduce(numarray.absolute(gfk)) > gtol) and (k < maxiter):
        pk = -numarray.dot(Hk,gfk)
        alpha_k, fc, gc = line_search_BFGS(f,xk,pk,gfk,args)
        func_calls = func_calls + fc
        xkp1 = xk + alpha_k * pk
        sk = xkp1 - xk
        xk = xkp1
        if app_fprime:
            gfkp1 = apply(approx_fprime,(xkp1,f)+args)
            func_calls = func_calls + gc + len(x0) + 1
        else:
            gfkp1 = apply(fprime,(xkp1,)+args)
            grad_calls = grad_calls + gc + 1

        yk = gfkp1 - gfk
        k = k + 1

        # BFGS update of the inverse hessian approximation Hk
        rhok = 1 / numarray.dot(yk,sk)
        A1 = I - sk[:,numarray.NewAxis] * yk[numarray.NewAxis,:] * rhok
        A2 = I - yk[:,numarray.NewAxis] * sk[numarray.NewAxis,:] * rhok
        Hk = numarray.dot(A1,numarray.dot(Hk,A2)) + rhok * sk[:,numarray.NewAxis] * sk[numarray.NewAxis,:]
        gfk = gfkp1

    if printmessg or fulloutput:
        fval = apply(f,(xk,)+args)
    if k >= maxiter:
        warnflag = 1
        if printmessg:
            print "Warning: Maximum number of iterations has been exceeded"
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls
            print "         Gradient evaluations: %d" % grad_calls
    else:
        warnflag = 0
        if printmessg:
            print "Optimization terminated successfully."
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls
            print "         Gradient evaluations: %d" % grad_calls

    if fulloutput:
        return xk, fval, func_calls, grad_calls, warnflag
    else:
        return xk
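# Usage sketch for fminBFGS (added; mirrors the __main__ demo below).
# With an analytic gradient the quasi-Newton iteration typically needs
# far fewer function evaluations than the simplex method:
def _example_fminBFGS():
    xopt, fval, func_calls, grad_calls, warnflag = \
          fminBFGS(rosen, [0.8,1.2,0.7], fprime=rosen_der, maxiter=80,
                   fulloutput=1, printmessg=0)
    return xopt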


def fminNCG(f, x0, fprime, fhess_p=None, fhess=None, args=(), avextol=1e-5, maxiter=None, fulloutput=0, printmessg=1):
    """xopt = fminNCG(f, x0, fprime, fhess_p=None, fhess=None, args=(), avextol=1e-5,
                      maxiter=None, fulloutput=0, printmessg=1)

    Optimize the function, f, whose gradient is given by fprime using the
    Newton-CG method.  fhess_p must compute the hessian times an arbitrary
    vector.  If it is not given, finite-differences on fprime are used to
    compute it.  See Wright, and Nocedal 'Numerical Optimization', 1999,
    pg. 140.
    """

    x0 = numarray.asarray(x0)
    if maxiter is None:          # same default as fminBFGS; without it the
        maxiter = len(x0)*200    # loop condition k < None below never holds
    fcalls = 0
    gcalls = 0
    hcalls = 0
    approx_hessp = 0
    if fhess_p is None and fhess is None:    # Define hessian product
        approx_hessp = 1

    xtol = len(x0)*avextol
    update = [2*xtol]
    xk = x0
    k = 0
    while (numarray.add.reduce(numarray.absolute(update)) > xtol) and (k < maxiter):
        # Compute a search direction pk by applying the CG method to
        #   del2 f(xk) p = - grad f(xk)
        # starting from 0.
        b = -apply(fprime,(xk,)+args)
        gcalls = gcalls + 1
        maggrad = numarray.add.reduce(numarray.absolute(b))
        eta = min([0.5,numarray.sqrt(maggrad)])
        termcond = eta * maggrad
        xsupi = numarray.zeros((len(x0),))
        ri = -b
        psupi = -ri
        i = 0
        dri0 = numarray.dot(ri,ri)

        if fhess is not None:    # you want to compute hessian once.
            A = apply(fhess,(xk,)+args)
            hcalls = hcalls + 1

        while numarray.add.reduce(numarray.absolute(ri)) > termcond:
            if fhess is None:
                if approx_hessp:
                    Ap = apply(approx_fhess_p,(xk,psupi,fprime)+args)
                    gcalls = gcalls + 2
                else:
                    Ap = apply(fhess_p,(xk,psupi)+args)
                    hcalls = hcalls + 1
            else:
                Ap = numarray.matrixmultiply(A,psupi)
            # check curvature
            curv = numarray.dot(psupi,Ap)
            if (curv <= 0):
                if (i > 0):
                    break
                else:
                    xsupi = xsupi + dri0/curv * psupi
                    break
            alphai = dri0 / curv
            xsupi = xsupi + alphai * psupi
            ri = ri + alphai * Ap
            dri1 = numarray.dot(ri,ri)
            betai = dri1 / dri0
            psupi = -ri + betai * psupi
            i = i + 1
            dri0 = dri1          # update numarray.dot(ri,ri) for next time.

        pk = xsupi               # search direction is solution to system.
        gfk = -b                 # gradient at xk
        alphak, fc, gc = line_search_BFGS(f,xk,pk,gfk,args)
        fcalls = fcalls + fc
        gcalls = gcalls + gc

        update = alphak * pk
        xk = xk + update
        k = k + 1

    if printmessg or fulloutput:
        fval = apply(f,(xk,)+args)
    if k >= maxiter:
        warnflag = 1
        if printmessg:
            print "Warning: Maximum number of iterations has been exceeded"
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % fcalls
            print "         Gradient evaluations: %d" % gcalls
            print "         Hessian evaluations: %d" % hcalls
    else:
        warnflag = 0
        if printmessg:
            print "Optimization terminated successfully."
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % fcalls
            print "         Gradient evaluations: %d" % gcalls
            print "         Hessian evaluations: %d" % hcalls

    if fulloutput:
        return xk, fval, fcalls, gcalls, hcalls, warnflag
    else:
        return xk
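# Usage sketch for fminNCG (added; mirrors the __main__ demo below): supply
# either a hessian-vector product via fhess_p or the full hessian via fhess.
def _example_fminNCG():
    xopt = fminNCG(rosen, [0.8,1.2,0.7], rosen_der,
                   fhess_p=rosen3_hess_p, maxiter=80, printmessg=0)
    return xopt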


if __name__ == "__main__":
    import time

    times = []
    algor = []
    x0 = [0.8,1.2,0.7]
    start = time.time()
    x = fmin(rosen,x0)
    print x
    times.append(time.time() - start)
    algor.append('Nelder-Mead Simplex\t')

    start = time.time()
    x = fminBFGS(rosen, x0, fprime=rosen_der, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('BFGS Quasi-Newton\t')

    start = time.time()
    x = fminBFGS(rosen, x0, avegtol=1e-4, maxiter=100)
    print x
    times.append(time.time() - start)
    algor.append('BFGS without gradient\t')

    start = time.time()
    x = fminNCG(rosen, x0, rosen_der, fhess_p=rosen3_hess_p, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('Newton-CG with hessian product')

    start = time.time()
    x = fminNCG(rosen, x0, rosen_der, fhess=rosen3_hess, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('Newton-CG with full hessian')

    print "\nMinimizing the Rosenbrock function of order 3\n"
    print " Algorithm \t\t\t Seconds"
    print "===========\t\t\t ========="
    for k in range(len(algor)):
        print algor[k], "\t -- ", times[k]

    times = []
    algor = []
    x0 = [1.,]
    start = time.time()
    x = fmin(para,x0)
    print x
    times.append(time.time() - start)
    algor.append('Nelder-Mead Simplex\t')

    start = time.time()
    x = fminBFGS(para, x0, fprime=para_der, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('BFGS Quasi-Newton\t')

    start = time.time()
    x = fminBFGS(para, x0, avegtol=1e-4, maxiter=100)
    print x
    times.append(time.time() - start)
    algor.append('BFGS without gradient\t')

    start = time.time()
    x = fminNCG(para, x0, para_der, fhess_p=para_hess_p, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('Newton-CG with hessian product')

    start = time.time()
    x = fminNCG(para, x0, para_der, fhess=para_hess, maxiter=80)
    print x
    times.append(time.time() - start)
    algor.append('Newton-CG with full hessian')

    print "\nMinimizing (x-2)**2\n"
    print " Algorithm \t\t\t Seconds"
    print "===========\t\t\t ========="
    for k in range(len(algor)):
        print algor[k], "\t -- ", times[k]
