/[escript]/trunk/downunder/py_src/splitminimizers.py
ViewVC logotype

Contents of /trunk/downunder/py_src/splitminimizers.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5706 - (show annotations)
Mon Jun 29 03:41:36 2015 UTC (4 years, 2 months ago) by sshaw
File MIME type: text/x-python
File size: 23832 byte(s)
all python files now force use of python3 prints and division syntax to stop sneaky errors appearing in py3 environs
1 ##############################################################################
2 #
3 # Copyright (c) 2014-2015 by The University of Queensland
4 # http://www.uq.edu.au
5 #
6 # Primary Business: Queensland, Australia
7 # Licensed under the Open Software License version 3.0
8 # http://www.opensource.org/licenses/osl-3.0.php
9 #
10 # Development until 2012 by Earth Systems Science Computational Center (ESSCC)
11 # Development 2012-2013 by School of Earth Sciences
12 # Development from 2014 by Centre for Geoscience Computing (GeoComp)
13 #
14 ##############################################################################
15
from __future__ import print_function, division

import logging

import numpy as np

from esys.escript import addJob, addJobPerWorld
from esys.escript.pdetools import ArithmeticTuple
from esys.escriptcore.splitworld import Job, FunctionJob

from .minimizers import AbstractMinimizer, MinimizerMaxIterReached, \
                        MinimizerIterationIncurableBreakDown
from .splitinversioncostfunctions import SplitInversionCostFunction
24
class SplitMinimizerLBFGS(AbstractMinimizer):
    """
    Minimizer that uses the limited-memory Broyden-Fletcher-Goldfarb-Shanno
    method.

    version modified to fit with split world.
    """

    # History size: maximum number of (s, y, rho) pairs kept for the
    # two-loop recursion (see _twoLoop / run)
    _truncation = 30

    # Initial Hessian multiplier: scaling used for the inverse-Hessian
    # approximation when the cost function does not provide one
    _initial_H = 1

    # Restart after this many iteration steps: the inner iteration is
    # abandoned and the s/y history reset once k reaches this count
    _restart = 60
42 def getOptions(self):
43 return {'truncation':self._truncation,'initialHessian':self._initial_H, 'restart':self._restart}
44
45 def setOptions(self, **opts):
46 self.logger.debug("Setting options: %s"%(str(opts)))
47 for o in opts:
48 if o=='historySize' or o=='truncation':
49 self._truncation=opts[o]
50 elif o=='initialHessian':
51 self._initial_H=opts[o]
52 elif o=='restart':
53 self._restart=opts[o]
54 else:
55 raise KeyError("Invalid option '%s'"%o)
56
    # This function sets current_point=base_point+alpha*search_direction [m=m+p*a]
    @staticmethod
    def move_point_from_base(self, **kwargs):
        # NOTE: although marked @staticmethod, the first parameter is named
        # `self` because this function is executed as a FunctionJob body;
        # the splitworld framework passes the Job instance in that position,
        # giving access to importValue/exportValue. The @staticmethod
        # decorator prevents Python from binding it to the minimizer class.
        m=self.importValue('base_point')
        p=self.importValue('search_direction')
        a=kwargs['alpha']      # step length supplied by the caller of addJob*
        newpoint=m+p*a
        # delegates the export/book-keeping of the new current point
        SplitInversionCostFunction.update_point_helper(self, newpoint)
65
    def run(self):
        """
        Execute the L-BFGS iteration via the splitworld framework.

        This version relies on the costfunction already having an initial guess loaded.
        It also does not return the result, meaning a job needs to be submitted to
        get the result out.

        :raises MinimizerMaxIterReached: if ``self._imax`` iterations are
            exceeded (note: name must be importable at module scope)
        :raises MinimizerIterationIncurableBreakDown: on repeated curvature
            break down (note: name must be importable at module scope)
        """

        # First we'll define our own versions of the helper functions

        # Updates "g_Jx_new_0" and "g_Jx_new_1" to the gradient at the current point
        # then returns f.dualProduct of search_direction and g_Jx_new
        # Not a splitworld function
        def grad_phi(f, **kwargs):
            f.calculateGradient('g_Jx_0', 'g_Jx_1')
            # need to call dualProduct here
            def dual_p_g_Jx_new(self, **kwargs):
                p=self.importValue("search_direction")
                g_Jx_0=self.importValue("g_Jx_0")
                g_Jx_1=self.importValue("g_Jx_1")
                reg=self.importValue("regularization")
                #again, this assumes that only the regularization term is relevant
                res=reg.getDualProduct(p, (g_Jx_0, g_Jx_1))
                self.exportValue("dp_result",res)
            # Now we will only run this on one world and rely on getDouble to ship it
            addJob(f.splitworld, FunctionJob, dual_p_g_Jx_new)
            f.splitworld.runJobs()
            res=f.splitworld.getDoubleVariable("dp_result")
            return res
        #End of grad_phi

        def _zoom(f, alpha_lo, alpha_hi, phi_lo, phi_hi, c1, c2,
                  phi0, gphi0, IMAX=25):
            """
            Helper function for `line_search` below which tries to tighten the range
            alpha_lo...alpha_hi. See Chapter 3 of 'Numerical Optimization' by
            J. Nocedal for an explanation.
            """
            i=0
            while True:
                # bisection of the bracket; a polynomial interpolation would
                # converge faster (as the original comment notes)
                alpha=alpha_lo+.5*(alpha_hi-alpha_lo) # should use interpolation...
                addJobPerWorld(f.splitworld, FunctionJob, SplitMinimizerLBFGS.move_point_from_base, alpha=alpha, imports=['base_point', 'search_direction'])
                f.splitworld.runJobs()
                f.calculateValue('phi_a')
                phi_a=f.splitworld.getDoubleVariable('phi_a')
                #zoomlogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
                if phi_a > phi0+c1*alpha*gphi0 or phi_a >= phi_lo:
                    # Armijo condition failed (or no decrease): shrink from above
                    alpha_hi=alpha
                else:
                    gphi_a=grad_phi(f)
                    #zoomlogger.debug("\tgrad(phi(alpha))=%e"%(gphi_a))
                    # strong Wolfe curvature condition satisfied -> done
                    if np.abs(gphi_a) <= -c2*gphi0:
                        break
                    if gphi_a*(alpha_hi-alpha_lo) >= 0:
                        alpha_hi = alpha_lo
                    alpha_lo=alpha
                    phi_lo=phi_a
                i+=1
                if i>IMAX:
                    # give up; gphi_a=None signals "no gradient available"
                    gphi_a=None
                    break
            return alpha, phi_a, gphi_a

        def line_search(f, alpha=1.0, alpha_truncationax=50.0,
                        c1=1e-4, c2=0.9, IMAX=15):
            """
            Line search method that satisfies the strong Wolfe conditions.
            See Chapter 3 of 'Numerical Optimization' by J. Nocedal for an explanation.

            This version is converted from the line_search from minimizers.py
            however, it takes fewer parameters because some of the values needed
            by the original version will be available as subworld variables rather
            than as parameters.


            :param f: callable objective function f(x)
            :param alpha: initial step length. If g_Jx is properly scaled alpha=1 is a
                          reasonable starting value.
            :param alpha_truncationax: algorithm terminates if alpha reaches this value
            :param c1: value for Armijo condition (see reference)
            :param c2: value for curvature condition (see reference)
            :param IMAX: maximum number of iterations to perform

            Removed parameters (now in subworld variables instead):
            x - The start value for line search: in the variable "current_point".
            p - search direction: in the variable "search_direction"
            g_Jx - value for the gradient of f at x: in the variables "g_Jx_0" and "g_Jx_1"
            Jx - value of f(x): in the variable "Jx"
            """

            # This will handle subworld side of work: snapshot the starting
            # point and seed phi0/gphi0/phi_a as subworld variables
            def line_search_init_worker(self, **kwargs):
                x=self.importValue("current_point")
                p=self.importValue("search_direction")
                g_Jx=ArithmeticTuple(self.importValue("g_Jx_0"), self.importValue("g_Jx_1"))
                Jx=self.importValue("Jx")
                regular=self.importValue("regularization")
                phi0=Jx

                print("Type of phi0=", type(phi0),phi0)
                # In the original, this part calls getDualProduct on f
                # However, since that only ends up referring to the
                # regularisation term, I've called that directly
                # If your dual product operation requires access to
                # the other models, then this step needs
                # a rethink since not all models are local
                gphi0=regular.getDualProduct(p, g_Jx)

                #Still need to decide what this worker will do
                old_phi_a=phi0
                phi_a=phi0
                self.exportValue("old_phi_a", old_phi_a)
                self.exportValue("phi_a", phi_a)
                self.exportValue("gphi0", gphi0)
                self.exportValue("phi0", phi0)
                self.exportValue("base_point", x) # To ensure we can revert if needed
            #End of line_search_init_worker

            old_alpha=0
            i=1
            addJobPerWorld(f.splitworld, FunctionJob, line_search_init_worker, imports=['search_direction', 'g_Jx_0', 'g_Jx_1', 'Jx', 'regularization'])
            f.splitworld.runJobs()


            # Updates "g_Jx_new_0" and "g_Jx_new_1" to the gradient at the current point
            # then returns f.dualProduct of search_direction and g_Jx_new
            # Not a splitworld function
            # NOTE(review): this redefinition shadows the outer grad_phi and is
            # nearly identical except that it calls getDoubleValue below where
            # the outer copy calls getDoubleVariable — confirm which accessor
            # the splitworld API actually provides; one of the two is likely a typo.
            def grad_phi(f, **kwargs):
                f.calculateGradient('g_Jx_0', 'g_Jx_1')
                # need to call dualProduct here
                def dual_p_g_Jx_new(self, **kwargs):
                    p=self.importValue("search_direction")
                    g_Jx_0=self.importValue("g_Jx_0")
                    g_Jx_1=self.importValue("g_Jx_1")
                    reg=self.importValue("regularization")
                    #again, this assumes that only the regularization term is relevant
                    res=reg.getDualProduct(p, (g_Jx_0, g_Jx_1))
                    self.exportValue("dp_result",res)
                # Now we will only run this on one world and rely on getDouble to ship it
                addJob(f.splitworld, FunctionJob, dual_p_g_Jx_new)
                f.splitworld.runJobs()
                res=f.splitworld.getDoubleValue("dp_result")
                return res
            #End of grad_phi

            while i<IMAX and alpha>0. and alpha<alpha_truncationax:
                alpha_at_loop_start=alpha
                # NOTE(review): move_point_from_base reads the subworld
                # variable 'base_point', but this imports 'current_point' —
                # compare with the call in _zoom which imports 'base_point';
                # verify the import list is correct.
                addJobPerWorld(f.splitworld, FunctionJob, SplitMinimizerLBFGS.move_point_from_base, alpha=alpha, imports=['current_point', 'search_direction'])
                f.splitworld.runJobs()
                f.calculateValue('phi_a')
                #lslogger.debug("iteration %d, alpha=%e, phi(alpha)=%e"%(i,alpha,phi_a))
                phi_a=f.splitworld.getDoubleVariable('phi_a')
                phi0=f.splitworld.getDoubleVariable('phi0')
                gphi0=f.splitworld.getDoubleVariable('gphi0')
                old_phi_a=f.splitworld.getDoubleVariable('old_phi_a')
                # Armijo test (or non-decrease after first iteration): bracket found
                if (phi_a > phi0+c1*alpha*gphi0) or ((phi_a>=old_phi_a) and (i>1)):
                    alpha, phi_a, gphi_a = _zoom(f, old_alpha, alpha, old_phi_a, phi_a, c1, c2, phi0, gphi0)
                    break

                # Need to check if alpha has changed. If it has, we need to move the point again
                if alpha_at_loop_start!=alpha:
                    # NOTE(review): same 'current_point' vs 'base_point' import
                    # concern as above.
                    addJobPerWorld(f.splitworld, FunctionJob, SplitMinimizerLBFGS.move_point_from_base, alpha=alpha, imports=['current_point', 'search_direction'])
                    f.splitworld.runJobs()

                gphi_a=grad_phi(f)
                # strong Wolfe curvature condition
                if np.abs(gphi_a) <= -c2*gphi0:
                    break
                if gphi_a >= 0:
                    # NOTE(review): this call uses the signature of the
                    # _zoom in minimizers.py, not the local _zoom above, and
                    # the names phi, gradphi and phiargs are undefined here —
                    # it will raise NameError if this branch is reached.
                    # Compare with the bracketing call earlier in this loop.
                    alpha, phi_a, gphi_a = _zoom(phi, gradphi, phiargs, alpha, old_alpha, phi_a, old_phi_a, c1, c2, phi0, gphi0)
                    break

                old_alpha=alpha
                # the factor is arbitrary as long as there is sufficient increase
                alpha=2.*alpha
                old_phi_a=phi_a
                i+=1
            #return alpha, phi_a, g_Jx_new[0]
            return alpha, phi_a
        #End of line_search







        splitworld=self.getCostFunction().splitworld
        if self.getCostFunction().provides_inverse_Hessian_approximation:
            self.getCostFunction().updateHessian()
            invH_scale = None
        else:
            invH_scale = self._initial_H

        # start the iteration:
        n_iter = 0
        n_last_break_down=-1
        non_curable_break_down = False
        converged = False




        self.getCostFunction().setPoint() # Set point to initial guess value (takes the place of a getArgs call)
        #args=self.getCostFunction().getArguments(x)

        self.getCostFunction().calculateValue("Jx") #evaluate the function and store the result in the named variable
        # note that call sets Jx=Jx_original
        splitworld.copyVariable("Jx", "Jx_original")

        self.getCostFunction().calculateGradient("g_Jx_0","g_Jx_1") #compute the gradient and store the result

        # outer loop: restarted L-BFGS sweeps
        while not converged and not non_curable_break_down and n_iter < self._imax:
            k=0
            break_down = False
            reset_s_and_y = True
            # initial step length for line search
            alpha=1.0
            #self._doCallback(n_iter, x, Jx, g_Jx)

            # inner loop: one L-BFGS sweep of at most _restart steps
            while not converged and not break_down and k < self._restart and n_iter < self._imax:
                #self.logger.info("\033[1;31miteration %d\033[1;30m"%n_iter)
                self.logger.info("********** iteration %3d **********"%n_iter)
                #self.logger.info("\tJ(x) = %s"%Jx)
                #self.logger.debug("\tgrad f(x) = %s"%g_Jx)
                if invH_scale:
                    self.logger.debug("\tH = %s"%invH_scale)

                # keep the previous gradient and value for the secant update below
                splitworld.copyVariable("g_Jx_0", "old_g_Jx_0")
                splitworld.copyVariable("g_Jx_1", "old_g_Jx_1")
                splitworld.copyVariable("Jx", "Jx_old")

                # determine search direction (stored in "search_direction")
                self._twoLoop(self.getCostFunction().splitworld, reset_s_and_y)
                reset_s_and_y = False

                # determine new step length using the last one as initial value
                # however, avoid using too small steps for too long.
                # FIXME: This is a bit specific to esys.downunder in that the
                # inverse Hessian approximation is not scaled properly (only
                # the regularization term is used at the moment)...
                if invH_scale is None:
                    if alpha <= 0.5:
                        alpha=2*alpha
                else:
                    # reset alpha for the case that the cost function does not
                    # provide an approximation of inverse H
                    alpha=1.0
                alpha, phi_a = line_search(self.getCostFunction(), alpha)
                # this function returns a scaling alpha for the search
                # direction as well as the cost function evaluation and
                # gradient for the new solution approximation x_new=x+alpha*p
                # NOTE(review): leftover debug print
                print("alpha=",alpha)
                self.logger.debug("\tSearch direction scaling alpha=%e"%alpha)

                # execute the step
                # This update operation has already been done in the line_search
                #delta_x = alpha*p
                #x_new = x + delta_x

                Jx=splitworld.getDoubleVariable("Jx")
                converged = True
                # J-convergence test: relative change in the cost function value
                if self._J_tol:
                    Jx_old=splitworld.getDoubleVariable("Jx_old")
                    Jx_original=splitworld.getDoubleVariable("Jx_original")
                    flag=abs(Jx-Jx_old) <= self._J_tol * abs(Jx-Jx_original)
                    #flag=abs(Jx_new-Jx) <= self._J_tol * abs(Jx_new-Jx_0)
                    # NOTE(review): requires the `logging` module at module
                    # scope — verify the file imports it.
                    if self.logger.isEnabledFor(logging.DEBUG):
                        if flag:
                            self.logger.debug("Cost function has converged: dJ, J*J_tol = %e, %e"%(Jx_old-Jx,abs(Jx-Jx_original)*self._J_tol))
                        else:
                            self.logger.debug("Cost function checked: dJ, J*J_tol = %e, %e"%(Jx_old-Jx,abs(Jx)*self._J_tol))

                    converged = converged and flag
                # m-convergence test: relative size of the step in model space,
                # evaluated inside the subworlds since the point is not local
                if self._m_tol:
                    def converged_check(self, **kwargs):
                        alpha=kwargs["alpha"]
                        m_tol=kwargs["m_tol"]
                        reg=self.importValue("regularization")
                        p=self.importValue("search_direction")
                        delta_x=alpha*p
                        x=self.importValue("current_point")
                        norm_x = reg.getNorm(x)
                        norm_dx = reg.getNorm(delta_x)
                        flag = norm_dx <= m_tol * norm_x
                        #if self.logger.isEnabledFor(logging.DEBUG):
                        #if flag:
                        #self.logger.debug("Solution has converged: dx, x*m_tol = %e, %e"%(norm_dx,norm_x*self._m_tol))
                        #else:
                        #self.logger.debug("Solution checked: dx, x*m_tol = %e, %e"%(norm_dx,norm_x*self._m_tol))
                        self.exportValue('conv_flag', flag)
                    # End of converged_check
                    addJobPerWorld(self.getCostFunction().splitworld, FunctionJob, converged_check, alpha=alpha, m_tol=self._m_tol, imports=["regularization", "search_direction", "current_point"])
                    self.getCostFunction().splitworld.runJobs()
                    # booleans travel as doubles; >0.001 recovers True
                    flag=self.getCostFunction().splitworld.getDoubleVariable("conv_flag")>0.001
                    converged = converged and flag

                #Already done in the line_search call
                #x=x_new
                if converged:
                    self.logger.info("\tJ(x) = %s"%Jx)
                    break

                # unfortunately there is more work to do!
                # secant update: append (delta_x, delta_g, rho) to the s_and_y
                # history and detect curvature break down (rho == 0)
                def run_worker(self, **kwargs):
                    break_down=False
                    need_trunc=kwargs["need_trunc"]
                    # Need to do some imports here
                    alpha=kwargs["alpha"]
                    g_Jx=ArithmeticTuple(self.importValue("g_Jx_0"), self.importValue("g_Jx_1"))
                    old_g_Jx=ArithmeticTuple(self.importValue("old_g_Jx_0"), self.importValue("old_g_Jx_1"))
                    p=self.importValue("search_direction")
                    reg=self.importValue("regularization")
                    s_and_y=self.importValue("s_and_y")

                    ##The original code had this check
                    #if g_Jx_new is None:
                    #args=self.getCostFunction().getArguments(x_new)
                    #g_Jx_new=self.getCostFunction().getGradient(x_new, args)
                    #delta_g=g_Jx_new-g_Jx
                    delta_g=g_Jx-old_g_Jx
                    delta_x=alpha*p;
                    rho=reg.getDualProduct(delta_x, delta_g)
                    if abs(rho)>0:
                        s_and_y.append((delta_x,delta_g, rho ))
                    else:
                        break_down=True
                    if need_trunc:
                        s_and_y.pop(0)
                    self.exportValue("break_down", break_down)
                    self.exportValue("s_and_y",s_and_y)
                # End run_worker
                # Only one world has s_and_y - so its important that only single jobs be run that manipulate
                # s_and_y
                addJob(self.getCostFunction().splitworld, FunctionJob, run_worker, alpha=alpha, need_trunc=(k>=self._truncation))
                self.getCostFunction().splitworld.runJobs()

                break_down=(splitworld.getDoubleVariable("break_down")>0.001)
                self.getCostFunction().updateHessian()
                #g_Jx=g_Jx_new
                #Jx=Jx_new

                k+=1
                n_iter+=1
                #self._doCallback(n_iter, x, Jx, g_Jx)

                # delete oldest vector pair
                # Already done in the run_worker
                #if k>self._truncation: s_and_y.pop(0)

                if not self.getCostFunction().provides_inverse_Hessian_approximation and not break_down:
                    # set the new scaling factor (approximation of inverse Hessian)
                    # NOTE(review): delta_g and delta_x are locals of
                    # run_worker above, not of this scope — if this branch is
                    # reached it will raise NameError; the values would have
                    # to be exported from the worker instead.
                    denom=self.getCostFunction().getDualProduct(delta_g, delta_g)
                    if denom > 0:
                        invH_scale=self.getCostFunction().getDualProduct(delta_x,delta_g)/denom
                    else:
                        invH_scale=self._initial_H
                        self.logger.debug("** Break down in H update. Resetting to initial value %s."%self._initial_H)
            # case handling for inner iteration:
            if break_down:
                # two consecutive break downs are considered incurable
                if n_iter == n_last_break_down+1:
                    non_curable_break_down = True
                    self.logger.debug("** Incurable break down detected in step %d."%n_iter)
                else:
                    n_last_break_down = n_iter
                    self.logger.debug("** Break down detected in step %d. Iteration is restarted."%n_iter)
            if not k < self._restart:
                self.logger.debug("Iteration is restarted after %d steps."%n_iter)

        # case handling for inner iteration:
        #self._result=x
        self._result=None
        if n_iter >= self._imax:
            self.logger.warn(">>>>>>>>>> Maximum number of iterations reached! <<<<<<<<<<")
            raise MinimizerMaxIterReached("Gave up after %d steps."%n_iter)
        elif non_curable_break_down:
            self.logger.warn(">>>>>>>>>> Incurable breakdown! <<<<<<<<<<")
            raise MinimizerIterationIncurableBreakDown("Gave up after %d steps."%n_iter)

        self.logger.info("Success after %d iterations!"%n_iter)
        #This version does not return the result
        #You need to set up a job to extract the result
447
    def _twoLoop(self, splitworld, reset):
        """
        Helper for the L-BFGS method.
        See 'Numerical Optimization' by J. Nocedal for an explanation.

        This has been converted to use splitworld.
        As such it doesn't return a result, instead it stores
        The "p" result in the "search_direction" variable

        :param splitworld: the SplitWorld to submit the worker job to
        :param reset: if True, the s_and_y history is discarded and
                      rebuilt from scratch

        Depends on the following splitworld variables:
        g_Jx_0, g_Jx_1
        current_point
        s_and_y

        Other conversion notes:
        The current cost function's inverseHessianApproximation and
        dualProduct only
        depend on the regularization term so this function can be
        done entirely by one world. If the Hessian/dp ever needed
        other worlds, this will need to be reworked.
        Implicit in the above is that the overall cost function
        has cf.provides_inverse_Hessian_approximation==True
        """
        # Make a fn to push the work into subworld
        def two_loop_worker(self, **kwargs):
            reset=kwargs['reset']
            # NOTE(review): x is imported but never used in this worker
            x=self.importValue("current_point")
            reg=self.importValue("regularization")
            g_Jx=ArithmeticTuple(self.importValue("g_Jx_0"), self.importValue("g_Jx_1"))
            if reset:
                # fresh (empty) history: the recursion below degenerates to
                # r = H0 * g, and an empty list is exported as the new history
                s_and_y = []
                self.exportValue('s_and_y', list())
            else:
                s_and_y=self.importValue("s_and_y")
            # first loop of the two-loop recursion (newest pair first)
            q=g_Jx
            alpha=[]
            for s,y, rho in reversed(s_and_y):
                a=reg.getDualProduct(s, q)/rho
                alpha.append(a)
                q=q-a*y

            # apply the initial inverse-Hessian approximation
            r=reg.getInverseHessianApproximationAtPoint(q)

            # second loop (oldest pair first), consuming alpha in reverse
            for s,y,rho in s_and_y:
                beta = reg.getDualProduct(r, y)/rho
                a = alpha.pop()
                r = r + s * (a-beta)
            # In the original version, the caller negated the result
            self.exportValue("search_direction", -r)
        # NOTE(review): leftover debug print
        print ("prior to twoloop call ",splitworld.getVarList())
        # Note: this is only running on one world (of possibly many)
        # Any jobs which also need the variables exported here
        # need to be shared or the jobs need to run on the same world
        addJob(splitworld, FunctionJob, two_loop_worker, reset=reset, imports=["current_point", "regularization", "g_Jx_0", "g_Jx_1"])
        splitworld.runJobs()
503
504

  ViewVC Help
Powered by ViewVC 1.1.26