Skip to content

Commit

Permalink
Added BB step size rule to our step size methods (#1859)
Browse files Browse the repository at this point in the history
---------

Signed-off-by: Margaret Duff <43645617+MargaretDuff@users.noreply.github.com>
  • Loading branch information
MargaretDuff authored Aug 22, 2024
1 parent 6d9978b commit 1088bc3
Show file tree
Hide file tree
Showing 5 changed files with 287 additions and 9 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
- Added SVRG and LSVRG stochastic functions (#1625)
- Added SAG and SAGA stochastic functions (#1624)
- Allow `SumFunction` with 1 item (#1857)
- Added Barzilai-Borwein step size rule to work with GD, ISTA, FISTA (#1859)
- Added callback `optimisation.utilities.callbacks.EarlyStoppingObjectiveValue` which stops iterations if an algorithm objective changes less than a provided threshold (#1892)
- Added callback `optimisation.utilities.callbacks.CGLSEarlyStopping` which replicates the automatic behaviour of CGLS in CIL versions <=24. (#1892)
- Enhancements:
Expand Down
119 changes: 113 additions & 6 deletions Wrappers/Python/cil/optimisation/utilities/StepSizeMethods.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

from abc import ABC, abstractmethod
import numpy

from numbers import Number

class StepSizeRule(ABC):
"""
Expand Down Expand Up @@ -70,8 +70,7 @@ def get_step_size(self, algorithm):

class ArmijoStepSizeRule(StepSizeRule):

"""
Applies the Armijo rule to calculate the step size (step_size).
r""" Applies the Armijo rule to calculate the step size (step_size).
The Armijo rule runs a while loop to find the appropriate step_size by starting from a very large number (`alpha`). The step_size is found by reducing the step size (by a factor `beta`) in an iterative way until a certain criterion is met. To avoid infinite loops, we add a maximum number of times (`max_iterations`) the while loop is run.
Expand All @@ -85,9 +84,10 @@ class ArmijoStepSizeRule(StepSizeRule):
The maximum number of iterations to find a suitable step size
Reference
---------
Algorithm 3.1 (Numerical Optimization, Nocedal, Wright) (https://www.math.uci.edu/~qnie/Publications/NumericalOptimization.pdf)
https://projecteuclid.org/download/pdf_1/euclid.pjm/1102995080
------------
- Algorithm 3.1 in Nocedal, J. and Wright, S.J. eds., 1999. Numerical optimization. New York, NY: Springer New York. https://www.math.uci.edu/~qnie/Publications/NumericalOptimization.pdf)
- https://projecteuclid.org/download/pdf_1/euclid.pjm/1102995080
"""

Expand Down Expand Up @@ -138,3 +138,110 @@ def get_step_size(self, algorithm):
raise ValueError(
'Could not find a proper step_size in {} loops. Consider increasing alpha or max_iterations.'.format(self.max_iterations))
return self.alpha


class BarzilaiBorweinStepSizeRule(StepSizeRule):

r""" Applies the Barzilai- Borwein rule to calculate the step size (step_size).
Let :math:`\Delta x=x_k-x_{k-1}` and :math:`\Delta g=g_k-g_{k-1}`. Where :math:`x_k` is the :math:`k` th iterate (current solution after iteration :math:`k` ) and :math:`g_k` is the gradient calculation in the :math:`k` th iterate, found in :code:`algorithm.gradient_update`. A Barzilai-Borwein (BB) iteration is :math:`x_{k+1}=x_k-\alpha_kg_k` where the step size :math:`\alpha _k` is either
- :math:`\alpha_k^{LONG}=\frac{\Delta x\cdot\Delta x}{\Delta x\cdot\Delta g}`, or
- :math:`\alpha_k^{SHORT}=\frac{\Delta x \cdot\Delta g}{\Delta g \cdot\Delta g}`.
Where the operator :math:`\cdot` is the standard inner product between two vectors.
This is suitable for use with gradient based iterative methods where the calculated gradient is stored as `algorithm.gradient_update`.
Parameters
----------
initial: float, greater than zero
The step-size for the first iteration. We recommend something of the order :math:`1/f.L` where :math:`f` is the (differentiable part of) the objective you wish to minimise.
mode: One of 'long', 'short' or 'alternate', default is 'short'.
This calculates the step-size based on the LONG, SHORT or alternating between the two, starting with short.
stabilisation_param: 'auto', float or 'off', default is 'auto'
In order to add stability the step-size has an upper limit of :math:`\Delta/\|g_k\|` where by 'default', the `stabilisation_param`, :math:`\Delta` is determined automatically to be the minimium of :math:`\Delta x` from the first 3 iterations. The user can also pass a fixed constant or turn "off" the stabilisation, equivalently passing `np.inf`.
Reference
---------
- Barzilai, Jonathan; Borwein, Jonathan M. (1988). "Two-Point Step Size Gradient Methods". IMA Journal of Numerical Analysis. 8: 141–148, https://doi.org/10.1093/imanum/8.1.141
- Burdakov, O., Dai, Y. and Huang, N., 2019. STABILIZED BARZILAI-BORWEIN METHOD. Journal of Computational Mathematics, 37(6). https://doi.org/10.4208/jcm.1911-m2019-0171
- https://en.wikipedia.org/wiki/Barzilai-Borwein_method
"""

def __init__(self, initial, mode='short', stabilisation_param="auto"):
'''Initialises the step size rule
'''

self.mode=mode
if self.mode == 'short':
self.is_short = True
elif self.mode == 'long' or self.mode == 'alternate':
self.is_short = False
else:
raise ValueError('Mode should be chosen from "long", "short" or "alternate". ')

self.store_grad=None
self.store_x=None
self.initial=initial
if stabilisation_param == 'auto':
self.adaptive = True
stabilisation_param = numpy.inf
elif stabilisation_param == "off":
self.adaptive = False
stabilisation_param = numpy.inf
elif ( isinstance(stabilisation_param, Number) and stabilisation_param >=0):
self.adaptive = False
else:
raise TypeError(" The stabilisation_param should be 'auto', a positive number or 'off'")
self.stabilisation_param=stabilisation_param



def get_step_size(self, algorithm):
"""
Applies the B-B rule to calculate the step size (`step_size`)
Returns
--------
the calculated step size:float
"""
#For the first iteration we use an initial step size because the BB step size requires a previous iterate.
if self.store_x is None:
self.store_x=algorithm.x.copy() # We store the last iterate in order to calculate the BB step size
self.store_grad=algorithm.gradient_update.copy()# We store the last gradient in order to calculate the BB step size
return self.initial

gradient_norm = algorithm.gradient_update.norm()
#If the gradient is zero, gradient based algorithms will not update and te step size calculation will divide by zero so we stop iterations.
if gradient_norm < 1e-8:
raise StopIteration

algorithm.x.subtract(self.store_x, out=self.store_x)
algorithm.gradient_update.subtract(self.store_grad, out=self.store_grad)
if self.is_short:
ret = (self.store_x.dot(self.store_grad))/ (self.store_grad.dot(self.store_grad))
else:
ret = (self.store_x.dot(self.store_x))/ (self.store_x.dot(self.store_grad))


#This computes the default stabilisation parameter, using the first three iterations
if (algorithm.iteration <=3 and self.adaptive):
self.stabilisation_param = min(self.stabilisation_param, self.store_x.norm() )

# Computes the step size as the minimum of the ret, above, and :math:`\Delta/\|g_k\|` ignoring any NaN values.
ret = numpy.nanmin( numpy.array([ret, self.stabilisation_param/gradient_norm]))

# We store the last iterate and gradient in order to calculate the BB step size
self.store_x.fill(algorithm.x)
self.store_grad.fill(algorithm.gradient_update)

if self.mode == "alternate":
self.is_short = not self.is_short

return ret
2 changes: 1 addition & 1 deletion Wrappers/Python/cil/optimisation/utilities/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,5 @@

from .sampler import Sampler
from .sampler import SamplerRandom
from .StepSizeMethods import ConstantStepSize, ArmijoStepSizeRule, StepSizeRule
from .StepSizeMethods import ConstantStepSize, ArmijoStepSizeRule, StepSizeRule, BarzilaiBorweinStepSizeRule
from .preconditioner import Preconditioner, AdaptiveSensitivity, Sensitivity
170 changes: 168 additions & 2 deletions Wrappers/Python/test/test_stepsizes.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from cil.optimisation.algorithms import SIRT, GD, ISTA, FISTA
from cil.optimisation.functions import LeastSquares, IndicatorBox
from cil.framework import ImageGeometry, VectorGeometry
from cil.framework import ImageGeometry, VectorGeometry, VectorData
from cil.optimisation.operators import IdentityOperator, MatrixOperator

from cil.optimisation.utilities import Sensitivity, AdaptiveSensitivity, Preconditioner, ConstantStepSize, ArmijoStepSizeRule
from cil.optimisation.utilities import Sensitivity, AdaptiveSensitivity, Preconditioner, ConstantStepSize, ArmijoStepSizeRule, BarzilaiBorweinStepSizeRule
import numpy as np

from testclass import CCPiTestClass
Expand Down Expand Up @@ -77,3 +77,169 @@ def test_armijo_calculation(self):
alg.gradient_update = ig.allocate(-2)
step_size = test_stepsize.get_step_size(alg)
self.assertAlmostEqual(step_size, 2)

def test_bb(self):
n = 10
m = 5

A = np.random.uniform(0, 1, (m, n)).astype('float32')
b = (A.dot(np.random.randn(n)) + 0.1 *
np.random.randn(m)).astype('float32')

Aop = MatrixOperator(A)
bop = VectorData(b)
ig=Aop.domain
initial = ig.allocate()
f = LeastSquares(Aop, b=bop, c=0.5)

ss_rule=BarzilaiBorweinStepSizeRule(2 )
self.assertEqual(ss_rule.mode, 'short')
self.assertEqual(ss_rule.initial, 2)
self.assertEqual(ss_rule.adaptive, True)
self.assertEqual(ss_rule.stabilisation_param, np.inf)

#Check the right errors are raised for incorrect parameters

with self.assertRaises(TypeError):
ss_rule=BarzilaiBorweinStepSizeRule(2,'short',-4, )
with self.assertRaises(TypeError):
ss_rule=BarzilaiBorweinStepSizeRule(2,'long', 'banana', )
with self.assertRaises(ValueError):
ss_rule=BarzilaiBorweinStepSizeRule(2, 'banana',3 )


#Check stabilisation parameter unchanged if fixed
ss_rule=BarzilaiBorweinStepSizeRule(2, 'long',3 )
self.assertEqual(ss_rule.mode, 'long')
self.assertFalse(ss_rule.adaptive)
alg = GD(initial=initial, objective_function=f, step_size=ss_rule)
self.assertEqual(ss_rule.stabilisation_param,3)
alg.run(2)
self.assertEqual(ss_rule.stabilisation_param,3)

#Check infinity can be passed
ss_rule=BarzilaiBorweinStepSizeRule(2, 'short',"off" )
self.assertEqual(ss_rule.mode, 'short')
self.assertFalse(ss_rule.adaptive)
self.assertEqual(ss_rule.stabilisation_param,np.inf)
alg = GD(initial=initial, objective_function=f, step_size=ss_rule)
alg.run(2)

n = 5
m = 5

A = np.eye(5).astype('float32')
b = (np.array([.5,.5,.5,.5,.5])).astype('float32')

Aop = MatrixOperator(A)
bop = VectorData(b)
ig=Aop.domain
initial = ig.allocate(0)
f = LeastSquares(Aop, b=bop, c=0.5)
ss_rule=BarzilaiBorweinStepSizeRule(0.22, 'long',np.inf )
alg = GD(initial=initial, objective_function=f, step_size=ss_rule)
self.assertFalse(ss_rule.is_short)
#Check the initial step size was used
alg.run(1)
self.assertNumpyArrayAlmostEqual( np.array([.11,.11,.11,.11,.11]), alg.x.as_array() )
self.assertFalse(ss_rule.is_short)
#check long
alg.run(1)
x_change= np.array([.11,.11,.11,.11,.11])-np.array([0,0,0,0,0])
grad_change = -np.array([.39,.39,.39,.39,.39])+np.array([.5,.5,.5,.5,.5])
step= x_change.dot(x_change)/x_change.dot(grad_change)
self.assertNumpyArrayAlmostEqual( np.array([.11,.11,.11,.11,.11])+step*np.array([.39,.39,.39,.39,.39]), alg.x.as_array() )
self.assertFalse(ss_rule.is_short)

ss_rule=BarzilaiBorweinStepSizeRule(0.22, 'short',np.inf )
alg = GD(initial=initial, objective_function=f, step_size=ss_rule)
self.assertTrue(ss_rule.is_short)
#Check the initial step size was used
alg.run(1)
self.assertNumpyArrayAlmostEqual( np.array([.11,.11,.11,.11,.11]), alg.x.as_array() )
self.assertTrue(ss_rule.is_short)
#check short
alg.run(1)
x_change= np.array([.11,.11,.11,.11,.11])-np.array([0,0,0,0,0])
grad_change = -np.array([.39,.39,.39,.39,.39])+np.array([.5,.5,.5,.5,.5])
step= x_change.dot(grad_change)/grad_change.dot(grad_change)
self.assertNumpyArrayAlmostEqual( np.array([.11,.11,.11,.11,.11])+step*np.array([.39,.39,.39,.39,.39]), alg.x.as_array() )
self.assertTrue(ss_rule.is_short)

#check stop iteration
ss_rule=BarzilaiBorweinStepSizeRule(1, 'long',np.inf )
alg = GD(initial=initial, objective_function=f, step_size=ss_rule)
alg.run(500)
self.assertEqual(alg.iteration, 1)

#check adaptive
ss_rule=BarzilaiBorweinStepSizeRule(0.001, 'long',"auto")
alg = GD(initial=initial, objective_function=f, step_size=ss_rule)
self.assertEqual(ss_rule.stabilisation_param, np.inf)
alg.run(2)
self.assertNotEqual(ss_rule.stabilisation_param, np.inf)

#check stops being adaptive

ss_rule=BarzilaiBorweinStepSizeRule(0.0000001, 'long',"auto" )
alg = GD(initial=initial, objective_function=f, step_size=ss_rule)
self.assertEqual(ss_rule.stabilisation_param, np.inf)
alg.run(4)
self.assertNotEqual(ss_rule.stabilisation_param, np.inf)
a=ss_rule.stabilisation_param
alg.run(1)
self.assertEqual(ss_rule.stabilisation_param, a)

#Test alternating
ss_rule=BarzilaiBorweinStepSizeRule(0.0000001, 'alternate',"auto" )
alg = GD(initial=initial, objective_function=f, step_size=ss_rule)
self.assertFalse(ss_rule.is_short)
alg.run(2)
self.assertTrue(ss_rule.is_short)
alg.run(1)
self.assertFalse(ss_rule.is_short)
alg.run(1)
self.assertTrue(ss_rule.is_short)




def test_bb_converge(self):
n = 10
m = 5
np.random.seed(4)
A = np.random.uniform(0, 1, (m, n)).astype('float32')
b = (A.dot(np.random.randn(n)) + 0.1 *
np.random.randn(m)).astype('float32')

Aop = MatrixOperator(A)
bop = VectorData(b)
ig=Aop.domain
initial = ig.allocate()
f = LeastSquares(Aop, b=bop, c=2)

ss_rule=ArmijoStepSizeRule(max_iterations=40)
alg_true = GD(initial=initial, objective_function=f, step_size=ss_rule)
alg_true .run(300, verbose=0)



ss_rule=BarzilaiBorweinStepSizeRule(1/f.L, 'short')
alg = GD(initial=initial, objective_function=f, step_size=ss_rule)
alg.run(80, verbose=0)
self.assertNumpyArrayAlmostEqual(alg.x.as_array(), alg_true.x.as_array(), decimal=3)


ss_rule=BarzilaiBorweinStepSizeRule(1/f.L, 'long')
alg = GD(initial=initial, objective_function=f, step_size=ss_rule)
alg.run(80, verbose=0)
self.assertNumpyArrayAlmostEqual(alg.x.as_array(), alg_true.x.as_array(), decimal=3)


ss_rule=BarzilaiBorweinStepSizeRule(1/f.L, 'alternate')
alg = GD(initial=initial, objective_function=f, step_size=ss_rule)

alg.run(80, verbose=0)
self.assertNumpyArrayAlmostEqual(alg.x.as_array(), alg_true.x.as_array(), decimal=3)


4 changes: 4 additions & 0 deletions docs/source/optimisation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,10 @@ We also have a number of example classes:
.. autoclass:: cil.optimisation.utilities.StepSizeMethods.ArmijoStepSizeRule
:members:

.. autoclass:: cil.optimisation.utilities.StepSizeMethods.BarzilaiBorweinStepSizeRule
:members:



Preconditioners
----------------
Expand Down

0 comments on commit 1088bc3

Please sign in to comment.