Merge pull request #1459 from pints-team/1105-irpropmin

iRprop- local optimiser.
pints-team · Aug 17, 2022 · 9acb238 · 9acb238
2 parents 27d8d26 + edff89e
commit 9acb238
Show file tree

Hide file tree

Showing 10 changed files with 781 additions and 6 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,7 @@ All notable changes to this project will be documented in this file.
 ## Unreleased
 
 ### Added
+- [#1459](https://github.com/pints-team/pints/pull/1459) Added the `iRprop-` local optimiser.
 - [#1432](https://github.com/pints-team/pints/pull/1432) Added 2 new stochastic models: production and degradation model, Schlogl's system of chemical reactions. Moved the stochastic logistic model into `pints.stochastic` to take advantage of the `MarkovJumpModel`.
 - [#1420](https://github.com/pints-team/pints/pull/1420) The `Optimiser` class now distinguishes between a best-visited point (`x_best`, with score `f_best`) and a best-guessed point (`x_guessed`, with approximate score `f_guessed`). For most optimisers, the two values are equivalent. The `OptimisationController` still tracks `x_best` and `f_best` by default, but this can be modified using the methods `set_f_guessed_tracking` and `f_guessed_tracking`.
 - [#1417](https://github.com/pints-team/pints/pull/1417) Added a module `toy.stochastic` for stochastic models. In particular, `toy.stochastic.MarkovJumpModel` implements Gillespie's algorithm for easier future implementation of stochastic models.

diff --git a/docs/source/optimisers/index.rst b/docs/source/optimisers/index.rst
@@ -19,6 +19,7 @@ or the :class:`OptimisationController` class.
     cmaes_bare
     cmaes
     gradient_descent
+    irpropmin
     nelder_mead
     pso
     snes

diff --git a/docs/source/optimisers/irpropmin.rst b/docs/source/optimisers/irpropmin.rst
@@ -0,0 +1,8 @@
+*************************
+Improved Rprop- (iRprop-)
+*************************
+
+.. currentmodule:: pints
+
+.. autoclass:: IRPropMin
+
diff --git a/examples/README.md b/examples/README.md
@@ -33,12 +33,13 @@ relevant code.
 ### Particle-based methods
 - [CMA-ES](./optimisation/cmaes.ipynb)
 - [CMA-ES (bare bones version)](./optimisation/cmaes-bare.ipynb)
-- [Gradient descent](./optimisation/gradient-descent.ipynb)
 - [PSO](./optimisation/pso.ipynb)
 - [SNES](./optimisation/snes.ipynb)
 - [XNES](./optimisation/xnes.ipynb)
 
 ### Local optimisers
+- [Gradient descent](./optimisation/gradient-descent.ipynb)
+- [iRprop-](./optimisation/irpropmin.ipynb)
 - [Nelder-Mead](./optimisation/nelder-mead.ipynb)
 
 ### Further optimisation

diff --git a/examples/optimisation/irpropmin.ipynb b/examples/optimisation/irpropmin.ipynb
diff --git a/pints/__init__.py b/pints/__init__.py
@@ -176,6 +176,7 @@ def version(formatted=False):
 from ._optimisers._cmaes import CMAES
 from ._optimisers._cmaes_bare import BareCMAES
 from ._optimisers._gradient_descent import GradientDescent
+from ._optimisers._irpropmin import IRPropMin
 from ._optimisers._nelder_mead import NelderMead
 from ._optimisers._pso import PSO
 from ._optimisers._snes import SNES

diff --git a/pints/_optimisers/__init__.py b/pints/_optimisers/__init__.py
@@ -947,10 +947,10 @@ def optimise(
         :class:`PSO`) or indirectly (for example as the center of a
         distribution in :class:`XNES`).
     sigma0
-        An optional initial standard deviation around ``x0``. Can be specified
-        either as a scalar value (one standard deviation for all coordinates)
-        or as an array with one entry per dimension. Not all methods will use
-        this information.
+        An optional initial standard deviation around ``x0``, or a parameter
+        representing the "scale" of the parameters. Can be specified either as
+        a scalar value (same value for all dimensions) or as an array with one
+        entry per dimension. Not all methods will use this information.
     boundaries
         An optional set of boundaries on the parameter space.
     transformation

diff --git a/pints/_optimisers/_gradient_descent.py b/pints/_optimisers/_gradient_descent.py
@@ -15,7 +15,7 @@ class GradientDescent(pints.Optimiser):
     The initial learning rate is set as ``min(sigma0)``, but this can be
     changed at any time with :meth:`set_learning_rate()`.
 
-    This is an unbounded methods: Any ``boundaries`` will be ignored.
+    This is an unbounded method: Any ``boundaries`` will be ignored.
     """
 
     def __init__(self, x0, sigma0=0.1, boundaries=None):

diff --git a/pints/_optimisers/_irpropmin.py b/pints/_optimisers/_irpropmin.py
@@ -0,0 +1,198 @@
+#
+# Improved Rprop local optimiser.
+#
+# This file is part of PINTS (https://github.com/pints-team/pints/) which is
+# released under the BSD 3-clause license. See accompanying LICENSE.md for
+# copyright notice and full license details.
+#
+import pints
+
+import numpy as np
+
+
+class IRPropMin(pints.Optimiser):
+    """
+    iRprop- algorithm, as described in Figure 3 of [1]_.
+
+    This is a local optimiser that requires gradient information, although it
+    uses only the direction (sign) of the gradient in each dimension and
+    ignores the magnitude. Instead, it maintains a separate step size for each
+    dimension which grows when the sign of the gradient stays the same and
+    shrinks when it changes.
+
+    Pseudo-code is given below. Here ``p_j[i]`` denotes the j-th parameter at
+    iteration ``i``, and ``df_j[i]`` is the corresponding derivative of the
+    objective function (so both are scalars)::
+
+        if df_j[i] * df_j[i - 1] > 0:
+            step_size_j[i] = 1.2 * step_size_j[i-1]
+        elif df_j[i] * df_j[i - 1] < 0:
+            step_size_j[i] = 0.5 * step_size_j[i-1]
+            df_j[i - 1] = 0
+        p_j[i] = p_j[i] - sign(df_j[i]) * step_size_j[i]
+
+    The line ``df_j[i - 1] = 0`` has two effects:
+
+        1. It sets the update at this iteration to zero (using
+           ``sign(df_j[i]) * step_size_j[i] = 0 * step_size_j[i]``).
+        2. It ensures that the next iteration is performed (since
+           ``df_j[i + 1] * df_j[i] = 0`` so neither if statement holds).
+
+    In this implementation, the ``step_size`` is initialised as ``sigma_0``,
+    the increase (0.5) & decrease factors (1.2) are fixed, and a minimum step
+    size of ``1e-3 * min(sigma0)`` is enforced.
+
+    This is an unbounded method: Any ``boundaries`` will be ignored.
+
+    The name "iRprop-" was introduced by [1]_, and is a variation on the
+    "Resilient backpropagation (Rprop)" optimiser introduced in [2]_.
+
+    References
+    ----------
+    .. [1] Empirical Evaluation of the Improved Rprop Learning Algorithms.
+           Igel and Hüsken, 2003, Neurocomputing
+           https://doi.org/10.1016/S0925-2312(01)00700-7
+    .. [2] A direct adaptive method for faster backpropagation learning: the
+           RPROP algorithm. Riedmiller and Braun, 1993.
+           https://doi.org/10.1109/ICNN.1993.298623
+
+    """
+
+    def __init__(self, x0, sigma0=0.1, boundaries=None):
+        super().__init__(x0, sigma0, boundaries)
+
+        # Set optimiser state
+        self._running = False
+        self._ready_for_tell = False
+
+        # Best solution found
+        self._x_best = self._x0
+        self._f_best = float('inf')
+
+        # Minimum and maximum step sizes
+        self._step_min = 1e-3 * np.min(self._sigma0)
+
+        # Current point, score, and gradient
+        self._current = self._x0
+        self._current_f = float('inf')
+        self._current_df = None
+
+        # Proposed next point (read-only, so can be passed to user)
+        self._proposed = self._x0
+        self._proposed.setflags(write=False)
+
+        # Step size adaptations
+        # TODO: Could be hyperparameters, but almost nobody varies these?
+        self._eta_min = 0.5  # 0 < eta_min < 1
+        self._eta_max = 1.2  # 1 < eta_max
+
+        # Current step sizes
+        self._step_size = np.array(self._sigma0)
+
+    def ask(self):
+        """ See :meth:`Optimiser.ask()`. """
+
+        # Running, and ready for tell now
+        self._ready_for_tell = True
+        self._running = True
+
+        # Return proposed points (just the one)
+        return [self._proposed]
+
+    def f_best(self):
+        """ See :meth:`Optimiser.f_best()`. """
+        return self._f_best
+
+    def f_guessed(self):
+        """ See :meth:`Optimiser.f_guessed()`. """
+        return self._current_f
+
+    def _log_init(self, logger):
+        """ See :meth:`Loggable._log_init()`. """
+        logger.add_float('Min. step')
+        logger.add_float('Max. step')
+
+    def _log_write(self, logger):
+        """ See :meth:`Loggable._log_write()`. """
+        logger.log(np.min(self._step_size))
+        logger.log(np.max(self._step_size))
+
+    def name(self):
+        """ See :meth:`Optimiser.name()`. """
+        return 'iRprop-'
+
+    def needs_sensitivities(self):
+        """ See :meth:`Optimiser.needs_sensitivities()`. """
+        return True
+
+    def n_hyper_parameters(self):
+        """ See :meth:`pints.TunableMethod.n_hyper_parameters()`. """
+        return 0
+
+    def running(self):
+        """ See :meth:`Optimiser.running()`. """
+        return self._running
+
+    def tell(self, reply):
+        """ See :meth:`Optimiser.tell()`. """
+
+        # Check ask-tell pattern
+        if not self._ready_for_tell:
+            raise Exception('ask() not called before tell()')
+        self._ready_for_tell = False
+
+        # Unpack reply
+        fx, dfx = reply[0]
+
+        # First iteration
+        if self._current_df is None:
+            self._current_f = fx
+            self._current_df = dfx
+            self._f_best = fx
+            self._x_best = self._current
+            return
+
+        # Get product of new and previous gradient
+        dprod = dfx * self._current_df
+
+        # Note: Could implement boundaries here by setting all dprod to < 0 if
+        # the point is out of bounds?
+
+        # Adapt step sizes
+        self._step_size[dprod > 0] *= self._eta_max
+        self._step_size[dprod < 0] *= self._eta_min
+
+        # Bound step sizes
+        if self._step_min is not None:
+            self._step_size = np.maximum(self._step_size, self._step_min)
+        # Note: Could implement step_max here if desired
+
+        # Remove "weight backtracking"
+        # This step ensures that, for each i where dprod < 0:
+        #  1. p[i] will remain unchanged this iteration (sign(0) == 0)
+        #  2. p[i] will change in the next iteration (dprod == 0), using
+        #     the step size set in the current iteration
+        dfx[dprod < 0] = 0
+
+        # "Accept" proposed point
+        self._current = self._proposed
+        self._current_f = fx
+        self._current_df = dfx
+
+        # Take step in direction indicated by current gradient
+        self._proposed = self._current - self._step_size * np.sign(dfx)
+        self._proposed.setflags(write=False)
+
+        # Update x_best and f_best
+        if self._f_best > fx:
+            self._f_best = fx
+            self._x_best = self._current
+
+    def x_best(self):
+        """ See :meth:`Optimiser.x_best()`. """
+        return self._x_best
+
+    def x_guessed(self):
+        """ See :meth:`Optimiser.x_guessed()`. """
+        return self._current
+
diff --git a/pints/tests/test_opt_irpropmin.py b/pints/tests/test_opt_irpropmin.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+#
+# Tests the API of the iRprop- optimiser.
+#
+# This file is part of PINTS (https://github.com/pints-team/pints/) which is
+# released under the BSD 3-clause license. See accompanying LICENSE.md for
+# copyright notice and full license details.
+#
+import unittest
+import numpy as np
+
+import pints
+import pints.toy
+
+from shared import StreamCapture
+
+
+# Passed to ``set_log_to_screen()`` in ``test_simple``; switched to True by
+# the ``-v`` command line check at the bottom of this file.
+debug = False
+# Optimiser class under test, used by every test below.
+method = pints.IRPropMin
+
+
+class TestIRPropMin(unittest.TestCase):
+    """
+    Tests the API of the iRprop- optimiser.
+    """
+    def setUp(self):
+        """ Called before every test """
+        np.random.seed(1)
+
+    def problem(self):
+        """ Returns a test problem, starting point, and sigma. """
+        r = pints.toy.ParabolicError()
+        x = [0.1, 0.1]
+        s = 0.1
+        return r, x, s
+
+    def test_simple(self):
+        # Runs an optimisation
+        r, x, s = self.problem()
+
+        opt = pints.OptimisationController(r, x, sigma0=s, method=method)
+        opt.set_log_to_screen(debug)
+        found_parameters, found_solution = opt.run()
+
+        # True solution is (0, 0) with error 0
+        self.assertTrue(found_solution < 1e-6)
+        self.assertLess(abs(found_parameters[0]), 1e-4)
+        self.assertLess(abs(found_parameters[1]), 1e-4)
+
+    def test_ask_tell(self):
+        # Tests ask-and-tell related error handling.
+        r, x, s = self.problem()
+        opt = method(x)
+
+        # Stop called when not running
+        self.assertFalse(opt.running())
+        self.assertFalse(opt.stop())
+
+        # Best position and score called before run
+        self.assertEqual(list(opt.x_best()), list(x))
+        self.assertEqual(list(opt.x_guessed()), list(x))
+        self.assertEqual(opt.f_best(), float('inf'))
+        self.assertEqual(opt.f_guessed(), float('inf'))
+
+        # Tell before ask
+        self.assertRaisesRegex(
+            Exception, r'ask\(\) not called before tell\(\)', opt.tell, 5)
+
+        # Ask
+        opt.ask()
+
+        # Now we should be running
+        self.assertTrue(opt.running())
+
+    def test_hyper_parameter_interface(self):
+        # Tests the hyper parameter interface for this optimiser.
+        opt = method([0])
+        self.assertEqual(opt.n_hyper_parameters(), 0)
+
+    def test_logging(self):
+
+        # Test with logpdf
+        r, x, s = self.problem()
+        opt = pints.OptimisationController(r, x, s, method=method)
+        opt.set_log_to_screen(True)
+        opt.set_max_unchanged_iterations(None)
+        opt.set_max_iterations(2)
+        with StreamCapture() as c:
+            opt.run()
+        lines = c.text().splitlines()
+        self.assertEqual(lines[0], 'Minimising error measure')
+        self.assertEqual(
+            lines[1], 'Using ' + opt.optimiser().name())
+        self.assertEqual(lines[2], 'Running in sequential mode.')
+        self.assertEqual(
+            lines[3],
+            'Iter. Eval. Best      Current   Min. step Max. step Time m:s')
+        self.assertEqual(
+            lines[4][:-3],
+            '0     1      0.02      0.02      0.1       0.1        0:0')
+        self.assertEqual(
+            lines[5][:-3],
+            '1     2      0.02      0.02      0.12      0.12       0:0')
+
+    def test_name(self):
+        # Test the name() method.
+        opt = method(np.array([0]))
+        self.assertEqual(opt.name(), 'iRprop-')
+        self.assertTrue(opt.needs_sensitivities())
+
+
+# Script entry point: runs the tests in this file directly.
+if __name__ == '__main__':
+    print('Add -v for more debug output')
+    import sys
+    # Enable logging to screen in test_simple (via the module-level ``debug``)
+    # Note: ``-v`` is not removed from sys.argv before unittest.main() runs
+    if '-v' in sys.argv:
+        debug = True
+    unittest.main()
+