Merge pull request #174 from HDI-Project/fix-btbsession-and-integration-tests
Fix btbsession and integration tests
pvk-developer authored Feb 18, 2020
2 parents 0574d67 + e3e02f3 commit 9368571
Showing 7 changed files with 378 additions and 159 deletions.
114 changes: 71 additions & 43 deletions btb/session.py
@@ -30,46 +30,40 @@ class BTBSession:
the tunables that have reached as many errors as the user specified.
Attributes:
_tunables (dict):
best_proposal (dict):
Best configuration found with the name of the tunable and the hyperparameters
and cross-validated score obtained for it.
best_score (float):
Best score obtained for this session so far.
proposals (dict):
Dictionary containing all the proposals generated by the ``BTBSession``.
iterations (int):
Amount of iterations run.
errors (list):
A list with produced errors during the session.
Args:
tunables (dict):
Python dictionary that has as keys the name of the tunable and
as value a dictionary with the tunable hyperparameters or a
``btb.tuning.tunable.Tunable`` instance.
_scorer (callable object / function):
scorer (callable object / function):
A callable object or function with signature ``scorer(tunable_name, config)``
which should return only a single value.
_tuner_class (btb.tuning.tuner.BaseTuner):
tuner_class (btb.tuning.tuner.BaseTuner):
A tuner based on BTB ``BaseTuner`` class. This tuner will manage the new proposals.
Defaults to ``btb.tuning.tuners.gaussian_process.GPTuner``
_selector_class (btb.selection.selector.Selector):
selector_class (btb.selection.selector.Selector):
A selector based on BTB ``Selector`` class. This will determine which one of
the tunables is performing better, and which one to test next. Defaults to
``btb.selection.selectors.ucb1.UCB1``
_maximize (bool):
maximize (bool):
If ``True`` the scores are interpreted as bigger is better; if ``False``, then smaller
is better. This should depend on the problem type (maximization or minimization).
Defaults to ``True``.
_max_errors (int):
max_errors (int):
Amount of errors allowed for a tunable to not generate a score. Once this amount of
errors is reached, the tunable will be removed from the list. Defaults to 1.
best_proposal (dict):
Best configuration found with the name of the tunable and the hyperparameters
and cross-validated score obtained for it.
best_score (float):
Best score obtained for this session so far.
proposals (dict):
Dictionary containing all the proposals generated by the ``BTBSession``.
iterations (int):
Amount of iterations run.
errors (list):
A list with produced errors during the session.
_best_normalized (float):
Best normalized score obtained.
_tunable_names (list):
A list that contains the tunables that still have proposals.
_normalized_scores (defaultdict):
Dictionary with the name of the tunables and the obtained normalized scores.
_tuners (dict):
The name of the tunable and the tuner instance to which this belongs.
verbose (bool):
If ``True`` a progress bar will be displayed for the ``run`` process.
"""
@@ -79,19 +73,18 @@ class BTBSession:
_selector = None
_maximize = None
_max_errors = None
_best_normalized = None
_tunable_names = None
_normalized_scores = None
_tuners = None
_range = None

best_proposal = None
best_score = None
proposals = None
iterations = None
errors = None

_best_normalized = None
_tunable_names = None
_normalized_scores = None
_tuners = None
_range = None

def _normalize(self, score):
if score is not None:
return score if self._maximize else -score
@@ -113,7 +106,7 @@ def __init__(self, tunables, scorer, tuner_class=GPTuner, selector_class=UCB1,
self.errors = Counter()
self.best_score = None

self._best_normalized = self._normalize(-np.inf)
self._best_normalized = -np.inf
self._normalized_scores = defaultdict(list)
self._tuners = dict()
self._range = trange if verbose else range
@@ -149,6 +142,21 @@ def _make_id(self, name, config):

return md5(hashable).hexdigest()

def _remove_tunable(self, tunable_name):
"""Remove a tunable from the candidates list.
This is necessary when:
- Duplicates are not allowed and the tunable has exhausted all its
configurations.
- The tunable has failed more than ``max_errors`` times.
When this happens, the tunable is removed from the tunables dict
and its scores are removed from the normalized_scores dict used by
the selectors.
"""
self._normalized_scores.pop(tunable_name, None)
self._tunables.pop(tunable_name, None)

def propose(self):
"""Propose a new configuration to score.
@@ -178,11 +186,11 @@ def propose(self):
StopTuning:
If the ``BTBSession`` has run out of proposals to generate.
"""
if not self._tunable_names:
if not self._tunables:
raise StopTuning('There are no tunables left to try.')

if len(self._tuners) < len(self._tunable_names):
tunable_name = self._tunable_names[len(self._normalized_scores)]
tunable_name = self._tunable_names[len(self._tuners)]
tunable = self._tunables[tunable_name]

if isinstance(tunable, dict):
@@ -195,19 +203,35 @@ def propose(self):
LOGGER.info('Obtaining default configuration for %s', tunable_name)
config = tunable.get_defaults()

self._tuners[tunable_name] = self._tuner_class(tunable)
if tunable.cardinality == 1:
LOGGER.warn('Skipping tuner creation for Tunable %s with cardinality 1',
tunable_name)
tuner = None
else:
tuner = self._tuner_class(tunable)

self._tuners[tunable_name] = tuner

else:
tunable_name = self._selector.select(self._normalized_scores)
if self._normalized_scores:
tunable_name = self._selector.select(self._normalized_scores)
else:
# If _normalized_scores is still empty, the selector crashes.
# This happens when max_errors > 1, all tunables have tuners
# and all previous trials have crashed.
tunable_name = np.random.choice(list(self._tuners.keys()))

tuner = self._tuners[tunable_name]
try:
if tuner is None:
raise StopTuning('Tunable %s has no tunable hyperparameters', tunable_name)

LOGGER.info('Generating new proposal configuration for %s', tunable_name)
config = tuner.propose(1)

except StopTuning:
LOGGER.info('%s has no more configs to propose.' % tunable_name)
self._normalized_scores.pop(tunable_name, None)
self._tunable_names.remove(tunable_name)
LOGGER.info('%s has no more configs to propose.', tunable_name)
self._remove_tunable(tunable_name)
tunable_name, config = self.propose()

proposal_id = self._make_id(tunable_name, config)
Expand All @@ -234,8 +258,7 @@ def handle_error(self, tunable_name):

if errors >= self._max_errors:
LOGGER.warning('Too many errors: %s. Removing tunable %s', errors, tunable_name)
self._normalized_scores.pop(tunable_name, None)
self._tunable_names.remove(tunable_name)
self._remove_tunable(tunable_name)

def record(self, tunable_name, config, score):
"""Record the configuration and the obtained score to the tuner.
@@ -268,9 +291,14 @@ def record(self, tunable_name, config, score):
self._best_normalized = normalized
try:
tuner = self._tuners[tunable_name]
tuner.record(config, normalized)
if tuner is None:
LOGGER.warn('Skipping record for Tunable %s with cardinality 1', tunable_name)
else:
tuner.record(config, normalized)

except Exception:
LOGGER.exception('Could not record configuration and score to tuner.')
LOGGER.exception('Could not record configuration and score for tuner %s.',
tunable_name)

def run(self, iterations=None):
"""Run the selection and tuning loop for the given number of iterations.
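
As a rough sketch of the loop that ``run`` implements with these methods (reusing the hypothetical ``session`` and ``scorer`` from the earlier sketch), ``propose``, ``record`` and ``handle_error`` interact with ``StopTuning`` roughly like this:

from btb.tuning.tuners.base import StopTuning

while True:
    try:
        # propose() raises StopTuning once every tunable has been
        # exhausted or removed after too many errors.
        tunable_name, config = session.propose()
    except StopTuning:
        break

    try:
        score = scorer(tunable_name, config)
    except Exception:
        # Count the failure; after max_errors failures for this tunable,
        # handle_error removes it from the candidates list.
        session.handle_error(tunable_name)
    else:
        session.record(tunable_name, config, score)

print(session.best_proposal)
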
2 changes: 2 additions & 0 deletions btb/tuning/__init__.py
@@ -6,6 +6,7 @@
from btb.tuning.hyperparams.categorical import CategoricalHyperParam
from btb.tuning.hyperparams.numerical import FloatHyperParam, IntHyperParam
from btb.tuning.tunable import Tunable
from btb.tuning.tuners.base import StopTuning
from btb.tuning.tuners.gaussian_process import GPEiTuner, GPTuner
from btb.tuning.tuners.uniform import UniformTuner

@@ -16,6 +17,7 @@
'GPTuner',
'FloatHyperParam',
'IntHyperParam',
'StopTuning',
'Tunable',
'UniformTuner',
)
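
With this re-export in place, the exception can be caught via the public tuning package instead of reaching into the tuners subpackage, for example:

# Equivalent to the btb.tuning.tuners.base import used in the sketch above.
from btb.tuning import StopTuning
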
18 changes: 18 additions & 0 deletions tests/integration/test_benchmark.py
@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
import numpy as np

from btb.benchmark import benchmark
from btb.benchmark.challenges import Rosenbrock
from btb.benchmark.tuners.btb import make_tuning_function
from btb.tuning import GPTuner


def test_benchmark_rosenbrock():
# run
candidate = make_tuning_function(GPTuner)
df = benchmark(candidate, challenges=Rosenbrock(), iterations=1)

# Assert
np.testing.assert_equal(df.columns.values, ['Rosenbrock()', 'Mean', 'Std'])
np.testing.assert_equal(df.index.values, ['tuning_function'])
np.testing.assert_equal(df.dtypes.values, [np.int, np.float, np.float])
112 changes: 0 additions & 112 deletions tests/integration/test_integration.py

This file was deleted.

