Adding pairwise MC BALD acqf (pytorch#1855)

Summary:
Pull Request resolved: pytorch#1855

Active learning acquisition function BALD for PBO and BOPE.

Reviewed By: Balandat

Differential Revision: D40027282

fbshipit-source-id: 17f64ba4c2351ca4c49aac9fe9be504f3aa94b8b
ItsMrLin · Jun 3, 2023 · e738511 · e738511
1 parent d8ab55b
commit e738511
Show file tree

Hide file tree

Showing 3 changed files with 161 additions and 49 deletions.
diff --git a/botorch/acquisition/__init__.py b/botorch/acquisition/__init__.py
@@ -58,7 +58,10 @@
     ScalarizedObjective,
     ScalarizedPosteriorTransform,
 )
-from botorch.acquisition.preference import AnalyticExpectedUtilityOfBestOption
+from botorch.acquisition.preference import (
+    AnalyticExpectedUtilityOfBestOption,
+    PairwiseBayesianActiveLearningByDisagreement,
+)
 from botorch.acquisition.proximal import ProximalAcquisitionFunction
 from botorch.acquisition.utils import get_acquisition_function
 
@@ -73,6 +76,7 @@
     "InverseCostWeightedUtility",
     "NoisyExpectedImprovement",
     "OneShotAcquisitionFunction",
+    "PairwiseBayesianActiveLearningByDisagreement",
     "PairwiseMCPosteriorVariance",
     "PosteriorMean",
     "ProbabilityOfImprovement",

diff --git a/botorch/acquisition/preference.py b/botorch/acquisition/preference.py
@@ -12,19 +12,27 @@
     Lin, Z.J., Astudillo, R., Frazier, P.I. and Bakshy, E. Preference Exploration
     for Efficient Bayesian Optimization with Multiple Outcomes. International
     Conference on Artificial Intelligence and Statistics (AISTATS), 2022.
+
+.. [Houlsby2011bald]
+    Houlsby, N., Huszár, F., Ghahramani, Z. and Lengyel, M.
+    Bayesian Active Learning for Gaussian Process Classification.
+    NIPS Workshop on Bayesian optimization, experimental design and bandits:
+    Theory and applications, 2011.
 """
 
 from __future__ import annotations
 
-from typing import Optional
+from typing import Any, Optional
 
 import torch
 from botorch.acquisition import AnalyticAcquisitionFunction
+from botorch.acquisition.monte_carlo import MCAcquisitionFunction
 from botorch.exceptions.errors import UnsupportedError
 from botorch.models.deterministic import DeterministicModel
 from botorch.models.model import Model
 from botorch.utils.transforms import match_batch_shape, t_batch_mode_transform
 from torch import Tensor
+from torch.distributions import Bernoulli, Normal
 
 SIGMA_JITTER = 1e-8
 
@@ -45,14 +53,13 @@ def __init__(
         Args:
             pref_model: The preference model that maps the outcomes (i.e., Y) to
                 scalar-valued utility.
-            model: A deterministic model that maps parameters (i.e., X) to outcomes
+            outcome_model: A deterministic model that maps parameters (i.e., X) to outcomes
                 (i.e., Y). The outcome model f defines the search space of Y = f(X).
                 If model is None, we are directly calculating EUBO on the parameter
                 space. When used with `OneSamplePosteriorDrawModel`, we are obtaining
-                EUBO-zeta as described in [Lin2022preference].
+                EUBO-zeta as described in [Lin2022preference]_.
             previous_winner: Tensor representing the previous winner in the Y space.
         """
-        pref_model.eval()
         super().__init__(model=pref_model)
         # ensure the model is in eval mode
         self.add_module("outcome_model", outcome_model)
@@ -80,7 +87,7 @@ def forward(self, X: Tensor) -> Tensor:
             The acquisition value for each batch as a tensor of shape `batch_shape`.
         """
         if not (
-            (X.shape[-2] == 2)
+            ((X.shape[-2] == 2) and (self.previous_winner is None))
             or ((X.shape[-2] == 1) and (self.previous_winner is not None))
         ):
             raise UnsupportedError(
@@ -93,19 +100,14 @@ def forward(self, X: Tensor) -> Tensor:
         if self.previous_winner is not None:
             Y = torch.cat([Y, match_batch_shape(self.previous_winner, Y)], dim=-2)
 
-        # Calling forward directly instead of posterior here to
-        # obtain the full covariance matrix
-        pref_posterior = self.model(Y)
-        pref_mean = pref_posterior.mean
+        pref_posterior = self.model.posterior(Y)
+        pref_mean = pref_posterior.mean.squeeze(-1)
         pref_cov = pref_posterior.covariance_matrix
         delta = pref_mean[..., 0] - pref_mean[..., 1]
-        sigma = torch.sqrt(
-            pref_cov[..., 0, 0]
-            + pref_cov[..., 1, 1]
-            - pref_cov[..., 0, 1]
-            - pref_cov[..., 1, 0]
-            + SIGMA_JITTER
-        )
+
+        w = torch.tensor([1.0, -1.0], dtype=pref_cov.dtype, device=pref_cov.device)
+        var = w @ pref_cov @ w
+        sigma = torch.sqrt(var.clamp(min=SIGMA_JITTER))
 
         u = delta / sigma
 
@@ -115,3 +117,76 @@ def forward(self, X: Tensor) -> Tensor:
         if self.previous_winner is None:
             acqf_val = acqf_val + pref_mean[..., 1]
         return acqf_val
+
+
+class PairwiseBayesianActiveLearningByDisagreement(MCAcquisitionFunction):
+    r"""MC-based BALD acquisition function for pairwise comparisons."""
+
+    def __init__(
+        self,
+        pref_model: Model,
+        outcome_model: Optional[DeterministicModel] = None,
+        num_samples: Optional[int] = 1024,
+        std_noise: Optional[float] = 0.0,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Monte Carlo implementation of Bayesian Active Learning by Disagreement (BALD)
+        proposed in [Houlsby2011bald]_.
+
+        Args:
+            pref_model: The preference model that maps the outcomes (i.e., Y) to
+                scalar-valued utility.
+            outcome_model: A deterministic model that maps parameters (i.e., X)
+                to outcomes (i.e., Y). The outcome model f defines the search space
+                of Y = f(X). If outcome_model is None, we are directly calculating
+                BALD on the parameter space.
+            num_samples: Number of samples to approximate the conditional entropy.
+            std_noise: Std. dev. of additional observational noise. Defaults to 0.
+        """
+        super().__init__(model=pref_model)
+        # register the (optional) outcome model as a submodule
+        self.add_module("outcome_model", outcome_model)
+
+        self.num_samples = num_samples
+        # extra noise on top of the unit relative noise assumed (e.g., in PairwiseGP)
+        self.std_noise = std_noise
+        self.std_normal = Normal(0, 1)
+
+    @t_batch_mode_transform(expected_q=2)
+    def forward(self, X: Tensor) -> Tensor:
+        r"""Evaluate MC BALD on the candidate set `X`.
+
+        Args:
+            X: A `batch_shape x 2 x d`-dim Tensor of t-batches with `q=2` `d`-dim design
+                points each.
+
+        Returns:
+            A `batch_shape'`-dim Tensor of MC BALD values at the given pair of design
+            points `X`, where `batch_shape'` is the broadcasted batch shape of model
+            and input `X`.
+        """
+        Y = X if self.outcome_model is None else self.outcome_model(X)
+
+        pref_posterior = self.model.posterior(Y)
+        pref_mean = pref_posterior.mean.squeeze(-1)
+        pref_cov = pref_posterior.covariance_matrix
+
+        mu = pref_mean[..., 0] - pref_mean[..., 1]
+        w = torch.tensor([1.0, -1.0], dtype=pref_cov.dtype, device=pref_cov.device)
+        # Var[f_0 - f_1] = w^T Sigma w. `std_noise` is a standard deviation, so the
+        # two independent noise terms add 2 * std_noise**2 to the variance.
+        var = (2 * self.std_noise**2 + w @ pref_cov @ w).clamp(min=SIGMA_JITTER)
+        sigma = torch.sqrt(var)
+
+        # eq (3) in Houlsby, et al. (2011): entropy of the marginal predictive
+        p_pref = self.std_normal.cdf(mu / torch.sqrt(var + 1))
+        posterior_entropies = Bernoulli(p_pref).entropy()
+
+        # Sample-based approx to eq (4) in Houlsby, et al. (2011)
+        f_samples = Normal(loc=mu, scale=sigma).rsample(torch.Size([self.num_samples]))
+        sample_entropies = Bernoulli(self.std_normal.cdf(f_samples)).entropy()
+        conditional_entropies = sample_entropies.mean(dim=0)
+
+        # BALD = marginal predictive entropy - expected entropy given the latent
+        return posterior_entropies - conditional_entropies
diff --git a/test/acquisition/test_preference.py b/test/acquisition/test_preference.py
@@ -5,51 +5,88 @@
 # LICENSE file in the root directory of this source tree.
 
+from typing import Type
+
 import torch
-from botorch.acquisition.preference import AnalyticExpectedUtilityOfBestOption
+from botorch.acquisition.acquisition import AcquisitionFunction
+from botorch.acquisition.preference import (
+    AnalyticExpectedUtilityOfBestOption,
+    PairwiseBayesianActiveLearningByDisagreement,
+)
 from botorch.exceptions.errors import UnsupportedError
 from botorch.models import SingleTaskGP
 from botorch.models.deterministic import FixedSingleSampleModel
 from botorch.models.pairwise_gp import PairwiseGP
 from botorch.utils.testing import BotorchTestCase
 
 
-class TestAnalyticExpectedUtilityOfBestOption(BotorchTestCase):
-    def test_analytic_eubo(self):
-        twargs = {"dtype": torch.double}
-        X_dim = 3
-        Y_dim = 2
-        X = torch.rand(2, X_dim, **twargs)
-        Y = torch.rand(2, Y_dim, **twargs)
+class TestPreferenceAcquisitionFunctions(BotorchTestCase):
+    def setUp(self) -> None:
+        # run the base-class fixture setup that this override would otherwise skip
+        super().setUp()
+        self.twargs = {"dtype": torch.double}
+        self.X_dim = 3
+        self.Y_dim = 2
+        X = torch.rand(2, self.X_dim, **self.twargs)
+        Y = torch.rand(2, self.Y_dim, **self.twargs)
         comps = torch.tensor([[1, 0]], dtype=torch.long)
 
-        standard_bounds = torch.zeros(2, X.shape[-1])
-        standard_bounds[1] = 1
+        self.model = SingleTaskGP(X, Y)
+        self.pref_model_on_X = PairwiseGP(X, comps)
+        self.pref_model_on_Y = PairwiseGP(Y, comps)
+        self.deterministic_model = FixedSingleSampleModel(model=self.model)
 
-        model = SingleTaskGP(X, Y)
-        pref_model = PairwiseGP(Y, comps)
+    def pairwise_preference_acqf_test(
+        self, acqf_class: Type[AcquisitionFunction], test_previous_winner: bool
+    ) -> None:
+        for outcome_model in [self.deterministic_model, None]:
+            pref_model = (
+                self.pref_model_on_X if outcome_model is None else self.pref_model_on_Y
+            )
+            # Test with and without an outcome model feeding the preference model
+            acqf = acqf_class(pref_model=pref_model, outcome_model=outcome_model)
 
-        # Test with an outcome model and a preference model
-        one_sample_outcome_model = FixedSingleSampleModel(model=model)
-        eubo = AnalyticExpectedUtilityOfBestOption(
-            pref_model=pref_model, outcome_model=one_sample_outcome_model
-        )
+            # test forward with different number of points
+            X1 = torch.rand(1, self.X_dim, **self.twargs)
+            X2 = torch.rand(2, self.X_dim, **self.twargs)
+            X3 = torch.rand(3, self.X_dim, **self.twargs)
 
-        # test forward with different number of points
-        good_X = torch.rand(2, X_dim, **twargs)
-        eubo(good_X)
+            # q = 1
+            with self.assertRaises((UnsupportedError, AssertionError)):
+                acqf(X1)
+            # q = 2
+            acqf(X2)
+            # q > 2
+            with self.assertRaises((UnsupportedError, AssertionError)):
+                acqf(X3)
 
-        bad_X = torch.rand(3, X_dim, **twargs)
-        with self.assertRaises(UnsupportedError):
-            eubo(bad_X)
+            if test_previous_winner:
+                previous_winner = (
+                    torch.rand(1, self.X_dim, **self.twargs)
+                    if outcome_model is None
+                    else torch.rand(1, self.Y_dim, **self.twargs)
+                )
+                acqf = acqf_class(
+                    pref_model=pref_model,
+                    outcome_model=outcome_model,
+                    previous_winner=previous_winner,
+                )
+                # q = 1
+                acqf(X1)
+                # q = 2
+                with self.assertRaises((UnsupportedError, AssertionError)):
+                    acqf(X2)
+                # q > 2
+                with self.assertRaises((UnsupportedError, AssertionError)):
+                    acqf(X3)
 
-        good_X = torch.rand(1, X_dim, **twargs)
-        previous_winner = torch.rand(1, Y_dim, **twargs)
-        eubo_with_winner = AnalyticExpectedUtilityOfBestOption(
-            pref_model=pref_model,
-            outcome_model=one_sample_outcome_model,
-            previous_winner=previous_winner,
+    def test_analytic_eubo(self) -> None:
+        self.pairwise_preference_acqf_test(
+            acqf_class=AnalyticExpectedUtilityOfBestOption,
+            test_previous_winner=True,
         )
-        eubo_with_winner(good_X)
 
-        # Test model=None
-        AnalyticExpectedUtilityOfBestOption(pref_model=pref_model, outcome_model=None)
+    def test_mc_bald(self) -> None:
+        self.pairwise_preference_acqf_test(
+            acqf_class=PairwiseBayesianActiveLearningByDisagreement,
+            test_previous_winner=False,
+        )