Moved qBayesianActiveLearning and FullyBayesianAcquisitionFunction to BoTorch (#2448)

Summary:
Pull Request resolved: #2448

Moved BALD (qBayesianActiveLearningByDisagreement) and the FullyBayesianAcquisitionFunction base class to BoTorch to enable their use in Ax.

bypass-github-export-checks

Reviewed By: saitcakmak

Differential Revision: D59701373

fbshipit-source-id: 8be115796423a078997229ce984b2b8ac66a1748
pytorch · Jul 31, 2024 · 9ddd9eb · 9ddd9eb
1 parent e19b77f
commit 9ddd9eb
Show file tree

Hide file tree

Showing 12 changed files with 342 additions and 171 deletions.
diff --git a/botorch/acquisition/__init__.py b/botorch/acquisition/__init__.py
@@ -25,6 +25,9 @@
     qAnalyticProbabilityOfImprovement,
     UpperConfidenceBound,
 )
+from botorch.acquisition.bayesian_active_learning import (
+    qBayesianActiveLearningByDisagreement,
+)
 from botorch.acquisition.cost_aware import (
     GenericCostAwareUtility,
     InverseCostWeightedUtility,
@@ -98,6 +101,7 @@
     "ProbabilityOfImprovement",
     "ProximalAcquisitionFunction",
     "UpperConfidenceBound",
+    "qBayesianActiveLearningByDisagreement",
     "qAnalyticProbabilityOfImprovement",
     "qExpectedImprovement",
     "qExpectedUtilityOfBestOption",

diff --git a/botorch/acquisition/bayesian_active_learning.py b/botorch/acquisition/bayesian_active_learning.py
@@ -0,0 +1,101 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+r"""
+Acquisition functions for Bayesian active learning. This includes:
+BALD [Houlsby2011bald]_ and its batch version [kirsch2019batchbald]_.
+
+References
+
+.. [kirsch2019batchbald]
+    Andreas Kirsch, Joost van Amersfoort, Yarin Gal.
+    BatchBALD: Efficient and Diverse Batch Acquisition for Deep Bayesian
+    Active Learning.
+    In Proceedings of the Annual Conference on Neural Information
+    Processing Systems (NeurIPS), 2019.
+
+"""
+
+from __future__ import annotations
+
+from typing import Optional
+
+import torch
+from botorch.acquisition.acquisition import AcquisitionFunction, MCSamplerMixin
+from botorch.models.fully_bayesian import SaasFullyBayesianSingleTaskGP
+from botorch.models.model import Model
+from botorch.utils.transforms import concatenate_pending_points, t_batch_mode_transform
+from torch import Tensor
+
+
+class FullyBayesianAcquisitionFunction(AcquisitionFunction):
+    def __init__(self, model: Model):
+        """Base class for acquisition functions which require a Fully Bayesian
+        model treatment.
+
+        Args:
+            model: A fully bayesian single-outcome model.
+        """
+        if model._is_fully_bayesian:
+            super().__init__(model)
+
+        else:
+            raise ValueError(
+                "Fully Bayesian acquisition functions require "
+                "a SaasFullyBayesianSingleTaskGP to run."
+            )
+
+
+class qBayesianActiveLearningByDisagreement(
+    FullyBayesianAcquisitionFunction, MCSamplerMixin
+):
+    def __init__(
+        self,
+        model: SaasFullyBayesianSingleTaskGP,
+        X_pending: Optional[Tensor] = None,
+    ) -> None:
+        """
+        Batch implementation [kirsch2019batchbald]_ of BALD [Houlsby2011bald]_,
+        which maximizes the mutual information between the next observation and the
+        hyperparameters of the model. Computed by informational lower bound.
+
+        Args:
+            model: A fully bayesian single-outcome model.
+            X_pending: A `batch_shape, m x d`-dim Tensor of `m` design points.
+        """
+        super().__init__(model)
+        self.set_X_pending(X_pending)
+
+    @concatenate_pending_points
+    @t_batch_mode_transform()
+    def forward(self, X: Tensor) -> Tensor:
+        r"""Evaluate qBayesianActiveLearningByDisagreement on the candidate set `X`.
+
+        Args:
+            X: `batch_shape x q x D`-dim Tensor of input points.
+
+        Returns:
+            A `batch_shape x num_models`-dim Tensor of BALD values.
+        """
+        return self._compute_lower_bound_information_gain(X)
+
+    def _compute_lower_bound_information_gain(self, X: Tensor) -> Tensor:
+        r"""Evaluates the lower bounded information gain on the candidate set `X`.
+
+        Args:
+            X: `batch_shape x q x D`-dim Tensor of input points.
+
+        Returns:
+            A `batch_shape x num_models`-dim Tensor of information gains.
+        """
+        posterior = self.model.posterior(X, observation_noise=True)
+        marg_covar = posterior.mixture_covariance_matrix
+        cond_variances = posterior.variance
+
+        prev_entropy = torch.logdet(marg_covar).unsqueeze(-1)
+        # squeeze excess dim and mean over q-batch
+        post_ub_entropy = torch.log(cond_variances).squeeze(-1).mean(-1)
+
+        return prev_entropy - post_ub_entropy
diff --git a/botorch/acquisition/input_constructors.py b/botorch/acquisition/input_constructors.py
@@ -39,6 +39,9 @@
     ProbabilityOfImprovement,
     UpperConfidenceBound,
 )
+from botorch.acquisition.bayesian_active_learning import (
+    qBayesianActiveLearningByDisagreement,
+)
 from botorch.acquisition.cost_aware import InverseCostWeightedUtility
 from botorch.acquisition.fixed_feature import FixedFeatureAcquisitionFunction
 from botorch.acquisition.joint_entropy_search import qJointEntropySearch
@@ -1669,3 +1672,15 @@ def construct_inputs_qJES(
         "num_samples": num_samples,
     }
     return inputs
+
+
+@acqf_input_constructor(qBayesianActiveLearningByDisagreement)
+def construct_inputs_BALD(
+    model: Model,
+    X_pending: Optional[Tensor] = None,
+):
+    inputs = {
+        "model": model,
+        "X_pending": X_pending,
+    }
+    return inputs
diff --git a/botorch_community/acquisition/__init__.py b/botorch_community/acquisition/__init__.py
@@ -4,7 +4,6 @@
 # LICENSE file in the root directory of this source tree.
 
 from botorch_community.acquisition.bayesian_active_learning import (
-    qBayesianActiveLearningByDisagreement,
     qBayesianQueryByComittee,
     qBayesianVarianceReduction,
     qStatisticalDistanceActiveLearning,
@@ -17,7 +16,6 @@
 from botorch_community.acquisition.scorebo import qSelfCorrectingBayesianOptimization
 
 __all__ = [
-    "qBayesianActiveLearningByDisagreement",
     "qBayesianQueryByComittee",
     "qBayesianVarianceReduction",
     "qSelfCorrectingBayesianOptimization",

diff --git a/botorch_community/acquisition/bayesian_active_learning.py b/botorch_community/acquisition/bayesian_active_learning.py
@@ -36,9 +36,10 @@
 from typing import Optional
 
 import torch
-from botorch.acquisition.acquisition import AcquisitionFunction, MCSamplerMixin
+from botorch.acquisition.bayesian_active_learning import (
+    FullyBayesianAcquisitionFunction,
+)
 from botorch.models.fully_bayesian import MCMC_DIM, SaasFullyBayesianSingleTaskGP
-from botorch.models.model import Model
 from botorch.utils.transforms import concatenate_pending_points, t_batch_mode_transform
 
 from botorch_community.utils.stat_dist import mvn_hellinger_distance, mvn_kl_divergence
@@ -52,24 +53,6 @@
 }
 
 
-class FullyBayesianAcquisitionFunction(AcquisitionFunction):
-    def __init__(self, model: Model):
-        """Base class for acquisition functions which require a Fully Bayesian
-        model treatment.
-
-        Args:
-            model: A fully bayesian single-outcome model.
-        """
-        if model._is_fully_bayesian:
-            super().__init__(model)
-
-        else:
-            raise ValueError(
-                "Fully Bayesian acquisition functions require "
-                "a fully bayesian model (SaasFullyBayesianSingleTaskGP) to run."
-            )
-
-
 class qBayesianVarianceReduction(FullyBayesianAcquisitionFunction):
     def __init__(
         self,
@@ -128,42 +111,6 @@ def forward(self, X: Tensor) -> Tensor:
         return torch.nan_to_num(res, 0)
 
 
-class qBayesianActiveLearningByDisagreement(
-    FullyBayesianAcquisitionFunction, MCSamplerMixin
-):
-    def __init__(
-        self,
-        model: SaasFullyBayesianSingleTaskGP,
-        X_pending: Optional[Tensor] = None,
-    ) -> None:
-        """Batch implementation [kirsch2019batchbald]_ of BALD [houlsby2011bald]_,
-        which maximizes the mutual information between the next observation and the
-        hyperparameters of the model. Computed by informational lower bound.
-
-        Args:
-            model: A fully bayesian single-outcome model.
-            X_pending: A `batch_shape, m x d`-dim Tensor of `m` design points.
-        """
-        super().__init__(model)
-        self.set_X_pending(X_pending)
-
-    @concatenate_pending_points
-    @t_batch_mode_transform()
-    def forward(self, X: Tensor) -> Tensor:
-        return self._compute_lower_bound_information_gain(X)
-
-    def _compute_lower_bound_information_gain(self, X: Tensor) -> Tensor:
-        posterior = self.model.posterior(X, observation_noise=True)
-        marg_covar = posterior.mixture_covariance_matrix
-        cond_variances = posterior.variance
-
-        prev_entropy = torch.logdet(marg_covar).unsqueeze(-1)
-        # squeeze excess dim and mean over q-batch
-        post_ub_entropy = torch.log(cond_variances).squeeze(-1).mean(-1)
-
-        return prev_entropy - post_ub_entropy
-
-
 class qStatisticalDistanceActiveLearning(FullyBayesianAcquisitionFunction):
     def __init__(
         self,

diff --git a/botorch_community/acquisition/input_constructors.py b/botorch_community/acquisition/input_constructors.py
@@ -20,7 +20,6 @@
 from botorch.acquisition.utils import get_optimal_samples
 from botorch.models.model import Model
 from botorch_community.acquisition.bayesian_active_learning import (
-    qBayesianActiveLearningByDisagreement,
     qBayesianQueryByComittee,
     qBayesianVarianceReduction,
     qStatisticalDistanceActiveLearning,
@@ -44,18 +43,6 @@ def construct_inputs_BAL(
     return inputs
 
 
-@acqf_input_constructor(qBayesianActiveLearningByDisagreement)
-def construct_inputs_BALD(
-    model: Model,
-    X_pending: Optional[Tensor] = None,
-):
-    inputs = {
-        "model": model,
-        "X_pending": X_pending,
-    }
-    return inputs
-
-
 @acqf_input_constructor(qStatisticalDistanceActiveLearning)
 def construct_inputs_SAL(
     model: Model,

diff --git a/botorch_community/acquisition/scorebo.py b/botorch_community/acquisition/scorebo.py
@@ -26,14 +26,14 @@
 import torch
 from botorch import settings
 from botorch.acquisition.acquisition import MCSamplerMixin
+from botorch.acquisition.bayesian_active_learning import (
+    FullyBayesianAcquisitionFunction,
+)
 from botorch.models.fully_bayesian import MCMC_DIM, SaasFullyBayesianSingleTaskGP
 from botorch.models.gp_regression import MIN_INFERRED_NOISE_LEVEL
 from botorch.models.utils import fantasize as fantasize_flag
 from botorch.utils.transforms import concatenate_pending_points, t_batch_mode_transform
-from botorch_community.acquisition.bayesian_active_learning import (
-    DISTANCE_METRICS,
-    FullyBayesianAcquisitionFunction,
-)
+from botorch_community.acquisition.bayesian_active_learning import DISTANCE_METRICS
 from torch import Tensor
 
 # The lower bound on the CDF value of the max-values

diff --git a/sphinx/source/acquisition.rst b/sphinx/source/acquisition.rst
@@ -143,12 +143,16 @@ Active Learning Acquisition Functions
 .. automodule:: botorch.acquisition.active_learning
     :members:
 
+Bayesian Active Learning Acquisition Functions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.acquisition.bayesian_active_learning
+    :members:
+
 Preference Acquisition Functions
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.preference
     :members:
 
-
 Objectives and Cost-Aware Utilities
 -------------------------------------------