Update the default SingleTaskGP prior (pytorch#2449)

Summary: X-link: facebook/Ax#2610 Pull Request resolved: pytorch#2449 Update of the default hyperparameter priors for the SingleTaskGP. The conventional Scale-Matern kernel with a Gamma(3, 6) lengthscale prior is replaced by an RBF kernel (without a ScaleKernel), and the high-noise Gamma(1.1, 0.05) noise prior of the GaussianLikelihood is replaced by a LogNormal prior that prefers lower values. The change is made in accordance with the findings of [1]. It is intended to improve the out-of-the-box performance of the BoTorch models on high-dimensional problems. [1] Carl Hvarfner, Erik Orm Hellsten, Luigi Nardi. _Vanilla Bayesian Optimization Performs Great in High Dimensions_. ICML, 2024. Reviewed By: saitcakmak Differential Revision: D60080819
hvarfner · Jul 29, 2024 · 3138fa4 · 3138fa4
1 parent 4497a5c
commit 3138fa4
Show file tree

Hide file tree

Showing 13 changed files with 212 additions and 100 deletions.
diff --git a/botorch/models/gp_regression.py b/botorch/models/gp_regression.py
@@ -40,8 +40,8 @@
 from botorch.models.transforms.outcome import Log, OutcomeTransform
 from botorch.models.utils import validate_input_scaling
 from botorch.models.utils.gpytorch_modules import (
-    get_gaussian_likelihood_with_gamma_prior,
-    get_matern_kernel_with_gamma_prior,
+    get_covar_module_with_dim_scaled_prior,
+    get_gaussian_likelihood_with_lognormal_prior,
     MIN_INFERRED_NOISE_LEVEL,
 )
 from botorch.utils.containers import BotorchContainer
@@ -174,7 +174,7 @@ def __init__(
         )
         if likelihood is None:
             if train_Yvar is None:
-                likelihood = get_gaussian_likelihood_with_gamma_prior(
+                likelihood = get_gaussian_likelihood_with_lognormal_prior(
                     batch_shape=self._aug_batch_shape
                 )
             else:
@@ -190,14 +190,13 @@ def __init__(
             mean_module = ConstantMean(batch_shape=self._aug_batch_shape)
         self.mean_module = mean_module
         if covar_module is None:
-            covar_module = get_matern_kernel_with_gamma_prior(
+            covar_module = get_covar_module_with_dim_scaled_prior(
                 ard_num_dims=transformed_X.shape[-1],
                 batch_shape=self._aug_batch_shape,
             )
             self._subset_batch_dict = {
                 "mean_module.raw_constant": -1,
-                "covar_module.raw_outputscale": -1,
-                "covar_module.base_kernel.raw_lengthscale": -3,
+                "covar_module.raw_lengthscale": -3,
             }
             if train_Yvar is None:
                 self._subset_batch_dict["likelihood.noise_covar.raw_noise"] = -2

diff --git a/botorch/utils/gp_sampling.py b/botorch/utils/gp_sampling.py
@@ -143,10 +143,9 @@ def __init__(
         """
         if not isinstance(kernel, ScaleKernel):
             base_kernel = kernel
-            outputscale = torch.tensor(
-                1.0,
-                dtype=base_kernel.lengthscale.dtype,
-                device=base_kernel.lengthscale.device,
+            outputscale = torch.ones(kernel.batch_shape).to(
+                dtype=kernel.lengthscale.dtype,
+                device=kernel.lengthscale.device,
             )
         else:
             base_kernel = kernel.base_kernel

diff --git a/test/models/test_converter.py b/test/models/test_converter.py
@@ -23,7 +23,7 @@
 from botorch.models.transforms.outcome import Standardize
 from botorch.utils.test_helpers import SimpleGPyTorchModel
 from botorch.utils.testing import BotorchTestCase
-from gpytorch.kernels import RBFKernel
+from gpytorch.kernels import MaternKernel, RBFKernel
 from gpytorch.likelihoods import GaussianLikelihood
 from gpytorch.likelihoods.gaussian_likelihood import FixedNoiseGaussianLikelihood
 from gpytorch.priors import LogNormalPrior
@@ -134,13 +134,17 @@ def test_model_list_to_batched(self):
                 model_list_to_batched(ModelListGP(gp1, gp2))
             # check scalar agreement
             gp2 = SingleTaskGP(train_X, train_Y2)
-            gp2.likelihood.noise_covar.noise_prior.rate.fill_(1.0)
+
+            # Modified to check scalar agreement on a parameter that is accessible,
+            # since a mismatch in a non-parametrizable prior (like the LogNormal)
+            # is going to slip through the check regardless.
+            gp2.likelihood.noise_covar.raw_noise_constraint.lower_bound.fill_(1e-3)
             with self.assertRaises(UnsupportedError):
                 model_list_to_batched(ModelListGP(gp1, gp2))
             # check tensor shape agreement
             gp2 = SingleTaskGP(train_X, train_Y2)
-            gp2.covar_module.raw_outputscale = torch.nn.Parameter(
-                torch.tensor([0.0], device=self.device, dtype=dtype)
+            gp2.likelihood.noise_covar.raw_noise = torch.nn.Parameter(
+                torch.tensor([[0.42]], device=self.device, dtype=dtype)
             )
             with self.assertRaises(UnsupportedError):
                 model_list_to_batched(ModelListGP(gp1, gp2))
@@ -155,14 +159,15 @@ def test_model_list_to_batched(self):
             with self.assertRaises(NotImplementedError):
                 model_list_to_batched(ModelListGP(gp2))
             # test non-default kernel
-            gp1 = SingleTaskGP(train_X, train_Y1, covar_module=RBFKernel())
-            gp2 = SingleTaskGP(train_X, train_Y2, covar_module=RBFKernel())
+            gp1 = SingleTaskGP(train_X, train_Y1, covar_module=MaternKernel())
+            gp2 = SingleTaskGP(train_X, train_Y2, covar_module=MaternKernel())
             list_gp = ModelListGP(gp1, gp2)
             batch_gp = model_list_to_batched(list_gp)
-            self.assertEqual(type(batch_gp.covar_module), RBFKernel)
+            self.assertEqual(type(batch_gp.covar_module), MaternKernel)
             # test error when component GPs have different kernel types
-            gp1 = SingleTaskGP(train_X, train_Y1, covar_module=RBFKernel())
-            gp2 = SingleTaskGP(train_X, train_Y2)
+            # added types for both default and non-default kernels for clarity
+            gp1 = SingleTaskGP(train_X, train_Y1, covar_module=MaternKernel())
+            gp2 = SingleTaskGP(train_X, train_Y2, covar_module=RBFKernel())
             list_gp = ModelListGP(gp1, gp2)
             with self.assertRaises(UnsupportedError):
                 model_list_to_batched(list_gp)

diff --git a/test/models/test_deterministic.py b/test/models/test_deterministic.py
@@ -172,7 +172,7 @@ def test_FixedSingleSampleModel(self):
         post = model.posterior(test_X)
         original_output = post.mean + post.variance.sqrt() * w
         fss_output = fss_model(test_X)
-        self.assertTrue(torch.equal(original_output, fss_output))
+        self.assertAllClose(original_output, fss_output)
 
         self.assertTrue(hasattr(fss_model, "num_outputs"))
 

diff --git a/test/models/test_gp_regression.py b/test/models/test_gp_regression.py
@@ -23,7 +23,7 @@
 from botorch.utils.sampling import manual_seed
 from botorch.utils.test_helpers import get_pvar_expected
 from botorch.utils.testing import _get_random_data, BotorchTestCase
-from gpytorch.kernels import MaternKernel, RBFKernel, ScaleKernel
+from gpytorch.kernels import RBFKernel
 from gpytorch.likelihoods import (
     _GaussianLikelihoodBase,
     FixedNoiseGaussianLikelihood,
@@ -33,7 +33,7 @@
 from gpytorch.means import ConstantMean, ZeroMean
 from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood
 from gpytorch.mlls.noise_model_added_loss_term import NoiseModelAddedLossTerm
-from gpytorch.priors import GammaPrior
+from gpytorch.priors import LogNormalPrior
 
 
 class TestGPRegressionBase(BotorchTestCase):
@@ -96,10 +96,10 @@ def test_gp(self, double_only: bool = False):
 
             # test init
             self.assertIsInstance(model.mean_module, ConstantMean)
-            self.assertIsInstance(model.covar_module, ScaleKernel)
-            matern_kernel = model.covar_module.base_kernel
-            self.assertIsInstance(matern_kernel, MaternKernel)
-            self.assertIsInstance(matern_kernel.lengthscale_prior, GammaPrior)
+            self.assertIsInstance(model.covar_module, RBFKernel)
+            rbf_kernel = model.covar_module
+            self.assertIsInstance(rbf_kernel, RBFKernel)
+            self.assertIsInstance(rbf_kernel.lengthscale_prior, LogNormalPrior)
             if use_octf:
                 self.assertIsInstance(model.outcome_transform, Standardize)
             if use_intf:

diff --git a/test/models/test_model_list_gp_regression.py b/test/models/test_model_list_gp_regression.py
@@ -25,7 +25,7 @@
 from botorch.sampling.normal import IIDNormalSampler
 from botorch.utils.testing import _get_random_data, BotorchTestCase
 from gpytorch.distributions import MultitaskMultivariateNormal, MultivariateNormal
-from gpytorch.kernels import MaternKernel, ScaleKernel
+from gpytorch.kernels import RBFKernel
 from gpytorch.likelihoods import LikelihoodList
 from gpytorch.likelihoods.gaussian_likelihood import (
     FixedNoiseGaussianLikelihood,
@@ -34,7 +34,7 @@
 from gpytorch.means import ConstantMean
 from gpytorch.mlls import SumMarginalLogLikelihood
 from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood
-from gpytorch.priors import GammaPrior
+from gpytorch.priors import LogNormalPrior
 from torch import Tensor
 
 
@@ -104,10 +104,8 @@ def _base_test_ModelListGP(
         self.assertEqual(model.num_outputs, 2)
         for m in model.models:
             self.assertIsInstance(m.mean_module, ConstantMean)
-            self.assertIsInstance(m.covar_module, ScaleKernel)
-            matern_kernel = m.covar_module.base_kernel
-            self.assertIsInstance(matern_kernel, MaternKernel)
-            self.assertIsInstance(matern_kernel.lengthscale_prior, GammaPrior)
+            self.assertIsInstance(m.covar_module, RBFKernel)
+            self.assertIsInstance(m.covar_module.lengthscale_prior, LogNormalPrior)
             if outcome_transform != "None":
                 self.assertIsInstance(
                     m.outcome_transform, (Log, Standardize, ChainedOutcomeTransform)

diff --git a/test/optim/test_fit.py b/test/optim/test_fit.py
@@ -29,9 +29,9 @@ def setUp(self) -> None:
         self.mlls = {}
         with torch.random.fork_rng():
             torch.manual_seed(0)
-            train_X = torch.linspace(0, 1, 10).unsqueeze(-1)
-            train_Y = torch.sin((2 * math.pi) * train_X)
-            train_Y = train_Y + 0.1 * torch.randn_like(train_Y)
+            train_X = torch.linspace(0, 1, 30).unsqueeze(-1)
+            train_Y = torch.sin((6 * math.pi) * train_X)
+            train_Y = train_Y + 0.01 * torch.randn_like(train_Y)
 
         model = SingleTaskGP(
             train_X=train_X,

diff --git a/test/optim/utils/test_model_utils.py b/test/optim/utils/test_model_utils.py
@@ -6,6 +6,7 @@
 
 from __future__ import annotations
 
+import itertools
 import re
 import warnings
 from copy import deepcopy
@@ -16,6 +17,10 @@
 import torch
 from botorch import settings
 from botorch.models import SingleTaskGP
+from botorch.models.utils.gpytorch_modules import (
+    get_covar_module_with_dim_scaled_prior,
+    get_matern_kernel_with_gamma_prior,
+)
 from botorch.optim.utils import (
     get_data_loader,
     get_name_filter,
@@ -158,10 +163,18 @@ def test__get_name_filter(self) -> None:
 
 class TestSampleAllPriors(BotorchTestCase):
     def test_sample_all_priors(self):
-        for dtype in (torch.float, torch.double):
+        for dtype, covar_module in itertools.product(
+            (torch.float, torch.double),
+            (
+                get_covar_module_with_dim_scaled_prior(ard_num_dims=5),
+                get_matern_kernel_with_gamma_prior(ard_num_dims=5),
+            ),
+        ):
             train_X = torch.rand(3, 5, device=self.device, dtype=dtype)
             train_Y = torch.rand(3, 1, device=self.device, dtype=dtype)
-            model = SingleTaskGP(train_X=train_X, train_Y=train_Y)
+            model = SingleTaskGP(
+                train_X=train_X, train_Y=train_Y, covar_module=covar_module
+            )
             mll = ExactMarginalLogLikelihood(model.likelihood, model)
             mll.to(device=self.device, dtype=dtype)
             original_state_dict = dict(deepcopy(mll.model.state_dict()))
@@ -173,7 +186,10 @@ def test_sample_all_priors(self):
                 != original_state_dict["likelihood.noise_covar.raw_noise"]
             )
             # check that lengthscales are all different
-            ls = model.covar_module.base_kernel.raw_lengthscale.view(-1).tolist()
+            if isinstance(model.covar_module, ScaleKernel):
+                ls = model.covar_module.base_kernel.raw_lengthscale.view(-1).tolist()
+            else:
+                ls = model.covar_module.raw_lengthscale.view(-1).tolist()
             self.assertTrue(all(ls[0] != ls[i]) for i in range(1, len(ls)))
 
             # change one of the priors to a dummy prior that does not support sampling

diff --git a/test/test_fit.py b/test/test_fit.py
@@ -27,7 +27,7 @@
 from botorch.settings import debug
 from botorch.utils.context_managers import module_rollback_ctx, TensorCheckpoint
 from botorch.utils.testing import BotorchTestCase
-from gpytorch.kernels import MaternKernel
+from gpytorch.kernels import RBFKernel
 from gpytorch.mlls import ExactMarginalLogLikelihood, VariationalELBO
 from linear_operator.utils.errors import NotPSDError
 
@@ -136,8 +136,7 @@ def setUp(self, suppress_input_warnings: bool = True) -> None:
                     input_transform=Normalize(d=1),
                     outcome_transform=Standardize(m=output_dim),
                 )
-                self.assertIsInstance(model.covar_module.base_kernel, MaternKernel)
-                model.covar_module.base_kernel.nu = 2.5
+                self.assertIsInstance(model.covar_module, RBFKernel)
 
                 mll = ExactMarginalLogLikelihood(model.likelihood, model)
                 for dtype in (torch.float32, torch.float64):

diff --git a/test_community/models/test_gp_regression_multisource.py b/test_community/models/test_gp_regression_multisource.py
@@ -14,6 +14,10 @@
 from botorch.exceptions import InputDataError, OptimizationWarning
 from botorch.models import SingleTaskGP
 from botorch.models.transforms import Normalize, Standardize
+from botorch.models.utils.gpytorch_modules import (
+    get_gaussian_likelihood_with_gamma_prior,
+    get_matern_kernel_with_gamma_prior,
+)
 from botorch.posteriors import GPyTorchPosterior
 from botorch.sampling import SobolQMCNormalSampler
 from botorch.utils.test_helpers import get_pvar_expected
@@ -65,6 +69,12 @@ def _get_model_and_data(
             "train_Yvar": torch.full_like(train_Y, 0.01) if train_Yvar else None,
             "outcome_transform": outcome_transform,
             "input_transform": input_transform,
+            "covar_module": get_matern_kernel_with_gamma_prior(
+                ard_num_dims=train_X.shape[-1] - 1
+            ),
+            "likelihood": (
+                None if train_Yvar else get_gaussian_likelihood_with_gamma_prior()
+            ),
         }
         model = SingleTaskAugmentedGP(**model_kwargs, **extra_model_kwargs)
         return model, model_kwargs
@@ -109,8 +119,18 @@ def test_get_reliable_observation(self):
         true_y = torch.sin(x).reshape(-1, 1)
         y = torch.cos(x).reshape(-1, 1)
 
-        model0 = SingleTaskGP(x, true_y)
-        model1 = SingleTaskGP(x, y)
+        model0 = SingleTaskGP(
+            x,
+            true_y,
+            covar_module=get_matern_kernel_with_gamma_prior(x.shape[-1]),
+            likelihood=get_gaussian_likelihood_with_gamma_prior(),
+        )
+        model1 = SingleTaskGP(
+            x,
+            y,
+            covar_module=get_matern_kernel_with_gamma_prior(x.shape[-1]),
+            likelihood=get_gaussian_likelihood_with_gamma_prior(),
+        )
 
         res = _get_reliable_observations(model0, model1, x)
         true_res = torch.cat([torch.arange(0, 5, 1), torch.arange(9, 15, 1)]).int()

diff --git a/tutorials/constraint_active_search.ipynb b/tutorials/constraint_active_search.ipynb
@@ -249,7 +249,7 @@
         "        return radius * r * z\n",
         "\n",
         "    def _get_base_point_mask(self, X):\n",
-        "        distance_matrix = self.model.models[0].covar_module.base_kernel.covar_dist(\n",
+        "        distance_matrix = self.model.models[0].covar_module.covar_dist(\n",
         "            X, self.base_points\n",
         "        )\n",
         "        return smooth_mask(distance_matrix, self.punchout_radius)\n",
@@ -676,9 +676,18 @@
         "\n",
         "\n",
         "fig, ax = plt.subplots(figsize=(8, 6))\n",
-        "h1 = ax.contourf(Xplt.cpu().numpy(), Yplt.cpu().numpy(), Zplt.cpu().numpy(), 20, cmap=\"Blues\", alpha=0.6)\n",
+        "h1 = ax.contourf(\n",
+        "    Xplt.cpu().numpy(),\n",
+        "    Yplt.cpu().numpy(),\n",
+        "    Zplt.cpu().numpy(),\n",
+        "    20,\n",
+        "    cmap=\"Blues\",\n",
+        "    alpha=0.6,\n",
+        ")\n",
         "fig.colorbar(h1)\n",
-        "ax.contour(Xplt.cpu().numpy(), Yplt.cpu().numpy(), Zplt.cpu().numpy(), [0.55, 0.75], colors=\"k\")\n",
+        "ax.contour(\n",
+        "    Xplt.cpu().numpy(), Yplt.cpu().numpy(), Zplt.cpu().numpy(), [0.55, 0.75], colors=\"k\"\n",
+        ")\n",
         "\n",
         "feasible_inds = (\n",
         "    identify_samples_which_satisfy_constraints(Y, constraints)\n",
@@ -715,10 +724,12 @@
   ],
   "metadata": {
     "fileHeader": "",
+    "fileUid": "cb282d47-f143-4c00-9ae1-b631e97daddb",
+    "isAdHoc": false,
     "kernelspec": {
-      "display_name": "python3",
+      "display_name": "Python 3",
       "language": "python",
-      "name": "python3"
+      "name": "bento_kernel_default"
     },
     "language_info": {
       "codemirror_mode": {
@@ -732,7 +743,5 @@
       "pygments_lexer": "ipython3",
       "version": "3.9.13"
     }
-  },
-  "nbformat": 4,
-  "nbformat_minor": 2
+  }
 }