Use Standardize outcome transform by default in more models
Summary:
Makes the models whose priors were updated in pytorch#2507 use the `Standardize` outcome transform by default, mirroring pytorch#2458.

Also removes some deprecated functionality in the process: the `data_fidelity` argument to `SingleTaskMultiFidelityGP`, as well as the `FixedNoiseMultiFidelityGP` and `FixedNoiseLCEMGP` models.

TODO: Some unit tests don't actually test with an outcome transform (not even the now-default one); those will need to be updated.

Differential Revision: D62552307
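
As a usage sketch of the new default (tensor shapes illustrative; `SingleTaskMultiFidelityGP` is one of the models touched here), constructing a model without an `outcome_transform` argument now attaches a `Standardize` transform, and opting out requires an explicit `None`:

```python
import torch

from botorch.models.gp_regression_fidelity import SingleTaskMultiFidelityGP

train_X = torch.rand(20, 3, dtype=torch.float64)  # last column is the fidelity
train_Y = torch.randn(20, 1, dtype=torch.float64)

# New default: a Standardize outcome transform is attached automatically.
model = SingleTaskMultiFidelityGP(train_X, train_Y, data_fidelities=[2])
assert hasattr(model, "outcome_transform")

# Explicit opt-out: pass None to train on the raw (unstandardized) targets.
raw_model = SingleTaskMultiFidelityGP(
    train_X, train_Y, data_fidelities=[2], outcome_transform=None
)
assert not hasattr(raw_model, "outcome_transform")
```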
Balandat authored and facebook-github-bot committed Sep 12, 2024
1 parent 127fc08 commit f50bcfe
Showing 13 changed files with 100 additions and 210 deletions.
24 changes: 20 additions & 4 deletions botorch/models/approximate_gp.py
@@ -37,7 +37,7 @@
import torch
from botorch.models.gpytorch import GPyTorchModel
from botorch.models.transforms.input import InputTransform
from botorch.models.transforms.outcome import OutcomeTransform
from botorch.models.transforms.outcome import OutcomeTransform, Standardize
from botorch.models.utils import validate_input_scaling
from botorch.models.utils.gpytorch_modules import (
get_covar_module_with_dim_scaled_prior,
@@ -48,6 +48,7 @@
InducingPointAllocator,
)
from botorch.posteriors.gpytorch import GPyTorchPosterior
from botorch.utils.types import _DefaultType, DEFAULT
from gpytorch.distributions import MultivariateNormal
from gpytorch.kernels import Kernel
from gpytorch.likelihoods import (
@@ -325,9 +326,9 @@ def __init__(
variational_distribution: Optional[_VariationalDistribution] = None,
variational_strategy: type[_VariationalStrategy] = VariationalStrategy,
inducing_points: Optional[Union[Tensor, int]] = None,
outcome_transform: Optional[OutcomeTransform] = None,
input_transform: Optional[InputTransform] = None,
inducing_point_allocator: Optional[InducingPointAllocator] = None,
outcome_transform: Optional[Union[OutcomeTransform, _DefaultType]] = DEFAULT,
input_transform: Optional[InputTransform] = None,
) -> None:
r"""
Args:
@@ -338,6 +339,8 @@
either a `GaussianLikelihood` (if `num_outputs=1`) or a
`MultitaskGaussianLikelihood`(if `num_outputs>1`).
num_outputs: Number of output responses per input (default: 1).
learn_inducing_points: If True, the inducing point locations are learned
jointly with the other model parameters.
covar_module: Kernel function. If omitted, uses an `RBFKernel`.
mean_module: Mean of GP model. If omitted, uses a `ConstantMean`.
variational_distribution: Type of variational distribution to use
@@ -351,12 +354,24 @@
inducing_point_allocator: The `InducingPointAllocator` used to
initialize the inducing point locations. If omitted,
uses `GreedyVarianceReduction`.
outcome_transform: An outcome transform that is applied to the
training data during instantiation and to the posterior during
inference (that is, the `Posterior` obtained by calling
`.posterior` on the model will be on the original scale). We use a
`Standardize` transform if no `outcome_transform` is specified.
Pass down `None` to use no outcome transform.
input_transform: An input transform that is applied in the model's
forward pass.
"""
with torch.no_grad():
transformed_X = self.transform_inputs(
X=train_X, input_transform=input_transform
)
if train_Y is not None:
if outcome_transform == DEFAULT:
outcome_transform = Standardize(
m=train_Y.shape[-1], batch_shape=train_X.shape[:-2]
)
if outcome_transform is not None:
train_Y, _ = outcome_transform(train_Y)
self._validate_tensor_args(X=transformed_X, Y=train_Y)
@@ -388,6 +403,7 @@ def __init__(
"being further optimized during the model fit. If so "
"then set `learn_inducing_points` to False.",
UserWarning,
stacklevel=3,
)

if inducing_point_allocator is None:
@@ -409,7 +425,7 @@

super().__init__(model=model, likelihood=likelihood, num_outputs=num_outputs)

if outcome_transform is not None:
if outcome_transform not in (None, DEFAULT):
self.outcome_transform = outcome_transform
if input_transform is not None:
self.input_transform = input_transform
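
The `_DefaultType`/`DEFAULT` import above enables the three-way dispatch visible in this hunk: `DEFAULT` means "build a `Standardize` transform", `None` means "no transform", and anything else is a user-supplied transform. A minimal self-contained sketch of the sentinel pattern (not the BoTorch source; names mirror it for readability):

```python
from typing import Optional, Union


class _DefaultType:
    """Unique marker type distinguishing 'argument omitted' from None."""


DEFAULT = _DefaultType()


def resolve_outcome_transform(
    outcome_transform: Optional[Union[str, _DefaultType]] = DEFAULT,
) -> Optional[str]:
    # Strings stand in for transform objects to keep the sketch dependency-free.
    if outcome_transform is DEFAULT:
        return "Standardize"  # the new library default
    return outcome_transform  # None (explicit opt-out) or a user transform


print(resolve_outcome_transform())       # Standardize
print(resolve_outcome_transform(None))   # None
print(resolve_outcome_transform("Log"))  # Log
```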
73 changes: 8 additions & 65 deletions botorch/models/contextual_multioutput.py
@@ -13,14 +13,14 @@
Advances in Neural Information Processing Systems 33, NeurIPS 2020.
"""

import warnings
from typing import Any, Optional, Union

import torch
from botorch.models.multitask import MultiTaskGP
from botorch.models.transforms.input import InputTransform
from botorch.models.transforms.outcome import OutcomeTransform
from botorch.utils.datasets import MultiTaskDataset, SupervisedDataset
from botorch.utils.types import _DefaultType, DEFAULT
from gpytorch.constraints import Interval
from gpytorch.kernels.rbf_kernel import RBFKernel
from gpytorch.likelihoods.likelihood import Likelihood
@@ -51,8 +51,8 @@ def __init__(
embs_dim_list: Optional[list[int]] = None,
output_tasks: Optional[list[int]] = None,
all_tasks: Optional[list[int]] = None,
outcome_transform: Optional[Union[OutcomeTransform, _DefaultType]] = DEFAULT,
input_transform: Optional[InputTransform] = None,
outcome_transform: Optional[OutcomeTransform] = None,
) -> None:
r"""
Args:
@@ -85,12 +85,14 @@ def __init__(
training data. Note that when a task is not observed, the corresponding
task covariance will heavily depend on random initialization and may
behave unexpectedly.
input_transform: An input transform that is applied in the model's
forward pass.
outcome_transform: An outcome transform that is applied to the
training data during instantiation and to the posterior during
inference (that is, the `Posterior` obtained by calling
`.posterior` on the model will be on the original scale).
`.posterior` on the model will be on the original scale). We use a
`Standardize` transform if no `outcome_transform` is specified.
Pass down `None` to use no outcome transform.
input_transform: An input transform that is applied in the model's
forward pass.
"""
super().__init__(
train_X=train_X,
@@ -102,8 +104,8 @@
likelihood=likelihood,
output_tasks=output_tasks,
all_tasks=all_tasks,
input_transform=input_transform,
outcome_transform=outcome_transform,
input_transform=input_transform,
)
self.device = train_X.device
if all_tasks is None:
@@ -247,62 +249,3 @@ def construct_inputs(
if embs_dim_list is not None:
base_inputs["embs_dim_list"] = embs_dim_list
return base_inputs


class FixedNoiseLCEMGP(LCEMGP):
r"""The Multi-Task GP the latent context embedding multioutput
(LCE-M) kernel, with known observation noise.
DEPRECATED: Please use `LCEMGP` with `train_Yvar` instead.
Will be removed in a future release (~v0.11).
"""

def __init__(
self,
train_X: Tensor,
train_Y: Tensor,
train_Yvar: Tensor,
task_feature: int,
context_cat_feature: Optional[Tensor] = None,
context_emb_feature: Optional[Tensor] = None,
embs_dim_list: Optional[list[int]] = None,
output_tasks: Optional[list[int]] = None,
) -> None:
r"""
Args:
train_X: (n x d) X training data.
train_Y: (n x 1) Y training data.
train_Yvar: (n x 1) Observed variances of each training Y.
task_feature: Column index of train_X to get context indices.
context_cat_feature: (n_contexts x k) one-hot encoded context
features. Rows are ordered by context indices, where k is the
number of categorical variables. If None, task indices will
be used and k = 1.
context_emb_feature: (n_contexts x m) pre-given continuous
embedding features. Rows are ordered by context indices.
embs_dim_list: Embedding dimension for each categorical variable.
The length equals k. If None, the embedding dimension is set to
1 for each categorical variable.
output_tasks: A list of task indices for which to compute model
outputs. If omitted, return outputs for all task indices.
"""
warnings.warn(
"`FixedNoiseLCEMGP` has been deprecated and will be removed in a "
"future release. Please use the `LCEMGP` model instead. "
"When `train_Yvar` is specified, `LCEMGP` behaves the same "
"as the `FixedNoiseLCEMGP`.",
DeprecationWarning,
stacklevel=2,
)

super().__init__(
train_X=train_X,
train_Y=train_Y,
task_feature=task_feature,
train_Yvar=train_Yvar,
context_cat_feature=context_cat_feature,
context_emb_feature=context_emb_feature,
embs_dim_list=embs_dim_list,
output_tasks=output_tasks,
)
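
The removed `FixedNoiseLCEMGP` shim already pointed callers at its replacement: per its own deprecation warning, `LCEMGP` with `train_Yvar` behaves the same. A hedged migration sketch (tensor shapes and task layout illustrative):

```python
import torch

from botorch.models.contextual_multioutput import LCEMGP

train_X = torch.rand(30, 3, dtype=torch.float64)
train_X[:, 2] = torch.randint(0, 2, (30,), dtype=torch.float64)  # task index column
train_Y = torch.randn(30, 1, dtype=torch.float64)
train_Yvar = torch.full_like(train_Y, 0.01)  # known observation noise

# Before: FixedNoiseLCEMGP(train_X, train_Y, train_Yvar, task_feature=2)
model = LCEMGP(
    train_X=train_X,
    train_Y=train_Y,
    task_feature=2,
    train_Yvar=train_Yvar,
)
```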
1 change: 1 addition & 0 deletions botorch/models/fully_bayesian_multitask.py
@@ -254,6 +254,7 @@ def __init__(
task_feature=task_feature,
output_tasks=output_tasks,
rank=rank,
outcome_transform=None,
)
if all_tasks is not None and self._expected_task_values != set(all_tasks):
raise NotImplementedError(
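
The single added line here (`outcome_transform=None`) is the flip side of the new default: a subclass such as the fully Bayesian multitask model, which handles its target scaling itself, must now opt out of the parent's `Standardize` default explicitly. A minimal sketch of the pattern (illustrative class name, not a BoTorch class):

```python
import torch
from botorch.models.multitask import MultiTaskGP


class RawScaleMultiTaskGP(MultiTaskGP):
    """Illustrative subclass that keeps its training targets on the raw scale."""

    def __init__(
        self, train_X: torch.Tensor, train_Y: torch.Tensor, task_feature: int
    ) -> None:
        super().__init__(
            train_X=train_X,
            train_Y=train_Y,
            task_feature=task_feature,
            outcome_transform=None,  # bypass the new Standardize default
        )
```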
68 changes: 10 additions & 58 deletions botorch/models/gp_regression_fidelity.py
@@ -25,8 +25,7 @@

from __future__ import annotations

import warnings
from typing import Any, Optional, Union
from typing import Any, Optional, Sequence, Union

import torch
from botorch.exceptions.errors import UnsupportedError
@@ -40,6 +39,7 @@
from botorch.models.transforms.outcome import OutcomeTransform
from botorch.models.utils.gpytorch_modules import get_covar_module_with_dim_scaled_prior
from botorch.utils.datasets import SupervisedDataset
from botorch.utils.types import _DefaultType, DEFAULT
from gpytorch.kernels.kernel import ProductKernel
from gpytorch.kernels.scale_kernel import ScaleKernel
from gpytorch.likelihoods.likelihood import Likelihood
@@ -69,11 +69,10 @@ def __init__(
train_Yvar: Optional[Tensor] = None,
iteration_fidelity: Optional[int] = None,
data_fidelities: Optional[Union[list[int], tuple[int]]] = None,
data_fidelity: Optional[int] = None,
linear_truncated: bool = True,
nu: float = 2.5,
likelihood: Optional[Likelihood] = None,
outcome_transform: Optional[OutcomeTransform] = None,
outcome_transform: Optional[Union[OutcomeTransform, _DefaultType]] = DEFAULT,
input_transform: Optional[InputTransform] = None,
) -> None:
r"""
Expand All @@ -89,33 +88,21 @@ def __init__(
data_fidelities: The column indices for the downsampling fidelity parameter.
If a list/tuple of indices is provided, a kernel will be constructed for
each index (optional).
data_fidelity: The column index for the downsampling fidelity parameter
(optional). Deprecated in favor of `data_fidelities`.
linear_truncated: If True, use a `LinearTruncatedFidelityKernel` instead
of the default kernel.
nu: The smoothness parameter for the Matern kernel: either 1/2, 3/2, or
5/2. Only used when `linear_truncated=True`.
likelihood: A likelihood. If omitted, use a standard GaussianLikelihood
with inferred noise level.
outcome_transform: An outcome transform that is applied to the
training data during instantiation and to the posterior during
inference (that is, the `Posterior` obtained by calling
`.posterior` on the model will be on the original scale).
training data during instantiation and to the posterior during
inference (that is, the `Posterior` obtained by calling
`.posterior` on the model will be on the original scale). We use a
`Standardize` transform if no `outcome_transform` is specified.
Pass down `None` to use no outcome transform.
input_transform: An input transform that is applied in the model's
forward pass.
"""
if data_fidelity is not None:
warnings.warn(
"The `data_fidelity` argument is deprecated and will be removed in "
"a future release. Please use `data_fidelities` instead.",
DeprecationWarning,
)
if data_fidelities is not None:
raise ValueError(
"Cannot specify both `data_fidelity` and `data_fidelities`."
)
data_fidelities = [data_fidelity]

self._init_args = {
"iteration_fidelity": iteration_fidelity,
"data_fidelities": data_fidelities,
@@ -179,47 +166,11 @@ def construct_inputs(
return inputs


class FixedNoiseMultiFidelityGP(SingleTaskMultiFidelityGP):
def __init__(
self,
train_X: Tensor,
train_Y: Tensor,
train_Yvar: Tensor,
iteration_fidelity: Optional[int] = None,
data_fidelities: Optional[Union[list[int], tuple[int]]] = None,
data_fidelity: Optional[int] = None,
linear_truncated: bool = True,
nu: float = 2.5,
outcome_transform: Optional[OutcomeTransform] = None,
input_transform: Optional[InputTransform] = None,
) -> None:
r"""DEPRECATED: Use `SingleTaskMultiFidelityGP` instead.
Will be removed in a future release (~v0.11).
"""
warnings.warn(
"`FixedNoiseMultiFidelityGP` has been deprecated. "
"Use `SingleTaskMultiFidelityGP` with `train_Yvar` instead.",
DeprecationWarning,
)
super().__init__(
train_X=train_X,
train_Y=train_Y,
train_Yvar=train_Yvar,
iteration_fidelity=iteration_fidelity,
data_fidelities=data_fidelities,
data_fidelity=data_fidelity,
linear_truncated=linear_truncated,
nu=nu,
outcome_transform=outcome_transform,
input_transform=input_transform,
)


def _setup_multifidelity_covar_module(
dim: int,
aug_batch_shape: torch.Size,
iteration_fidelity: Optional[int],
data_fidelities: Optional[list[int]],
data_fidelities: Optional[Sequence[int]],
linear_truncated: bool,
nu: float,
) -> tuple[ScaleKernel, dict]:
@@ -246,6 +197,7 @@ def _setup_multifidelity_covar_module(
if iteration_fidelity is not None and iteration_fidelity < 0:
iteration_fidelity = dim + iteration_fidelity
if data_fidelities is not None:
data_fidelities = list(data_fidelities)
for i in range(len(data_fidelities)):
if data_fidelities[i] < 0:
data_fidelities[i] = dim + data_fidelities[i]
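
With both `FixedNoiseMultiFidelityGP` and the scalar `data_fidelity` argument removed, existing call sites migrate as sketched below (shapes illustrative): pass `train_Yvar` directly and use the list-valued `data_fidelities`.

```python
import torch

from botorch.models.gp_regression_fidelity import SingleTaskMultiFidelityGP

train_X = torch.rand(20, 4, dtype=torch.float64)  # column 3 holds the fidelity
train_Y = torch.randn(20, 1, dtype=torch.float64)
train_Yvar = torch.full_like(train_Y, 0.05)  # known observation noise

# Before: FixedNoiseMultiFidelityGP(train_X, train_Y, train_Yvar, data_fidelity=3)
model = SingleTaskMultiFidelityGP(
    train_X,
    train_Y,
    train_Yvar=train_Yvar,
    data_fidelities=[3],
)
```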
9 changes: 6 additions & 3 deletions botorch/models/gp_regression_mixed.py
@@ -6,7 +6,7 @@

from __future__ import annotations

from typing import Any, Callable, Optional
from typing import Any, Callable, Optional, Union

import torch
from botorch.models.gp_regression import SingleTaskGP
@@ -16,6 +16,7 @@
from botorch.models.utils.gpytorch_modules import get_covar_module_with_dim_scaled_prior
from botorch.utils.datasets import SupervisedDataset
from botorch.utils.transforms import normalize_indices
from botorch.utils.types import _DefaultType, DEFAULT
from gpytorch.constraints import GreaterThan
from gpytorch.kernels.kernel import Kernel
from gpytorch.kernels.scale_kernel import ScaleKernel
@@ -65,7 +66,7 @@ def __init__(
Callable[[torch.Size, int, list[int]], Kernel]
] = None,
likelihood: Optional[Likelihood] = None,
outcome_transform: Optional[OutcomeTransform] = None, # TODO
outcome_transform: Optional[Union[OutcomeTransform, _DefaultType]] = DEFAULT,
input_transform: Optional[InputTransform] = None, # TODO
) -> None:
r"""A single-task exact GP model supporting categorical parameters.
Expand All @@ -87,7 +88,9 @@ def __init__(
outcome_transform: An outcome transform that is applied to the
training data during instantiation and to the posterior during
inference (that is, the `Posterior` obtained by calling
`.posterior` on the model will be on the original scale).
`.posterior` on the model will be on the original scale). We use a
`Standardize` transform if no `outcome_transform` is specified.
Pass down `None` to use no outcome transform.
input_transform: An input transform that is applied in the model's
forward pass. Only input transforms are allowed which do not
transform the categorical dimensions. If you want to use it
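
The same default lands in `MixedSingleTaskGP`, resolving the `# TODO` that previously sat next to `outcome_transform`. A short usage sketch (shapes and category count illustrative):

```python
import torch

from botorch.models.gp_regression_mixed import MixedSingleTaskGP

train_X = torch.rand(20, 3, dtype=torch.float64)
train_X[:, 2] = torch.randint(0, 3, (20,), dtype=torch.float64)  # categorical column
train_Y = torch.randn(20, 1, dtype=torch.float64)

# cat_dims marks the categorical columns; Standardize now applies by default here too.
model = MixedSingleTaskGP(train_X, train_Y, cat_dims=[2])
assert hasattr(model, "outcome_transform")
```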