Use Standardize/Normalize by default for SingleTaskGP (#2458)
Summary:
X-link: facebook/Ax#2630

Pull Request resolved: #2458

D60080819 recently updated the default BoTorch `SingleTaskGP` priors. One significant change was removing the outputscale, which may not work well if the outputs aren't standardized. This diff changes `SingleTaskGP` to use a `Standardize` outcome transform and a `Normalize` input transform by default when no input/outcome transforms are specified; users can still explicitly pass `None` to opt out of either transform.
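
For illustration, a minimal sketch of the new behavior (the data and shapes below are made up; the transform arguments mirror the defaults added in this diff):

import torch

from botorch.models import SingleTaskGP
from botorch.models.transforms.input import Normalize
from botorch.models.transforms.outcome import Standardize

train_X = 10.0 * torch.rand(20, 3)    # inputs on an arbitrary scale
train_Y = 100.0 + torch.randn(20, 1)  # outcomes far from zero mean / unit variance

# With no transforms specified, this is now equivalent to
#   SingleTaskGP(train_X, train_Y,
#                outcome_transform=Standardize(m=1),
#                input_transform=Normalize(d=3))
model = SingleTaskGP(train_X, train_Y)

# Pass `None` explicitly to opt out of either default.
model_raw = SingleTaskGP(
    train_X, train_Y, outcome_transform=None, input_transform=None
)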

Differential Revision: D60492937
David Eriksson authored and facebook-github-bot committed Aug 7, 2024
1 parent c1b73b8 commit ec16d7a
Showing 15 changed files with 228 additions and 83 deletions.
8 changes: 5 additions & 3 deletions botorch/acquisition/analytic.py
@@ -1091,15 +1091,17 @@ def _get_noiseless_fantasy_model(
     # are used across all batches (by default, a GP with batched training data
     # uses independent hyperparameters for each batch).
 
-    # Don't apply `outcome_transform` and `input_transform` here,
-    # since the data being passed has already been transformed.
-    # So we will instead set them afterwards.
+    # We don't want to use the true `outcome_transform` and `input_transform` here
+    # since the data being passed has already been transformed. We thus pass `None`
+    # and will instead set them afterwards.
     fantasy_model = SingleTaskGP(
         train_X=model.train_inputs[0],
         train_Y=model.train_targets.unsqueeze(-1),
         train_Yvar=model.likelihood.noise_covar.noise.unsqueeze(-1),
         covar_module=deepcopy(model.covar_module),
         mean_module=deepcopy(model.mean_module),
+        outcome_transform=None,
+        input_transform=None,
     )
 
     Yvar = torch.full_like(Y_fantasized, 1e-7)
botorch/acquisition/multi_objective/max_value_entropy_search.py
@@ -64,7 +64,7 @@ class qMultiObjectiveMaxValueEntropy(
         _default_sample_shape: The `sample_shape` for the default sampler.
 
     Example:
-        >>> model = SingleTaskGP(train_X, train_Y)
+        >>> model = SingleTaskGP(train_X, train_Y, outcome_transform=None)
         >>> MESMO = qMultiObjectiveMaxValueEntropy(model, sample_pfs)
         >>> mesmo = MESMO(test_X)
     """
8 changes: 7 additions & 1 deletion botorch/models/contextual.py
@@ -102,7 +102,13 @@ def __init__(
                 dimension is set to 1 for each categorical variable.
             context_weight_dict: Known population weights of each context.
         """
-        super().__init__(train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar)
+        super().__init__(
+            train_X=train_X,
+            train_Y=train_Y,
+            train_Yvar=train_Yvar,
+            input_transform=None,
+            outcome_transform=None,
+        )
         self.covar_module = LCEAKernel(
             decomposition=decomposition,
             batch_shape=self._aug_batch_shape,
12 changes: 12 additions & 0 deletions botorch/models/converter.py
@@ -17,6 +17,7 @@
 import torch
 from botorch.exceptions import UnsupportedError
 from botorch.exceptions.warnings import BotorchWarning
+from botorch.models import SingleTaskGP
 from botorch.models.gp_regression import HeteroskedasticSingleTaskGP
 from botorch.models.gp_regression_fidelity import SingleTaskMultiFidelityGP
 from botorch.models.gp_regression_mixed import MixedSingleTaskGP
@@ -179,6 +180,11 @@ def model_list_to_batched(model_list: ModelListGP) -> BatchedMultiOutputGPyTorchModel:
     batch_length = len(models)
     covar_module = _batched_kernel(models[0].covar_module, batch_length)
     kwargs["covar_module"] = covar_module
+    # SingleTaskGP uses default input/outcome transforms while this converter doesn't
+    # support outcome transforms. We need to explicitly pass down `None` to make sure
+    # no outcome transform is being used.
+    if isinstance(models[0], SingleTaskGP):
+        kwargs["outcome_transform"] = None
 
     # construct the batched GP model
     input_transform = getattr(models[0], "input_transform", None)
@@ -418,6 +424,12 @@ def batched_multi_output_to_single_output(
         kwargs["train_Yvar"] = noise_covar.noise.clone().unsqueeze(-1)
     if isinstance(batch_mo_model, SingleTaskMultiFidelityGP):
         kwargs.update(batch_mo_model._init_args)
+    # SingleTaskGP uses default input/outcome transforms while this converter doesn't
+    # support outcome transforms. We need to explicitly pass down `None` to make sure
+    # no outcome transform is being used.
+    if isinstance(batch_mo_model, SingleTaskGP):
+        kwargs["outcome_transform"] = None
+
     single_outcome_model = batch_mo_model.__class__(
         input_transform=input_transform, **kwargs
     )
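To illustrate how the converter interacts with the new defaults, a hedged sketch (the models and shapes are illustrative, not from this diff). Note that the sub-models must themselves be built with `outcome_transform=None`, since the converter raises an `UnsupportedError` for models with outcome transforms:

import torch

from botorch.models import SingleTaskGP
from botorch.models.converter import model_list_to_batched
from botorch.models.model_list_gp_regression import ModelListGP

train_X = torch.rand(10, 2)
# Build sub-models without the (new) default Standardize outcome transform.
m1 = SingleTaskGP(train_X, torch.rand(10, 1), outcome_transform=None)
m2 = SingleTaskGP(train_X, torch.rand(10, 1), outcome_transform=None)

# The converter now passes `outcome_transform=None` down to the batched
# SingleTaskGP it constructs, so no default transform sneaks back in.
batched = model_list_to_batched(ModelListGP(m1, m2))
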
27 changes: 20 additions & 7 deletions botorch/models/gp_regression.py
@@ -36,8 +36,8 @@
 import torch
 from botorch.models.gpytorch import BatchedMultiOutputGPyTorchModel
 from botorch.models.model import FantasizeMixin
-from botorch.models.transforms.input import InputTransform
-from botorch.models.transforms.outcome import Log, OutcomeTransform
+from botorch.models.transforms.input import InputTransform, Normalize
+from botorch.models.transforms.outcome import Log, OutcomeTransform, Standardize
 from botorch.models.utils import validate_input_scaling
 from botorch.models.utils.gpytorch_modules import (
     get_covar_module_with_dim_scaled_prior,
@@ -46,6 +46,7 @@
 )
 from botorch.utils.containers import BotorchContainer
 from botorch.utils.datasets import SupervisedDataset
+from botorch.utils.types import _DefaultType, DEFAULT
 from gpytorch.constraints.constraints import GreaterThan
 from gpytorch.distributions.multivariate_normal import MultivariateNormal
 from gpytorch.likelihoods.gaussian_likelihood import (
@@ -134,8 +135,8 @@ def __init__(
         likelihood: Optional[Likelihood] = None,
         covar_module: Optional[Module] = None,
         mean_module: Optional[Mean] = None,
-        outcome_transform: Optional[OutcomeTransform] = None,
-        input_transform: Optional[InputTransform] = None,
+        outcome_transform: Optional[Union[OutcomeTransform, _DefaultType]] = DEFAULT,
+        input_transform: Optional[Union[InputTransform, _DefaultType]] = DEFAULT,
     ) -> None:
         r"""
         Args:
@@ -154,16 +155,27 @@
             outcome_transform: An outcome transform that is applied to the
                 training data during instantiation and to the posterior during
                 inference (that is, the `Posterior` obtained by calling
-                `.posterior` on the model will be on the original scale).
-            input_transform: An input transform that is applied in the model's
-                forward pass.
+                `.posterior` on the model will be on the original scale). We use a
+                `Standardize` transform if no `outcome_transform` is specified.
+                Pass down `None` to use no outcome transform.
+            input_transform: An input transform that is applied in the model's forward
+                pass. We use a `Normalize` transform if no `input_transform` is
+                specified. Pass down `None` to use no input transform.
         """
+        self._validate_tensor_args(X=train_X, Y=train_Y, Yvar=train_Yvar)
+        if outcome_transform == DEFAULT:
+            outcome_transform = Standardize(
+                m=train_Y.shape[-1], batch_shape=train_X.shape[:-2]
+            )
+        if input_transform == DEFAULT:
+            input_transform = Normalize(d=train_X.shape[-1], transform_on_train=True)
         with torch.no_grad():
             transformed_X = self.transform_inputs(
                 X=train_X, input_transform=input_transform
             )
         if outcome_transform is not None:
             train_Y, train_Yvar = outcome_transform(train_Y, train_Yvar)
+        # Validate again after applying the transforms
+        self._validate_tensor_args(X=transformed_X, Y=train_Y, Yvar=train_Yvar)
         ignore_X_dims = getattr(self, "_ignore_X_dims_scaling_check", None)
         validate_input_scaling(
@@ -352,6 +364,7 @@ def __init__(
             train_X=train_X,
             train_Y=train_Y,
             likelihood=likelihood,
+            outcome_transform=None,
             input_transform=input_transform,
         )
         self.register_added_loss_term("noise_added_loss")
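The `DEFAULT` sentinel from `botorch.utils.types` is what lets the constructor distinguish "argument not specified" from an explicit `None`. A minimal sketch of the pattern (the helper below is hypothetical, mirroring the constructor logic above):

from typing import Optional, Union

from botorch.models.transforms.outcome import OutcomeTransform, Standardize
from botorch.utils.types import _DefaultType, DEFAULT

def resolve_outcome_transform(  # hypothetical helper, not part of this diff
    m: int,
    outcome_transform: Optional[Union[OutcomeTransform, _DefaultType]] = DEFAULT,
) -> Optional[OutcomeTransform]:
    if outcome_transform == DEFAULT:
        # Not specified -> fall back to the new Standardize default.
        return Standardize(m=m)
    # An explicit `None` (or a user-supplied transform) is respected as-is.
    return outcome_transform
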
35 changes: 28 additions & 7 deletions test/acquisition/multi_objective/test_max_value_entropy_search.py
@@ -14,6 +14,7 @@
     qMultiObjectiveMaxValueEntropy,
 )
 from botorch.acquisition.multi_objective.utils import compute_sample_box_decomposition
+from botorch.exceptions.errors import UnsupportedError
 from botorch.models.gp_regression import SingleTaskGP
 from botorch.models.model_list_gp_regression import ModelListGP
 from botorch.models.transforms.outcome import Standardize
@@ -71,15 +72,30 @@ def test_multi_objective_max_value_entropy(self):
             # test batched model
             train_X = torch.rand(1, 1, 2, dtype=dtype, device=self.device)
             train_Y = torch.rand(1, 1, m, dtype=dtype, device=self.device)
-            model = SingleTaskGP(train_X, train_Y)
+            model = SingleTaskGP(train_X, train_Y, outcome_transform=None)
             with self.assertRaises(NotImplementedError):
-                qMultiObjectiveMaxValueEntropy(model, dummy_sample_pareto_frontiers)
+                qMultiObjectiveMaxValueEntropy(
+                    model=model, sample_pareto_frontiers=dummy_sample_pareto_frontiers
+                )
             # test initialization
             train_X = torch.rand(4, 2, dtype=dtype, device=self.device)
             train_Y = torch.rand(4, m, dtype=dtype, device=self.device)
-            # test batched MO model
+            # Models with outcome transforms aren't supported.
             model = SingleTaskGP(train_X, train_Y)
-            mesmo = qMultiObjectiveMaxValueEntropy(model, dummy_sample_pareto_frontiers)
+            with self.assertRaisesRegex(
+                UnsupportedError,
+                "Conversion of models with outcome transforms is currently "
+                "unsupported.",
+            ):
+                qMultiObjectiveMaxValueEntropy(
+                    model=ModelListGP(model, model),
+                    sample_pareto_frontiers=dummy_sample_pareto_frontiers,
+                )
+            # test batched MO model
+            model = SingleTaskGP(train_X, train_Y, outcome_transform=None)
+            mesmo = qMultiObjectiveMaxValueEntropy(
+                model=model, sample_pareto_frontiers=dummy_sample_pareto_frontiers
+            )
             self.assertEqual(mesmo.num_fantasies, 16)
             # Initialize the sampler.
             dummy_post = model.posterior(train_X[:1])
@@ -98,11 +114,16 @@ def test_multi_objective_max_value_entropy(self):
             )
             # test ModelListGP
             model = ModelListGP(
-                *[SingleTaskGP(train_X, train_Y[:, i : i + 1]) for i in range(m)]
+                *[
+                    SingleTaskGP(train_X, train_Y[:, i : i + 1], outcome_transform=None)
+                    for i in range(m)
+                ]
             )
             mock_sample_pfs = mock.Mock()
             mock_sample_pfs.return_value = dummy_sample_pareto_frontiers(model=model)
-            mesmo = qMultiObjectiveMaxValueEntropy(model, mock_sample_pfs)
+            mesmo = qMultiObjectiveMaxValueEntropy(
+                model=model, sample_pareto_frontiers=mock_sample_pfs
+            )
             self.assertEqual(mesmo.num_fantasies, 16)
             # Initialize the sampler.
             dummy_post = model.posterior(train_X[:1])
@@ -156,7 +177,7 @@ def test_multi_objective_max_value_entropy(self):
                 ],
                 dim=1,
             )
-            fantasy_model = SingleTaskGP(fant_X, fant_Y)
+            fantasy_model = SingleTaskGP(fant_X, fant_Y, outcome_transform=None)
 
             # test with X_pending is not None
             with mock.patch.object(
18 changes: 10 additions & 8 deletions test/acquisition/test_proximal.py
@@ -245,7 +245,7 @@ def test_proximal_model_list(self):
             train_X = torch.rand(5, 3, device=self.device, dtype=dtype)
             train_Y = train_X.norm(dim=-1, keepdim=True)
 
-            gp = SingleTaskGP(train_X, train_Y).to(device=self.device)
+            gp = SingleTaskGP(train_X, train_Y)
             model = ModelListGP(gp, gp)
 
             scalarized_posterior_transform = ScalarizedPosteriorTransform(
@@ -263,11 +263,12 @@
             EI_prox = ProximalAcquisitionFunction(EI, proximal_weights=proximal_weights)
 
             ei = EI(test_X)
-            mv_normal = MultivariateNormal(train_X[-1], torch.diag(proximal_weights))
-            test_prox_weight = torch.exp(mv_normal.log_prob(test_X)) / torch.exp(
-                mv_normal.log_prob(train_X[-1])
+            train_X_trans = gp.input_transform.transform(train_X[-1])
+            test_X_trans = gp.input_transform.transform(test_X)
+            mv_normal = MultivariateNormal(train_X_trans, torch.diag(proximal_weights))
+            test_prox_weight = torch.exp(mv_normal.log_prob(test_X_trans)) / torch.exp(
+                mv_normal.log_prob(train_X_trans)
             )
-
             # test calculation
             ei_prox = EI_prox(test_X)
 
@@ -282,9 +283,10 @@
             )
 
             qei = qEI(test_X)
-            mv_normal = MultivariateNormal(train_X[-1], torch.diag(proximal_weights))
-            test_prox_weight = torch.exp(mv_normal.log_prob(test_X)) / torch.exp(
-                mv_normal.log_prob(train_X[-1])
+            test_X_trans = gp.input_transform.transform(test_X)
+            mv_normal = MultivariateNormal(train_X_trans, torch.diag(proximal_weights))
+            test_prox_weight = torch.exp(mv_normal.log_prob(test_X_trans)) / torch.exp(
+                mv_normal.log_prob(train_X_trans)
             )
 
             qei_prox = qEI_prox(test_X)
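One visible consequence of the default `Normalize`: `ProximalAcquisitionFunction` now measures proximity in the transformed input space, which is why these tests transform the points before computing the expected weight. A sketch of the reference computation (the helper is hypothetical and assumes a model with an `input_transform`, as in the test above):

import torch
from torch.distributions import MultivariateNormal

def expected_proximal_weight(gp, test_X, last_X, proximal_weights):
    # Map the candidates and the most recent training point into the
    # model's (normalized) input space before weighting.
    last_X_t = gp.input_transform.transform(last_X)
    test_X_t = gp.input_transform.transform(test_X)
    mvn = MultivariateNormal(last_X_t, torch.diag(proximal_weights))
    return torch.exp(mvn.log_prob(test_X_t)) / torch.exp(mvn.log_prob(last_X_t))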