
Commit

Fix fantasization with FixedNoiseGP and outcome transforms and use FantasizeMixin (#2011)

Summary:

This fixes fantasization with FixedNoiseGP when using outcome transforms: previously, already-transformed noise was transformed again during fantasization.

This also improves fantasization for batched and batched multi-output models by using the average noise for each batch and output.

This also removes repeated code and uses the logic in `FantasizeMixin.fantasize` for handling `X` with size 0 on the -2 dimension.

This also deprecates the use of `observation_noise` as a boolean argument to `fantasize`.
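
As a rough sketch of the resulting calling convention (the toy data and model setup below are illustrative, not taken from this diff):

```python
import torch
from botorch.models import FixedNoiseGP
from botorch.models.transforms.outcome import Standardize
from botorch.sampling.normal import SobolQMCNormalSampler

# Toy training data: 8 points, 2 features, 1 output.
train_X = torch.rand(8, 2, dtype=torch.double)
train_Y = torch.sin(train_X.sum(dim=-1, keepdim=True))
train_Yvar = torch.full_like(train_Y, 1e-2)

# With an outcome transform, the stored noise is already transformed;
# after this fix, fantasize() no longer transforms it a second time.
model = FixedNoiseGP(
    train_X, train_Y, train_Yvar, outcome_transform=Standardize(m=1)
)

sampler = SobolQMCNormalSampler(sample_shape=torch.Size([4]))
X_fant = torch.rand(3, 2, dtype=torch.double)

# New default: omit `observation_noise`; the average training noise is used.
fantasy_model = model.fantasize(X=X_fant, sampler=sampler)

# Passing `observation_noise=True` now raises a DeprecationError; an explicit
# noise tensor (in the outcome-transformed space) is still accepted.
```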

Reviewed By: Balandat

Differential Revision: D49200325
sdaulton authored and facebook-github-bot committed Sep 18, 2023
1 parent fa51038 commit 169cb69
Showing 15 changed files with 259 additions and 100 deletions.
3 changes: 2 additions & 1 deletion botorch/acquisition/active_learning.py
@@ -93,7 +93,8 @@ def forward(self, X: Tensor) -> Tensor:
         # Construct the fantasy model (we actually do not use the full model,
         # this is just a convenient way of computing fast posterior covariances
         fantasy_model = self.model.fantasize(
-            X=X, sampler=self.sampler, observation_noise=True
+            X=X,
+            sampler=self.sampler,
         )

         bdims = tuple(1 for _ in X.shape[:-2])
9 changes: 6 additions & 3 deletions botorch/acquisition/knowledge_gradient.py
@@ -184,7 +184,8 @@ def forward(self, X: Tensor) -> Tensor:

         # construct the fantasy model of shape `num_fantasies x b`
         fantasy_model = self.model.fantasize(
-            X=X_actual, sampler=self.sampler, observation_noise=True
+            X=X_actual,
+            sampler=self.sampler,
         )

         # get the value function
@@ -233,7 +234,8 @@ def evaluate(self, X: Tensor, bounds: Tensor, **kwargs: Any) -> Tensor:

         # construct the fantasy model of shape `num_fantasies x b`
         fantasy_model = self.model.fantasize(
-            X=X, sampler=self.sampler, observation_noise=True
+            X=X,
+            sampler=self.sampler,
         )

         # get the value function
@@ -451,7 +453,8 @@ def forward(self, X: Tensor) -> Tensor:
         # construct the fantasy model of shape `num_fantasies x b`
         # expand X (to potentially add trace observations)
         fantasy_model = self.model.fantasize(
-            X=self.expand(X_eval), sampler=self.sampler, observation_noise=True
+            X=self.expand(X_eval),
+            sampler=self.sampler,
         )
         # get the value function
         value_function = _get_value_function(
3 changes: 2 additions & 1 deletion botorch/acquisition/max_value_entropy_search.py
@@ -389,7 +389,8 @@ def set_X_pending(self, X_pending: Optional[Tensor] = None) -> None:
         if X_pending is not None:
             # fantasize the model and use this as the new model
             self.model = init_model.fantasize(
-                X=X_pending, sampler=self.fantasies_sampler, observation_noise=True
+                X=X_pending,
+                sampler=self.fantasies_sampler,
             )
         else:
             self.model = init_model
@@ -146,7 +146,8 @@ def set_X_pending(self, X_pending: Optional[Tensor] = None) -> None:
         if X_pending is not None:
             # fantasize the model
             fantasy_model = self._init_model.fantasize(
-                X=X_pending, sampler=self.fantasies_sampler, observation_noise=True
+                X=X_pending,
+                sampler=self.fantasies_sampler,
             )
             self.mo_model = fantasy_model
             # convert model to batched single outcome model.
3 changes: 1 addition & 2 deletions botorch/acquisition/multi_step_lookahead.py
@@ -399,7 +399,7 @@ def _step(
         # construct fantasy model (with batch shape f_{j+1} x ... x f_1 x batch_shape)
         prop_grads = step_index > 0  # need to propagate gradients for steps > 0
         fantasy_model = model.fantasize(
-            X=X, sampler=samplers[0], observation_noise=True, propagate_grads=prop_grads
+            X=X, sampler=samplers[0], propagate_grads=prop_grads
         )

         # augment sample weights appropriately
@@ -585,7 +585,6 @@ def _get_induced_fantasy_model(
         fantasy_model = model.fantasize(
             X=Xs[0],
             sampler=samplers[0],
-            observation_noise=True,
         )

         return _get_induced_fantasy_model(
50 changes: 26 additions & 24 deletions botorch/models/gp_regression.py
@@ -30,15 +30,14 @@

 from __future__ import annotations

-from typing import Any, List, NoReturn, Optional, Union
+from typing import Any, List, NoReturn, Optional

 import torch
-from botorch import settings
 from botorch.models.gpytorch import BatchedMultiOutputGPyTorchModel
 from botorch.models.model import FantasizeMixin
 from botorch.models.transforms.input import InputTransform
 from botorch.models.transforms.outcome import Log, OutcomeTransform
-from botorch.models.utils import fantasize as fantasize_flag, validate_input_scaling
+from botorch.models.utils import validate_input_scaling
 from botorch.models.utils.gpytorch_modules import (
     get_gaussian_likelihood_with_gamma_prior,
     get_matern_kernel_with_gamma_prior,
@@ -164,7 +163,7 @@ def forward(self, x: Tensor) -> MultivariateNormal:
         return MultivariateNormal(mean_x, covar_x)


-class FixedNoiseGP(BatchedMultiOutputGPyTorchModel, ExactGP):
+class FixedNoiseGP(BatchedMultiOutputGPyTorchModel, ExactGP, FantasizeMixin):
     r"""A single-task exact GP model using fixed noise levels.

     A single-task exact GP that uses fixed observation noise levels, differing from
@@ -270,7 +269,7 @@ def fantasize(
         self,
         X: Tensor,
         sampler: MCSampler,
-        observation_noise: Union[bool, Tensor] = True,
+        observation_noise: Optional[Tensor] = None,
         **kwargs: Any,
     ) -> FixedNoiseGP:
         r"""Construct a fantasy model.
@@ -290,29 +289,32 @@ def fantasize(
                 `batch_shape` is the batch shape (must be compatible with the
                 batch shape of the model).
             sampler: The sampler used for sampling from the posterior at `X`.
-            observation_noise: If True, include the mean across the observation
-                noise in the training data as observation noise in the posterior
-                from which the samples are drawn. If a Tensor, use it directly
-                as the specified measurement noise.
+            observation_noise: The noise level for fantasization if
+                provided. If `None`, the mean across the observation
+                noise in the training data is used as observation noise in
+                the posterior from which the samples are drawn and
+                the fantasized noise level. If observation noise is
+                provided, it is assumed to be in the outcome-transformed
+                space, if an outcome transform is used.

         Returns:
             The constructed fantasy model.
         """
-        propagate_grads = kwargs.pop("propagate_grads", False)
-        with fantasize_flag():
-            with settings.propagate_grads(propagate_grads):
-                post_X = self.posterior(
-                    X, observation_noise=observation_noise, **kwargs
-                )
-                Y_fantasized = sampler(post_X)  # num_fantasies x batch_shape x n' x m
-                # Use the mean of the previous noise values (TODO: be smarter here).
-                # noise should be batch_shape x q x m when X is batch_shape x q x d, and
-                # Y_fantasized is num_fantasies x batch_shape x q x m.
-                noise_shape = Y_fantasized.shape[1:]
-                noise = self.likelihood.noise.mean().expand(noise_shape)
-                return self.condition_on_observations(
-                    X=self.transform_inputs(X), Y=Y_fantasized, noise=noise
-                )
+        # self.likelihood.noise is a `batch_shape x n x s(m)`-dimensional tensor
+        if observation_noise is None:
+            if self.num_outputs > 1:
+                # make noise ... x n x m
+                observation_noise = self.likelihood.noise.transpose(-1, -2)
+            else:
+                observation_noise = self.likelihood.noise.unsqueeze(-1)
+            observation_noise = observation_noise.mean(dim=-2, keepdim=True)
+
+        return super().fantasize(
+            X=X,
+            sampler=sampler,
+            observation_noise=observation_noise,
+            **kwargs,
+        )

     def forward(self, x: Tensor) -> MultivariateNormal:
         # TODO: reduce redundancy with the 'forward' method of
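
A standalone sketch of the averaging above, assuming (as in a batched multi-output FixedNoiseGP) that the likelihood stores noise with the output dimension folded into the batch, i.e. `batch_shape x m x n`; the shapes here are hypothetical:

```python
import torch

# Hypothetical stored noise: batch_shape=(2,), m=3 outputs, n=5 points.
noise = torch.rand(2, 3, 5)

# Multi-output case: move outputs last (`... x n x m`), then average over
# the n training points, keeping a broadcastable `batch_shape x 1 x m` tensor.
observation_noise = noise.transpose(-1, -2).mean(dim=-2, keepdim=True)
assert observation_noise.shape == torch.Size([2, 1, 3])
```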
32 changes: 25 additions & 7 deletions botorch/models/gpytorch.py
@@ -159,7 +159,9 @@ def posterior(
                 jointly.
             observation_noise: If True, add the observation noise from the
                 likelihood to the posterior. If a Tensor, use it directly as the
-                observation noise (must be of shape `(batch_shape) x q`).
+                observation noise (must be of shape `(batch_shape) x q`). It is
+                assumed to be in the outcome-transformed space if an outcome
+                transform is used.
             posterior_transform: An optional PosteriorTransform.

         Returns:
@@ -223,7 +225,8 @@ def condition_on_observations(self, X: Tensor, Y: Tensor, **kwargs: Any) -> Model:
             # pass the transformed data to get_fantasy_model below
             # (unless we've already trasnformed if BatchedMultiOutputGPyTorchModel)
             if not isinstance(self, BatchedMultiOutputGPyTorchModel):
-                Y, Yvar = self.outcome_transform(Y, Yvar)
+                # `noise` is assumed to already be outcome-transformed.
+                Y, _ = self.outcome_transform(Y, Yvar)
         # validate using strict=False, since we cannot tell if Y has an explicit
         # output dimension
         self._validate_tensor_args(X=X, Y=Y, Yvar=Yvar, strict=False)
@@ -373,18 +376,32 @@ def posterior(
             )
         mvn = self(X)
         if observation_noise is not False:
+            if self._num_outputs > 1:
+                noise_shape = X.shape[:-3] + torch.Size(
+                    [self._num_outputs, X.shape[-2]]
+                )
+            else:
+                noise_shape = X.shape[:-1]
             if torch.is_tensor(observation_noise):
                 # TODO: Validate noise shape
                 # make observation_noise `batch_shape x q x n`
                 if self.num_outputs > 1:
                     obs_noise = observation_noise.transpose(-1, -2)
                 else:
                     obs_noise = observation_noise.squeeze(-1)
-                mvn = self.likelihood(mvn, X, noise=obs_noise)
+                mvn = self.likelihood(
+                    mvn,
+                    X,
+                    noise=obs_noise.expand(noise_shape),
+                )
             elif isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
                 # Use the mean of the previous noise values (TODO: be smarter here).
-                noise = self.likelihood.noise.mean().expand(X.shape[:-1])
-                mvn = self.likelihood(mvn, X, noise=noise)
+                observation_noise = self.likelihood.noise.mean(dim=-1, keepdim=True)
+                mvn = self.likelihood(
+                    mvn,
+                    X,
+                    noise=observation_noise.expand(noise_shape),
+                )
             else:
                 mvn = self.likelihood(mvn, X)
         if self._num_outputs > 1:
@@ -443,8 +460,9 @@ def condition_on_observations(
         """
         noise = kwargs.get("noise")
         if hasattr(self, "outcome_transform"):
-            # we need to apply transforms before shifting batch indices around
-            Y, noise = self.outcome_transform(Y, noise)
+            # We need to apply transforms before shifting batch indices around.
+            # `noise` is assumed to already be outcome-transformed.
+            Y, _ = self.outcome_transform(Y)
         self._validate_tensor_args(X=X, Y=Y, Yvar=noise, strict=False)
         inputs = X
         if self._num_outputs > 1:
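
To make the broadcasting in `posterior` concrete, a small sketch with hypothetical shapes (a batched multi-output model sees `X` as `batch_shape x m x q x d` at this point):

```python
import torch

# Hypothetical: batch_shape=(2,), m=3 outputs, q=4 points, d=6 features.
X = torch.rand(2, 3, 4, 6)
num_outputs = 3

if num_outputs > 1:
    noise_shape = X.shape[:-3] + torch.Size([num_outputs, X.shape[-2]])
else:
    noise_shape = X.shape[:-1]

# An average-noise tensor of shape `batch_shape x 1 x 1` broadcasts cleanly
# across all outputs and query points.
mean_noise = torch.rand(2, 1, 1)
assert mean_noise.expand(noise_shape).shape == torch.Size([2, 3, 4])
```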
57 changes: 47 additions & 10 deletions botorch/models/model.py
@@ -33,7 +33,11 @@
 import numpy as np
 import torch
 from botorch import settings
-from botorch.exceptions.errors import BotorchTensorDimensionError, InputDataError
+from botorch.exceptions.errors import (
+    BotorchTensorDimensionError,
+    DeprecationError,
+    InputDataError,
+)
 from botorch.logging import shape_to_str
 from botorch.models.utils.assorted import fantasize as fantasize_flag
 from botorch.posteriors import Posterior, PosteriorList
@@ -83,7 +87,7 @@ def posterior(
         self,
         X: Tensor,
         output_indices: Optional[List[int]] = None,
-        observation_noise: bool = False,
+        observation_noise: Union[bool, Tensor] = False,
         posterior_transform: Optional[PosteriorTransform] = None,
         **kwargs: Any,
     ) -> Posterior:
@@ -102,7 +106,12 @@
                 Can be used to speed up computation if only a subset of the
                 model's outputs are required for optimization. If omitted,
                 computes the posterior over all model outputs.
-            observation_noise: If True, add observation noise to the posterior.
+            observation_noise: For models with an inferred noise level, if True,
+                include observation noise. For models with an observed noise level,
+                this must be a `model_batch_shape x 1 x m`-dim tensor or
+                a `model_batch_shape x n' x m`-dim tensor containing the average
+                noise for each batch and output. `noise` must be in the
+                outcome-transformed space if an outcome transform is used.
             posterior_transform: An optional PosteriorTransform.

         Returns:
@@ -310,7 +319,7 @@ def fantasize(
         # TODO: see if any of these can be imported only if TYPE_CHECKING
         X: Tensor,
         sampler: MCSampler,
-        observation_noise: bool = True,
+        observation_noise: Optional[Tensor] = None,
         **kwargs: Any,
     ) -> TFantasizeMixin:
         r"""Construct a fantasy model.
@@ -328,12 +337,21 @@
                 `batch_shape` is the batch shape (must be compatible with the
                 batch shape of the model).
             sampler: The sampler used for sampling from the posterior at `X`.
-            observation_noise: If True, include observation noise.
+            observation_noise: A `model_batch_shape x 1 x m`-dim tensor or
+                a `model_batch_shape x n' x m`-dim tensor containing the average
+                noise for each batch and output, where `m` is the number of outputs.
+                `noise` must be in the outcome-transformed space if an outcome
+                transform is used. If None, then the noise will be the inferred
+                noise level.
             kwargs: Will be passed to `model.condition_on_observations`

         Returns:
             The constructed fantasy model.
         """
+        if not isinstance(observation_noise, Tensor) and observation_noise is not None:
+            raise DeprecationError(
+                "`fantasize` no longer accepts a boolean for `observation_noise`."
+            )
         # if the inputs are empty, expand the inputs
         if X.shape[-2] == 0:
             output_shape = (
@@ -350,8 +368,15 @@
         propagate_grads = kwargs.pop("propagate_grads", False)
         with fantasize_flag():
             with settings.propagate_grads(propagate_grads):
-                post_X = self.posterior(X, observation_noise=observation_noise)
+                post_X = self.posterior(
+                    X,
+                    observation_noise=True
+                    if observation_noise is None
+                    else observation_noise,
+                )
                 Y_fantasized = sampler(post_X)  # num_fantasies x batch_shape x n' x m
+                if observation_noise is not None:
+                    kwargs["noise"] = observation_noise.expand(Y_fantasized.shape[1:])
                 return self.condition_on_observations(
                     X=self.transform_inputs(X), Y=Y_fantasized, **kwargs
                 )
@@ -434,7 +459,9 @@ def posterior(
                 respective likelihoods to the posterior. If a Tensor of shape
                 `(batch_shape) x q x m`, use it directly as the observation
                 noise (with `observation_noise[...,i]` added to the posterior
-                of the `i`-th model).
+                of the `i`-th model). `observation_noise` is assumed
+                to be in the outcome-transformed space, if an outcome transform
+                is used by the model.
             posterior_transform: An optional PosteriorTransform.

         Returns:
@@ -553,7 +580,7 @@ def fantasize(
         self,
         X: Tensor,
         sampler: MCSampler,
-        observation_noise: bool = True,
+        observation_noise: Optional[Tensor] = None,
         evaluation_mask: Optional[Tensor] = None,
         **kwargs: Any,
     ) -> Model:
@@ -573,7 +600,12 @@
                 batch shape of the model).
             sampler: The sampler used for sampling from the posterior at `X`. If
                 evaluation_mask is not None, this must be a `ListSampler`.
-            observation_noise: If True, include observation noise.
+            observation_noise: A `model_batch_shape x 1 x m`-dim tensor or
+                a `model_batch_shape x n' x m`-dim tensor containing the average
+                noise for each batch and output, where `m` is the number of outputs.
+                `noise` must be in the outcome-transformed space if an outcome
+                transform is used. If None, then the noise will be the inferred
+                noise level.
             evaluation_mask: A `n' x m`-dim tensor of booleans indicating which
                 outputs should be fantasized for a given design. This uses the same
                 evaluation mask for all batches.
@@ -595,6 +627,8 @@

         fant_models = []
         X_i = X
+        if observation_noise is None:
+            observation_noise_i = observation_noise
         for i in range(self.num_outputs):
             # get the inputs to fantasize at for output i
             if evaluation_mask is not None:
@@ -604,12 +638,15 @@
                 # samples from a single Sobol sequence or consider requiring that the
                 # sampling is IID to ensure good coverage.
                 sampler_i = sampler.samplers[i]
+                if observation_noise is not None:
+                    observation_noise_i = observation_noise[..., mask_i, i : i + 1]
             else:
                 sampler_i = sampler

             fant_model = self.models[i].fantasize(
                 X=X_i,
                 sampler=sampler_i,
-                observation_noise=observation_noise,
+                observation_noise=observation_noise_i,
                 **kwargs,
             )
             fant_models.append(fant_model)
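
A minimal sketch of the new deprecation behavior; the helper function here is hypothetical (the real check lives inline in `FantasizeMixin.fantasize`):

```python
import torch
from botorch.exceptions.errors import DeprecationError

def _check_observation_noise(observation_noise) -> None:
    # Mirrors the guard added above: only a Tensor or None is accepted.
    if not isinstance(observation_noise, torch.Tensor) and observation_noise is not None:
        raise DeprecationError(
            "`fantasize` no longer accepts a boolean for `observation_noise`."
        )

_check_observation_noise(None)              # OK: fall back to average training noise
_check_observation_noise(torch.rand(1, 1))  # OK: explicit noise tensor
try:
    _check_observation_noise(True)          # deprecated boolean
except DeprecationError as err:
    print(err)
```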
1 change: 1 addition & 0 deletions botorch/utils/testing.py
@@ -375,6 +375,7 @@ def _get_random_data(
         [torch.linspace(0, 0.95, n, **tkwargs) for _ in range(d)], dim=-1
     )
     train_x = train_x + 0.05 * torch.rand_like(train_x).repeat(rep_shape)
+    train_x[0] += 0.02  # modify the first batch
     train_y = torch.sin(train_x[..., :1] * (2 * math.pi))
     train_y = train_y + 0.2 * torch.randn(n, m, **tkwargs).repeat(rep_shape)
     return train_x, train_y