From 2208c2c279f9110847a5baaac24ee489184bb345 Mon Sep 17 00:00:00 2001
From: Sam Daulton
Date: Mon, 18 Sep 2023 12:02:13 -0700
Subject: [PATCH] Fix fantasization with FixedNoiseGP and outcome transforms and use FantasizeMixin (#2011)

Summary:
Pull Request resolved: https://github.com/pytorch/botorch/pull/2011

This fixes fantasization with FixedNoiseGP when using outcome transforms:
previously, already-transformed noise was transformed again during
fantasization.

This also improves fantasization for batched and batched multi-output models
to use the average noise for each batch and output.

This also removes repeated code and uses the logic in
`FantasizeMixin.fantasize` for handling `X` with size 0 on the -2 dimension.

This also deprecates the use of `observation_noise` as a boolean argument to
`fantasize`.

Differential Revision: https://internalfb.com/D49200325

fbshipit-source-id: f2150e08009bfdf0f86b0b9e5908610dbb6709ee
---
 botorch/acquisition/active_learning.py        |   3 +-
 botorch/acquisition/knowledge_gradient.py     |   9 +-
 .../acquisition/max_value_entropy_search.py   |   3 +-
 .../max_value_entropy_search.py               |   3 +-
 botorch/acquisition/multi_step_lookahead.py   |   3 +-
 botorch/models/gp_regression.py               |  50 ++++----
 botorch/models/gpytorch.py                    |  32 ++++--
 botorch/models/model.py                       |  57 +++++++--
 botorch/utils/testing.py                      |   1 +
 test/models/test_gp_regression.py             |  64 ++++++++++-
 test/models/test_gp_regression_fidelity.py    |   2 -
 test/models/test_gp_regression_mixed.py       |   2 -
 test/models/test_gpytorch.py                  |  20 ++--
 test/models/test_model_list_gp_regression.py  | 108 ++++++++++++------
 test/models/test_pairwise_gp.py               |   2 -
 15 files changed, 259 insertions(+), 100 deletions(-)

diff --git a/botorch/acquisition/active_learning.py b/botorch/acquisition/active_learning.py
index 10e7183f8d..3830f692a8 100644
--- a/botorch/acquisition/active_learning.py
+++ b/botorch/acquisition/active_learning.py
@@ -93,7 +93,8 @@ def forward(self, X: Tensor) -> Tensor:
         # Construct the fantasy model (we actually do not use the full model,
         # this is just a convenient way of computing fast posterior covariances
         fantasy_model = self.model.fantasize(
-            X=X, sampler=self.sampler, observation_noise=True
+            X=X,
+            sampler=self.sampler,
         )
 
         bdims = tuple(1 for _ in X.shape[:-2])
diff --git a/botorch/acquisition/knowledge_gradient.py b/botorch/acquisition/knowledge_gradient.py
index 9155440693..eec1f1b925 100644
--- a/botorch/acquisition/knowledge_gradient.py
+++ b/botorch/acquisition/knowledge_gradient.py
@@ -184,7 +184,8 @@ def forward(self, X: Tensor) -> Tensor:
 
         # construct the fantasy model of shape `num_fantasies x b`
         fantasy_model = self.model.fantasize(
-            X=X_actual, sampler=self.sampler, observation_noise=True
+            X=X_actual,
+            sampler=self.sampler,
         )
 
         # get the value function
@@ -233,7 +234,8 @@ def evaluate(self, X: Tensor, bounds: Tensor, **kwargs: Any) -> Tensor:
 
         # construct the fantasy model of shape `num_fantasies x b`
         fantasy_model = self.model.fantasize(
-            X=X, sampler=self.sampler, observation_noise=True
+            X=X,
+            sampler=self.sampler,
         )
 
         # get the value function
@@ -451,7 +453,8 @@ def forward(self, X: Tensor) -> Tensor:
         # construct the fantasy model of shape `num_fantasies x b`
         # expand X (to potentially add trace observations)
         fantasy_model = self.model.fantasize(
-            X=self.expand(X_eval), sampler=self.sampler, observation_noise=True
+            X=self.expand(X_eval),
+            sampler=self.sampler,
         )
         # get the value function
         value_function = _get_value_function(
diff --git a/botorch/acquisition/max_value_entropy_search.py b/botorch/acquisition/max_value_entropy_search.py
index 56805c26fe..eb03e02801 100644
--- a/botorch/acquisition/max_value_entropy_search.py
+++ b/botorch/acquisition/max_value_entropy_search.py
@@ -389,7 +389,8 @@ def set_X_pending(self, X_pending: Optional[Tensor] = None) -> None:
         if X_pending is not None:
             # fantasize the model and use this as the new model
             self.model = init_model.fantasize(
-                X=X_pending, sampler=self.fantasies_sampler, observation_noise=True
+                X=X_pending,
+                sampler=self.fantasies_sampler,
             )
         else:
             self.model = init_model
diff --git a/botorch/acquisition/multi_objective/max_value_entropy_search.py b/botorch/acquisition/multi_objective/max_value_entropy_search.py
index 9c4da13744..56dbb1092e 100644
--- a/botorch/acquisition/multi_objective/max_value_entropy_search.py
+++ b/botorch/acquisition/multi_objective/max_value_entropy_search.py
@@ -146,7 +146,8 @@ def set_X_pending(self, X_pending: Optional[Tensor] = None) -> None:
         if X_pending is not None:
             # fantasize the model
             fantasy_model = self._init_model.fantasize(
-                X=X_pending, sampler=self.fantasies_sampler, observation_noise=True
+                X=X_pending,
+                sampler=self.fantasies_sampler,
             )
             self.mo_model = fantasy_model
             # convert model to batched single outcome model.
diff --git a/botorch/acquisition/multi_step_lookahead.py b/botorch/acquisition/multi_step_lookahead.py
index 8744962665..1145965bc8 100644
--- a/botorch/acquisition/multi_step_lookahead.py
+++ b/botorch/acquisition/multi_step_lookahead.py
@@ -399,7 +399,7 @@ def _step(
    # construct fantasy model (with batch shape f_{j+1} x ... x f_1 x batch_shape)
    prop_grads = step_index > 0  # need to propagate gradients for steps > 0
    fantasy_model = model.fantasize(
-        X=X, sampler=samplers[0], observation_noise=True, propagate_grads=prop_grads
+        X=X, sampler=samplers[0], propagate_grads=prop_grads
    )
 
    # augment sample weights appropriately
@@ -585,7 +585,6 @@ def _get_induced_fantasy_model(
         fantasy_model = model.fantasize(
             X=Xs[0],
             sampler=samplers[0],
-            observation_noise=True,
         )
 
         return _get_induced_fantasy_model(
diff --git a/botorch/models/gp_regression.py b/botorch/models/gp_regression.py
index 9ca5e7853f..f69b50f08e 100644
--- a/botorch/models/gp_regression.py
+++ b/botorch/models/gp_regression.py
@@ -30,15 +30,14 @@
 
 from __future__ import annotations
 
-from typing import Any, List, NoReturn, Optional, Union
+from typing import Any, List, NoReturn, Optional
 
 import torch
-from botorch import settings
 from botorch.models.gpytorch import BatchedMultiOutputGPyTorchModel
 from botorch.models.model import FantasizeMixin
 from botorch.models.transforms.input import InputTransform
 from botorch.models.transforms.outcome import Log, OutcomeTransform
-from botorch.models.utils import fantasize as fantasize_flag, validate_input_scaling
+from botorch.models.utils import validate_input_scaling
 from botorch.models.utils.gpytorch_modules import (
     get_gaussian_likelihood_with_gamma_prior,
     get_matern_kernel_with_gamma_prior,
@@ -164,7 +163,7 @@ def forward(self, x: Tensor) -> MultivariateNormal:
         return MultivariateNormal(mean_x, covar_x)
 
 
-class FixedNoiseGP(BatchedMultiOutputGPyTorchModel, ExactGP):
+class FixedNoiseGP(BatchedMultiOutputGPyTorchModel, ExactGP, FantasizeMixin):
     r"""A single-task exact GP model using fixed noise levels.
 
     A single-task exact GP that uses fixed observation noise levels, differing from
@@ -270,7 +269,7 @@ def fantasize(
         self,
         X: Tensor,
         sampler: MCSampler,
-        observation_noise: Union[bool, Tensor] = True,
+        observation_noise: Optional[Tensor] = None,
         **kwargs: Any,
     ) -> FixedNoiseGP:
         r"""Construct a fantasy model.
@@ -290,29 +289,32 @@ def fantasize(
             `batch_shape` is the batch shape (must be compatible with the
             batch shape of the model).
             sampler: The sampler used for sampling from the posterior at `X`.
-            observation_noise: If True, include the mean across the observation
-                noise in the training data as observation noise in the posterior
-                from which the samples are drawn. If a Tensor, use it directly
-                as the specified measurement noise.
+            observation_noise: The noise level to use for fantasization, if
+                provided. If `None`, the mean across the observation noise in
+                the training data is used as the observation noise in the
+                posterior from which the samples are drawn and as the
+                fantasized noise level. If observation noise is provided, it
+                is assumed to be in the outcome-transformed space, if an
+                outcome transform is used.
 
         Returns:
             The constructed fantasy model.
         """
-        propagate_grads = kwargs.pop("propagate_grads", False)
-        with fantasize_flag():
-            with settings.propagate_grads(propagate_grads):
-                post_X = self.posterior(
-                    X, observation_noise=observation_noise, **kwargs
-                )
-                Y_fantasized = sampler(post_X)  # num_fantasies x batch_shape x n' x m
-        # Use the mean of the previous noise values (TODO: be smarter here).
-        # noise should be batch_shape x q x m when X is batch_shape x q x d, and
-        # Y_fantasized is num_fantasies x batch_shape x q x m.
-        noise_shape = Y_fantasized.shape[1:]
-        noise = self.likelihood.noise.mean().expand(noise_shape)
-        return self.condition_on_observations(
-            X=self.transform_inputs(X), Y=Y_fantasized, noise=noise
-        )
+        # self.likelihood.noise is a `batch_shape x n x s(m)`-dimensional tensor
+        if observation_noise is None:
+            if self.num_outputs > 1:
+                # make noise ... x n x m
+                observation_noise = self.likelihood.noise.transpose(-1, -2)
+            else:
+                observation_noise = self.likelihood.noise.unsqueeze(-1)
+            observation_noise = observation_noise.mean(dim=-2, keepdim=True)
+
+        return super().fantasize(
+            X=X,
+            sampler=sampler,
+            observation_noise=observation_noise,
+            **kwargs,
+        )
 
     def forward(self, x: Tensor) -> MultivariateNormal:
         # TODO: reduce redundancy with the 'forward' method of
diff --git a/botorch/models/gpytorch.py b/botorch/models/gpytorch.py
index 955ee6c2c9..9cab0f20d8 100644
--- a/botorch/models/gpytorch.py
+++ b/botorch/models/gpytorch.py
@@ -159,7 +159,9 @@ def posterior(
                 jointly.
             observation_noise: If True, add the observation noise from the
                 likelihood to the posterior. If a Tensor, use it directly as the
-                observation noise (must be of shape `(batch_shape) x q`).
+                observation noise (must be of shape `(batch_shape) x q`). It is
+                assumed to be in the outcome-transformed space if an outcome
+                transform is used.
             posterior_transform: An optional PosteriorTransform.
 
         Returns:
@@ -223,7 +225,8 @@ def condition_on_observations(self, X: Tensor, Y: Tensor, **kwargs: Any) -> Mode
             # pass the transformed data to get_fantasy_model below
             # (unless we've already transformed in BatchedMultiOutputGPyTorchModel)
             if not isinstance(self, BatchedMultiOutputGPyTorchModel):
-                Y, Yvar = self.outcome_transform(Y, Yvar)
+                # `noise` is assumed to already be outcome-transformed.
+                Y, _ = self.outcome_transform(Y, Yvar)
         # validate using strict=False, since we cannot tell if Y has an explicit
         # output dimension
         self._validate_tensor_args(X=X, Y=Y, Yvar=Yvar, strict=False)
@@ -373,6 +376,12 @@ def posterior(
             )
         mvn = self(X)
         if observation_noise is not False:
+            if self._num_outputs > 1:
+                noise_shape = X.shape[:-3] + torch.Size(
+                    [self._num_outputs, X.shape[-2]]
+                )
+            else:
+                noise_shape = X.shape[:-1]
             if torch.is_tensor(observation_noise):
                 # TODO: Validate noise shape
                 # make observation_noise `batch_shape x q x n`
@@ -380,11 +389,19 @@ if self._num_outputs > 1:
                     obs_noise = observation_noise.transpose(-1, -2)
                 else:
                     obs_noise = observation_noise.squeeze(-1)
-                mvn = self.likelihood(mvn, X, noise=obs_noise)
+                mvn = self.likelihood(
+                    mvn,
+                    X,
+                    noise=obs_noise.expand(noise_shape),
+                )
             elif isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
                 # Use the mean of the previous noise values (TODO: be smarter here).
-                noise = self.likelihood.noise.mean().expand(X.shape[:-1])
-                mvn = self.likelihood(mvn, X, noise=noise)
+                observation_noise = self.likelihood.noise.mean(dim=-1, keepdim=True)
+                mvn = self.likelihood(
+                    mvn,
+                    X,
+                    noise=observation_noise.expand(noise_shape),
+                )
             else:
                 mvn = self.likelihood(mvn, X)
         if self._num_outputs > 1:
@@ -443,8 +460,9 @@ def condition_on_observations(
         """
         noise = kwargs.get("noise")
         if hasattr(self, "outcome_transform"):
-            # we need to apply transforms before shifting batch indices around
-            Y, noise = self.outcome_transform(Y, noise)
+            # We need to apply transforms before shifting batch indices around.
+            # `noise` is assumed to already be outcome-transformed.
+            Y, _ = self.outcome_transform(Y)
         self._validate_tensor_args(X=X, Y=Y, Yvar=noise, strict=False)
         inputs = X
         if self._num_outputs > 1:
diff --git a/botorch/models/model.py b/botorch/models/model.py
index dae237c8d3..92a95d4c91 100644
--- a/botorch/models/model.py
+++ b/botorch/models/model.py
@@ -33,7 +33,11 @@
 import numpy as np
 import torch
 from botorch import settings
-from botorch.exceptions.errors import BotorchTensorDimensionError, InputDataError
+from botorch.exceptions.errors import (
+    BotorchTensorDimensionError,
+    DeprecationError,
+    InputDataError,
+)
 from botorch.logging import shape_to_str
 from botorch.models.utils.assorted import fantasize as fantasize_flag
 from botorch.posteriors import Posterior, PosteriorList
@@ -83,7 +87,7 @@ def posterior(
         self,
         X: Tensor,
         output_indices: Optional[List[int]] = None,
-        observation_noise: bool = False,
+        observation_noise: Union[bool, Tensor] = False,
         posterior_transform: Optional[PosteriorTransform] = None,
         **kwargs: Any,
     ) -> Posterior:
@@ -102,7 +106,12 @@ def posterior(
             Can be used to speed up computation if only a subset of the
             model's outputs are required for optimization. If omitted,
             computes the posterior over all model outputs.
-            observation_noise: If True, add observation noise to the posterior.
+            observation_noise: For models with an inferred noise level, if True,
+                include observation noise. For models with an observed noise level,
+                this must be a `model_batch_shape x 1 x m`-dim tensor or
+                a `model_batch_shape x n' x m`-dim tensor containing the average
+                noise for each batch and output. `noise` must be in the
+                outcome-transformed space if an outcome transform is used.
             posterior_transform: An optional PosteriorTransform.
 
         Returns:
@@ -310,7 +319,7 @@ def fantasize(
         # TODO: see if any of these can be imported only if TYPE_CHECKING
         X: Tensor,
         sampler: MCSampler,
-        observation_noise: bool = True,
+        observation_noise: Optional[Tensor] = None,
         **kwargs: Any,
     ) -> TFantasizeMixin:
         r"""Construct a fantasy model.
@@ -328,12 +337,21 @@ def fantasize(
             `batch_shape` is the batch shape (must be compatible with the
             batch shape of the model).
             sampler: The sampler used for sampling from the posterior at `X`.
-            observation_noise: If True, include observation noise.
+            observation_noise: A `model_batch_shape x 1 x m`-dim tensor or
+                a `model_batch_shape x n' x m`-dim tensor containing the average
+                noise for each batch and output, where `m` is the number of outputs.
+                `noise` must be in the outcome-transformed space if an outcome
+                transform is used. If None, then the noise will be the inferred
+                noise level.
             kwargs: Will be passed to `model.condition_on_observations`
 
         Returns:
             The constructed fantasy model.
         """
+        if not isinstance(observation_noise, Tensor) and observation_noise is not None:
+            raise DeprecationError(
+                "`fantasize` no longer accepts a boolean for `observation_noise`."
+            )
         # if the inputs are empty, expand the inputs
         if X.shape[-2] == 0:
             output_shape = (
@@ -350,8 +368,15 @@ def fantasize(
         propagate_grads = kwargs.pop("propagate_grads", False)
         with fantasize_flag():
             with settings.propagate_grads(propagate_grads):
-                post_X = self.posterior(X, observation_noise=observation_noise)
+                post_X = self.posterior(
+                    X,
+                    observation_noise=True
+                    if observation_noise is None
+                    else observation_noise,
+                )
                 Y_fantasized = sampler(post_X)  # num_fantasies x batch_shape x n' x m
+                if observation_noise is not None:
+                    kwargs["noise"] = observation_noise.expand(Y_fantasized.shape[1:])
                 return self.condition_on_observations(
                     X=self.transform_inputs(X), Y=Y_fantasized, **kwargs
                 )
@@ -434,7 +459,9 @@ def posterior(
             respective likelihoods to the posterior. If a Tensor of shape
             `(batch_shape) x q x m`, use it directly as the observation
             noise (with `observation_noise[...,i]` added to the posterior
-            of the `i`-th model).
+            of the `i`-th model). `observation_noise` is assumed
+            to be in the outcome-transformed space, if an outcome transform
+            is used by the model.
             posterior_transform: An optional PosteriorTransform.
 
         Returns:
@@ -553,7 +580,7 @@ def fantasize(
         self,
         X: Tensor,
         sampler: MCSampler,
-        observation_noise: bool = True,
+        observation_noise: Optional[Tensor] = None,
         evaluation_mask: Optional[Tensor] = None,
         **kwargs: Any,
     ) -> Model:
@@ -573,7 +600,12 @@ def fantasize(
             batch shape of the model).
             sampler: The sampler used for sampling from the posterior at `X`. If
                 evaluation_mask is not None, this must be a `ListSampler`.
-            observation_noise: If True, include observation noise.
+            observation_noise: A `model_batch_shape x 1 x m`-dim tensor or
+                a `model_batch_shape x n' x m`-dim tensor containing the average
+                noise for each batch and output, where `m` is the number of outputs.
+                `noise` must be in the outcome-transformed space if an outcome
+                transform is used. If None, then the noise will be the inferred
+                noise level.
             evaluation_mask: A `n' x m`-dim tensor of booleans indicating which
                 outputs should be fantasized for a given design. This uses the
                 same evaluation mask for all batches.
@@ -595,6 +627,8 @@ def fantasize(
 
         fant_models = []
         X_i = X
+        if observation_noise is None:
+            observation_noise_i = observation_noise
         for i in range(self.num_outputs):
             # get the inputs to fantasize at for output i
             if evaluation_mask is not None:
@@ -604,12 +638,15 @@ def fantasize(
                 # samples from a single Sobol sequence or consider requiring that the
                 # sampling is IID to ensure good coverage.
                 sampler_i = sampler.samplers[i]
+                if observation_noise is not None:
+                    observation_noise_i = observation_noise[..., mask_i, i : i + 1]
             else:
                 sampler_i = sampler
+
             fant_model = self.models[i].fantasize(
                 X=X_i,
                 sampler=sampler_i,
-                observation_noise=observation_noise,
+                observation_noise=observation_noise_i,
                 **kwargs,
             )
             fant_models.append(fant_model)
diff --git a/botorch/utils/testing.py b/botorch/utils/testing.py
index 3ef838fb40..6ed7d37e0e 100644
--- a/botorch/utils/testing.py
+++ b/botorch/utils/testing.py
@@ -375,6 +375,7 @@ def _get_random_data(
        [torch.linspace(0, 0.95, n, **tkwargs) for _ in range(d)], dim=-1
    )
    train_x = train_x + 0.05 * torch.rand_like(train_x).repeat(rep_shape)
+    train_x[0] += 0.02  # modify the first batch
    train_y = torch.sin(train_x[..., :1] * (2 * math.pi))
    train_y = train_y + 0.2 * torch.randn(n, m, **tkwargs).repeat(rep_shape)
    return train_x, train_y
diff --git a/test/models/test_gp_regression.py b/test/models/test_gp_regression.py
index 2ac4f8f835..3e400a8354 100644
--- a/test/models/test_gp_regression.py
+++ b/test/models/test_gp_regression.py
@@ -318,8 +318,6 @@ def test_fantasize(self):
         sampler = SobolQMCNormalSampler(sample_shape=torch.Size([3]))
         fm = model.fantasize(X=X_f, sampler=sampler)
         self.assertIsInstance(fm, model.__class__)
-        fm = model.fantasize(X=X_f, sampler=sampler, observation_noise=False)
-        self.assertIsInstance(fm, model.__class__)
 
         # check that input transforms are applied to X.
         tkwargs = {"device": self.device, "dtype": torch.float}
@@ -456,6 +454,68 @@ def test_construct_inputs(self):
             self.assertTrue(Y.equal(data_dict["train_Y"]))
             self.assertTrue(Yvar.equal(data_dict["train_Yvar"]))
 
+    def test_fantasized_noise(self):
+        for batch_shape, m, dtype, use_octf in itertools.product(
+            (torch.Size(), torch.Size([2])),
+            (1, 2),
+            (torch.float, torch.double),
+            (False, True),
+        ):
+            tkwargs = {"device": self.device, "dtype": dtype}
+            octf = Standardize(m=m, batch_shape=batch_shape) if use_octf else None
+            model, _ = self._get_model_and_data(
+                batch_shape=batch_shape, m=m, outcome_transform=octf, **tkwargs
+            )
+            # fantasize
+            X_f = torch.rand(torch.Size(batch_shape + torch.Size([4, 1])), **tkwargs)
+            sampler = SobolQMCNormalSampler(sample_shape=torch.Size([3]))
+            fm = model.fantasize(X=X_f, sampler=sampler)
+            noise = (
+                model.likelihood.noise.unsqueeze(-1)
+                if m == 1
+                else model.likelihood.noise.transpose(-1, -2)
+            )
+            avg_noise = noise.mean(dim=-2, keepdim=True)
+            fm_noise = (
+                fm.likelihood.noise.unsqueeze(-1)
+                if m == 1
+                else fm.likelihood.noise.transpose(-1, -2)
+            )
+
+            self.assertTrue((fm_noise[..., -4:, :] == avg_noise).all())
+            # pass tensor of noise
+            # noise is assumed to be outcome transformed
+            # batch shape x n' x m
+            obs_noise = torch.full(
+                X_f.shape[:-1] + torch.Size([m]), 0.1, dtype=dtype, device=self.device
+            )
+            fm = model.fantasize(X=X_f, sampler=sampler, observation_noise=obs_noise)
+            fm_noise = (
+                fm.likelihood.noise.unsqueeze(-1)
+                if m == 1
+                else fm.likelihood.noise.transpose(-1, -2)
+            )
+            self.assertTrue((fm_noise[..., -4:, :] == obs_noise).all())
+            # test batch shape x 1 x m
+            obs_noise = torch.full(
+                X_f.shape[:-2] + torch.Size([1, m]),
+                0.1,
+                dtype=dtype,
+                device=self.device,
+            )
+            fm = model.fantasize(X=X_f, sampler=sampler, observation_noise=obs_noise)
+            fm_noise = (
+                fm.likelihood.noise.unsqueeze(-1)
+                if m == 1
+                else fm.likelihood.noise.transpose(-1, -2)
+            )
+            self.assertTrue(
+                (
+                    fm_noise[..., -4:, :]
+                    == obs_noise.expand(X_f.shape[:-1] + torch.Size([m]))
+                ).all()
+            )
+
 
 class TestHeteroskedasticSingleTaskGP(TestSingleTaskGP):
     def _get_model_and_data(
diff --git a/test/models/test_gp_regression_fidelity.py b/test/models/test_gp_regression_fidelity.py
index 778a829b5b..512d67617c 100644
--- a/test/models/test_gp_regression_fidelity.py
+++ b/test/models/test_gp_regression_fidelity.py
@@ -362,8 +362,6 @@ def test_fantasize(self):
         sampler = SobolQMCNormalSampler(sample_shape=torch.Size([3]))
         fm = model.fantasize(X=X_f, sampler=sampler)
         self.assertIsInstance(fm, model.__class__)
-        fm = model.fantasize(X=X_f, sampler=sampler, observation_noise=False)
-        self.assertIsInstance(fm, model.__class__)
 
     def test_subset_model(self):
         for (iteration_fidelity, data_fidelities) in self.FIDELITY_TEST_PAIRS:
diff --git a/test/models/test_gp_regression_mixed.py b/test/models/test_gp_regression_mixed.py
index d7dda8319d..58afb16957 100644
--- a/test/models/test_gp_regression_mixed.py
+++ b/test/models/test_gp_regression_mixed.py
@@ -236,8 +236,6 @@ def test_fantasize(self):
         sampler = SobolQMCNormalSampler(sample_shape=torch.Size([3]))
         fm = model.fantasize(X=X_f, sampler=sampler)
         self.assertIsInstance(fm, model.__class__)
-        fm = model.fantasize(X=X_f, sampler=sampler, observation_noise=False)
-        self.assertIsInstance(fm, model.__class__)
 
     def test_subset_model(self):
         d, m = 3, 2
diff --git a/test/models/test_gpytorch.py b/test/models/test_gpytorch.py
index f527ceb0d4..020f3a63a1 100644
--- a/test/models/test_gpytorch.py
+++ b/test/models/test_gpytorch.py
@@ -15,7 +15,7 @@
     BotorchTensorDimensionError,
     BotorchTensorDimensionWarning,
 )
-from botorch.exceptions.errors import InputDataError
+from botorch.exceptions.errors import DeprecationError, InputDataError
 from botorch.fit import fit_gpytorch_mll
 from botorch.models.gpytorch import (
     BatchedMultiOutputGPyTorchModel,
@@ -208,11 +208,6 @@ def test_gpytorch_model(self):
         cm = model.fantasize(torch.rand(2, 1, **tkwargs), sampler=sampler)
         self.assertIsInstance(cm, SimpleGPyTorchModel)
         self.assertEqual(cm.train_targets.shape, torch.Size([2, 7]))
-        cm = model.fantasize(
-            torch.rand(2, 1, **tkwargs), sampler=sampler, observation_noise=True
-        )
-        self.assertIsInstance(cm, SimpleGPyTorchModel)
-        self.assertEqual(cm.train_targets.shape, torch.Size([2, 7]))
         cm = model.fantasize(
             torch.rand(2, 1, **tkwargs),
             sampler=sampler,
@@ -220,6 +215,14 @@ def test_gpytorch_model(self):
         )
         self.assertIsInstance(cm, SimpleGPyTorchModel)
         self.assertEqual(cm.train_targets.shape, torch.Size([2, 7]))
+        # test that boolean observation noise is deprecated
+        msg = "`fantasize` no longer accepts a boolean for `observation_noise`."
+        with self.assertRaisesRegex(DeprecationError, msg):
+            model.fantasize(
+                torch.rand(2, 1, **tkwargs),
+                sampler=sampler,
+                observation_noise=True,
+            )
 
     def test_validate_tensor_args(self) -> None:
         n, d = 3, 2
@@ -386,11 +389,6 @@ def test_batched_multi_output_gpytorch_model(self):
         cm = model.fantasize(torch.rand(2, 1, **tkwargs), sampler=sampler)
         self.assertIsInstance(cm, SimpleBatchedMultiOutputGPyTorchModel)
         self.assertEqual(cm.train_targets.shape, torch.Size([2, 2, 7]))
-        cm = model.fantasize(
-            torch.rand(2, 1, **tkwargs), sampler=sampler, observation_noise=True
-        )
-        self.assertIsInstance(cm, SimpleBatchedMultiOutputGPyTorchModel)
-        self.assertEqual(cm.train_targets.shape, torch.Size([2, 2, 7]))
         cm = model.fantasize(
             torch.rand(2, 1, **tkwargs),
             sampler=sampler,
diff --git a/test/models/test_model_list_gp_regression.py b/test/models/test_model_list_gp_regression.py
index 232a36a8bc..fb9c80c535 100644
--- a/test/models/test_model_list_gp_regression.py
+++ b/test/models/test_model_list_gp_regression.py
@@ -379,38 +379,82 @@ def test_transform_revert_train_inputs(self):
             self.assertTrue(torch.equal(m._original_train_inputs, org_inputs[i]))
 
     def test_fantasize(self):
-        m1 = SingleTaskGP(torch.rand(5, 2), torch.rand(5, 1)).eval()
-        m2 = SingleTaskGP(torch.rand(5, 2), torch.rand(5, 1)).eval()
-        modellist = ModelListGP(m1, m2)
-        fm = modellist.fantasize(
-            torch.rand(3, 2), sampler=IIDNormalSampler(sample_shape=torch.Size([2]))
-        )
-        self.assertIsInstance(fm, ModelListGP)
-        for i in range(2):
-            fm_i = fm.models[i]
-            self.assertIsInstance(fm_i, SingleTaskGP)
-            self.assertEqual(fm_i.train_inputs[0].shape, torch.Size([2, 8, 2]))
-            self.assertEqual(fm_i.train_targets.shape, torch.Size([2, 8]))
-
-        # test decoupled
-        sampler1 = IIDNormalSampler(sample_shape=torch.Size([2]))
-        sampler2 = IIDNormalSampler(sample_shape=torch.Size([2]))
-        eval_mask = torch.tensor(
-            [[1, 0], [0, 1], [1, 0]],
-            dtype=torch.bool,
-        )
-        fm = modellist.fantasize(
-            torch.rand(3, 2),
-            sampler=ListSampler(sampler1, sampler2),
-            evaluation_mask=eval_mask,
-        )
-        self.assertIsInstance(fm, ModelListGP)
-        for i in range(2):
-            fm_i = fm.models[i]
-            self.assertIsInstance(fm_i, SingleTaskGP)
-            num_points = 7 - i
-            self.assertEqual(fm_i.train_inputs[0].shape, torch.Size([2, num_points, 2]))
-            self.assertEqual(fm_i.train_targets.shape, torch.Size([2, num_points]))
+        for model_cls in (SingleTaskGP, FixedNoiseGP):
+            x1 = torch.rand(5, 2)
+            y1 = torch.rand(5, 1)
+            x2 = torch.rand(5, 2)
+            y2 = torch.rand(5, 1)
+            m1_kwargs = {}
+            m2_kwargs = {}
+            if model_cls is FixedNoiseGP:
+                m1_kwargs = {"train_Yvar": torch.full_like(y1, 0.1)}
+                m2_kwargs = {"train_Yvar": torch.full_like(y2, 0.2)}
+            m1 = model_cls(x1, y1, **m1_kwargs).eval()
+            m2 = model_cls(x2, y2, **m2_kwargs).eval()
+            modellist = ModelListGP(m1, m2)
+            fm = modellist.fantasize(
+                torch.rand(3, 2), sampler=IIDNormalSampler(sample_shape=torch.Size([2]))
+            )
+            self.assertIsInstance(fm, ModelListGP)
+            for i in range(2):
+                fm_i = fm.models[i]
+                self.assertIsInstance(fm_i, model_cls)
+                self.assertEqual(fm_i.train_inputs[0].shape, torch.Size([2, 8, 2]))
+                self.assertEqual(fm_i.train_targets.shape, torch.Size([2, 8]))
+
+            # test decoupled
+            sampler1 = IIDNormalSampler(sample_shape=torch.Size([2]))
+            sampler2 = IIDNormalSampler(sample_shape=torch.Size([2]))
+            eval_mask = torch.tensor(
+                [[1, 0], [0, 1], [1, 0]],
+                dtype=torch.bool,
+            )
+            num_designs_per_output = eval_mask.sum(dim=0)
+            fm = modellist.fantasize(
+                torch.rand(3, 2),
+                sampler=ListSampler(sampler1, sampler2),
+                evaluation_mask=eval_mask,
+            )
+            self.assertIsInstance(fm, ModelListGP)
+            for i in range(2):
+                fm_i = fm.models[i]
+                self.assertIsInstance(fm_i, model_cls)
+                num_points = 7 - i
+                self.assertEqual(
+                    fm_i.train_inputs[0].shape, torch.Size([2, num_points, 2])
+                )
+                self.assertEqual(fm_i.train_targets.shape, torch.Size([2, num_points]))
+            # test decoupled with observation_noise
+            if model_cls is FixedNoiseGP:
+                # already transformed
+                observation_noise = torch.full(
+                    (3, 2), 0.3, dtype=x1.dtype, device=x1.device
+                )
+                observation_noise[:, 1] = 0.4
+                fm = modellist.fantasize(
+                    torch.rand(3, 2),
+                    sampler=ListSampler(sampler1, sampler2),
+                    evaluation_mask=eval_mask,
+                    observation_noise=observation_noise,
+                )
+                self.assertIsInstance(fm, ModelListGP)
+                for i in range(2):
+                    fm_i = fm.models[i]
+                    self.assertIsInstance(fm_i, model_cls)
+                    num_points = 7 - i
+                    self.assertEqual(
+                        fm_i.train_inputs[0].shape, torch.Size([2, num_points, 2])
+                    )
+                    self.assertEqual(
+                        fm_i.train_targets.shape, torch.Size([2, num_points])
+                    )
+                    # check observation_noise
+                    self.assertTrue(
+                        torch.equal(
+                            fm_i.likelihood.noise[..., -num_designs_per_output[i] :],
+                            observation_noise[-num_designs_per_output[i] :, i],
+                        )
+                    )
 
     def test_fantasize_with_outcome_transform(self) -> None:
         """
diff --git a/test/models/test_pairwise_gp.py b/test/models/test_pairwise_gp.py
index a4b8c7167a..d0e0b3dac6 100644
--- a/test/models/test_pairwise_gp.py
+++ b/test/models/test_pairwise_gp.py
@@ -382,8 +382,6 @@ def test_fantasize(self) -> None:
         sampler = PairwiseSobolQMCNormalSampler(sample_shape=torch.Size([3]))
         fm = model.fantasize(X=X_f, sampler=sampler)
         self.assertIsInstance(fm, model.__class__)
-        fm = model.fantasize(X=X_f, sampler=sampler, observation_noise=False)
-        self.assertIsInstance(fm, model.__class__)
 
     def test_load_state_dict(self) -> None:
         model, _ = self._get_model_and_data(batch_shape=[])
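
Usage note (editor's addition, not part of the patch): a minimal sketch of the
`fantasize` API after this change, assuming BoTorch at this revision. The
training data, shapes, and noise values below are illustrative only; the point
is that `observation_noise` now takes an optional Tensor (assumed to already be
in the outcome-transformed space) instead of a boolean.

    import torch
    from botorch.models import FixedNoiseGP
    from botorch.sampling import SobolQMCNormalSampler

    # toy training data; train_Yvar holds the observed noise levels
    train_X = torch.rand(8, 2)
    train_Y = torch.sin(train_X[:, :1])
    train_Yvar = torch.full_like(train_Y, 0.1)
    model = FixedNoiseGP(train_X, train_Y, train_Yvar)

    X_f = torch.rand(4, 2)  # n' = 4 fantasy points
    sampler = SobolQMCNormalSampler(sample_shape=torch.Size([3]))

    # Default: the mean of the (already-transformed) training noise is used
    # for each batch and output, both in the posterior and as fantasy noise.
    fm = model.fantasize(X=X_f, sampler=sampler)

    # Explicit noise: an `n' x m`-dim tensor in the outcome-transformed space.
    obs_noise = torch.full((4, 1), 0.1)
    fm = model.fantasize(X=X_f, sampler=sampler, observation_noise=obs_noise)

    # Passing a boolean is deprecated and now raises a DeprecationError:
    # model.fantasize(X=X_f, sampler=sampler, observation_noise=True)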