Deprecate weights argument of risk measures in favor of a `preprocessing_function`.

Summary: Deprecates the `weights` argument of risk measures in favor of a `preprocessing_function`. This is superior in that it allows more flexible modification of the samples before computing the risk measures. This supports use cases such as filtering non-objective outcomes or applying feasibility weighting, all within the risk measure itself. As a result, it helps avoid a number of if/else blocks when implementing robust optimization support in Ax.

Differential Revision: D39493308
fbshipit-source-id: 14950d1757b394bb9445d1f44164c49f0ce6f115
pytorch · Sep 14, 2022 · 550e5b7 · 550e5b7
1 parent d2117e2
commit 550e5b7
Show file tree

Hide file tree

Showing 4 changed files with 200 additions and 84 deletions.
diff --git a/botorch/acquisition/multi_objective/multi_output_risk_measures.py b/botorch/acquisition/multi_objective/multi_output_risk_measures.py
@@ -62,19 +62,26 @@ class MultiOutputRiskMeasureMCObjective(
     def __init__(
         self,
         n_w: int,
+        preprocessing_function: Optional[Callable[[Tensor], Tensor]] = None,
         weights: Optional[Union[List[float], Tensor]] = None,
     ) -> None:
         r"""Transform the posterior samples to samples of a risk measure.
 
         Args:
             n_w: The size of the `w_set` to calculate the risk measure over.
+            preprocessing_function: A preprocessing function to apply to the
+                samples before computing the risk measure. This can be used to
+                remove non-objective outcomes or to align all outcomes for
+                maximization. For constrained optimization, this should also
+                apply feasibility-weighting to samples. Given a `batch x m`-dim
+                tensor of samples, this should return a `batch x m'`-dim tensor.
             weights: An optional `m`-dim tensor or list of weights for scaling
                 multi-output samples before calculating the risk measure.
-                This can also be used to make sure that all outputs are
-                correctly aligned for maximization by negating those that are
-                originally defined for minimization.
+                Deprecated, use `preprocessing_function` instead.
         """
-        super().__init__(n_w=n_w, weights=weights)
+        super().__init__(
+            n_w=n_w, preprocessing_function=preprocessing_function, weights=weights
+        )
 
     def _prepare_samples(self, samples: Tensor) -> Tensor:
         r"""Prepare samples for risk measure calculations by scaling and
@@ -86,11 +93,10 @@ def _prepare_samples(self, samples: Tensor) -> Tensor:
                 `n_w` block of samples correspond to the same input.
 
         Returns:
-            A `sample_shape x batch_shape x q x n_w x m`-dim tensor of prepared samples.
+            A `sample_shape x batch_shape x q x n_w x m'`-dim tensor of
+            prepared samples.
         """
-        if self.weights is not None:
-            self.weights = self.weights.to(samples)
-            samples = samples * self.weights
+        samples = self.preprocessing_function(samples)
         return samples.view(*samples.shape[:-2], -1, self.n_w, samples.shape[-1])
 
     @abstractmethod
@@ -104,7 +110,7 @@ def forward(self, samples: Tensor, X: Optional[Tensor] = None) -> Tensor:
             X: A `batch_shape x q x d`-dim tensor of inputs. Ignored.
 
         Returns:
-            A `sample_shape x batch_shape x q x m`-dim tensor of risk measure samples.
+            A `sample_shape x batch_shape x q x m'`-dim tensor of risk measure samples.
         """
         pass  # pragma: no cover
 
@@ -129,7 +135,7 @@ def forward(self, samples: Tensor, X: Optional[Tensor] = None) -> Tensor:
             X: A `batch_shape x q x d`-dim tensor of inputs. Ignored.
 
         Returns:
-            A `sample_shape x batch_shape x q x m`-dim tensor of expectation samples.
+            A `sample_shape x batch_shape x q x m'`-dim tensor of expectation samples.
         """
         prepared_samples = self._prepare_samples(samples)
         return prepared_samples.mean(dim=-2)
@@ -163,7 +169,7 @@ def _get_sorted_prepared_samples(self, samples: Tensor) -> Tensor:
                 `n_w` block of samples correspond to the same input.
 
         Returns:
-            A `sample_shape x batch_shape x q x n_w x m`-dim tensor of sorted samples.
+            A `sample_shape x batch_shape x q x n_w x m'`-dim tensor of sorted samples.
         """
         prepared_samples = self._prepare_samples(samples)
         return prepared_samples.sort(dim=-2, descending=True).values
@@ -178,7 +184,7 @@ def forward(self, samples: Tensor, X: Optional[Tensor] = None) -> Tensor:
             X: A `batch_shape x q x d`-dim tensor of inputs. Ignored.
 
         Returns:
-            A `sample_shape x batch_shape x q x m`-dim tensor of CVaR samples.
+            A `sample_shape x batch_shape x q x m'`-dim tensor of CVaR samples.
         """
         sorted_samples = self._get_sorted_prepared_samples(samples)
         return sorted_samples[..., self.alpha_idx :, :].mean(dim=-2)
@@ -206,7 +212,7 @@ def forward(self, samples: Tensor, X: Optional[Tensor] = None) -> Tensor:
             X: A `batch_shape x q x d`-dim tensor of inputs. Ignored.
 
         Returns:
-            A `sample_shape x batch_shape x q x m`-dim tensor of VaR samples.
+            A `sample_shape x batch_shape x q x m'`-dim tensor of VaR samples.
         """
         sorted_samples = self._get_sorted_prepared_samples(samples)
         return sorted_samples[..., self.alpha_idx, :]
@@ -225,7 +231,7 @@ def forward(self, samples: Tensor, X: Optional[Tensor] = None) -> Tensor:
             X: A `batch_shape x q x d`-dim tensor of inputs. Ignored.
 
         Returns:
-            A `sample_shape x batch_shape x q x m`-dim tensor of worst-case samples.
+            A `sample_shape x batch_shape x q x m'`-dim tensor of worst-case samples.
         """
         prepared_samples = self._prepare_samples(samples)
         return prepared_samples.min(dim=-2).values
@@ -249,6 +255,7 @@ def __init__(
         n_w: int,
         alpha: float,
         expectation: bool = False,
+        preprocessing_function: Optional[Callable[[Tensor], Tensor]] = None,
         weights: Optional[Union[List[float], Tensor]] = None,
         pad_to_n_w: bool = False,
         filter_dominated: bool = True,
@@ -262,11 +269,15 @@ def __init__(
             expectation: If True, returns the expectation of the MVaR set as is
                 done in [Cousin2013MVaR]_. Otherwise, it returns the union of all
                 values in the MVaR set. Default: False.
+            preprocessing_function: A preprocessing function to apply to the
+                samples before computing the risk measure. This can be used to
+                remove non-objective outcomes or to align all outcomes for
+                maximization. For constrained optimization, this should also
+                apply feasibility-weighting to samples. Given a `batch x m`-dim
+                tensor of samples, this should return a `batch x m'`-dim tensor.
             weights: An optional `m`-dim tensor or list of weights for scaling
                 multi-output samples before calculating the risk measure.
-                This can also be used to make sure that all outputs are
-                correctly aligned for maximization by negating those that are
-                originally defined for minimization.
+                Deprecated, use `preprocessing_function` instead.
             pad_to_n_w: If True, instead of padding up to `k'`, which is the size of
                 the largest MVaR set across all batches, we pad the MVaR set up to
                 `n_w`. This produces a return tensor of known size, however, it may
@@ -280,7 +291,9 @@ def __init__(
                 calculating the hypervolume. Disabling this is not recommended
                 if `expectation=True`.
         """
-        super().__init__(n_w=n_w, weights=weights)
+        super().__init__(
+            n_w=n_w, preprocessing_function=preprocessing_function, weights=weights
+        )
         if not 0 < alpha <= 1:
             raise ValueError("`alpha` must be in (0.0, 1.0]")
         self.alpha = alpha
@@ -464,11 +477,11 @@ def forward(self, samples: Tensor, X: Optional[Tensor] = None) -> Tensor:
             X: A `batch_shape x q x d`-dim tensor of inputs. Ignored.
 
         Returns:
-            A `sample_shape x batch_shape x q x m`-dim tensor of MVaR values,
+            A `sample_shape x batch_shape x q x m'`-dim tensor of MVaR values,
             if `self.expectation=True`.
-            Otherwise, this returns a `sample_shape x batch_shape x (q * k') x m`-dim
+            Otherwise, this returns a `sample_shape x batch_shape x (q * k') x m'`-dim
             tensor, where `k'` is the maximum `k` across all batches that is returned
-            by `get_mvar_set_...`. Each `(q * k') x m` corresponds to the `k` MVaR
+            by `get_mvar_set_...`. Each `(q * k') x m'` corresponds to the `k` MVaR
             values for each `q` batch of `n_w` inputs, padded up to `k'` by repeating
             the last element. If `self.pad_to_n_w`, we set `k' = self.n_w`, producing
             a deterministic return shape.
@@ -557,15 +570,18 @@ def __init__(
                 maximization. For constrained optimization, this should also
                 apply feasibility-weighting to samples.
         """
-        super().__init__(alpha=alpha, n_w=n_w)
+        if preprocessing_function is None:
+            preprocessing_function = IdentityMCMultiOutputObjective()
+        super().__init__(
+            alpha=alpha,
+            n_w=n_w,
+            preprocessing_function=preprocessing_function,
+        )
         self.chebyshev_weights = torch.as_tensor(chebyshev_weights)
         self.baseline_Y = baseline_Y
         self.register_buffer(
             "ref_point", torch.as_tensor(ref_point) if ref_point is not None else None
         )
-        if preprocessing_function is None:
-            preprocessing_function = IdentityMCMultiOutputObjective()
-        self.preprocessing_function = preprocessing_function
         self.mvar = MVaR(n_w=self.n_w, alpha=self.alpha)
         self._chebyshev_objective = None
 
@@ -696,9 +712,8 @@ def _prepare_samples(self, samples: Tensor) -> Tensor:
         Returns:
             A `sample_shape x batch_shape x q x n_w`-dim tensor of prepared samples.
         """
-        return VaR._prepare_samples(
-            self, self.chebyshev_objective(samples).unsqueeze(-1)
-        )
+        samples = self.chebyshev_objective(samples)
+        return samples.view(*samples.shape[:-1], -1, self.n_w)
 
     @staticmethod
     def _get_Y_normalization_bounds(

diff --git a/botorch/acquisition/risk_measures.py b/botorch/acquisition/risk_measures.py
@@ -20,12 +20,22 @@
     Computer Simulation, 2014.
 """
 
+import warnings
 from abc import ABC, abstractmethod
 from math import ceil
-from typing import List, Optional, Union
+from typing import Callable, List, Optional, Union
 
 import torch
-from botorch.acquisition.objective import MCAcquisitionObjective
+from botorch.acquisition.multi_objective.objective import (
+    IdentityMCMultiOutputObjective,
+    WeightedMCMultiOutputObjective,
+)
+from botorch.acquisition.objective import (
+    IdentityMCObjective,
+    LinearMCObjective,
+    MCAcquisitionObjective,
+)
+from botorch.exceptions.errors import UnsupportedError
 from torch import Tensor
 
 
@@ -49,20 +59,47 @@ class RiskMeasureMCObjective(MCAcquisitionObjective, ABC):
     def __init__(
         self,
         n_w: int,
+        preprocessing_function: Optional[Callable[[Tensor], Tensor]] = None,
         weights: Optional[Union[List[float], Tensor]] = None,
     ) -> None:
         r"""Transform the posterior samples to samples of a risk measure.
 
         Args:
             n_w: The size of the `w_set` to calculate the risk measure over.
+            preprocessing_function: A preprocessing function to apply to the samples
+                before computing the risk measure. This can be used to scalarize
+                multi-output samples before calculating the risk measure.
+                For constrained optimization, this should also apply
+                feasibility-weighting to samples. Given a `batch x m`-dim
+                tensor of samples, this should return a `batch`-dim tensor.
             weights: An optional `m`-dim tensor or list of weights for scalarizing
                 multi-output samples before calculating the risk measure.
+                Deprecated, use `preprocessing_function` instead.
         """
         super().__init__()
         self.n_w = n_w
-        self.register_buffer(
-            "weights", torch.as_tensor(weights) if weights is not None else None
-        )
+        if weights is not None:
+            warnings.warn(
+                "`weights` argument of risk measures is deprecated and will be removed "
+                "in a future version. Use a `preprocessing_function` instead.",
+                DeprecationWarning,
+            )
+            if preprocessing_function is not None:
+                raise UnsupportedError(
+                    "`weights` and `preprocessing_function` are not supported "
+                    "together. Use only a `preprocessing_function` instead."
+                )
+            weights = torch.as_tensor(weights)
+            if self._is_mo:
+                preprocessing_function = WeightedMCMultiOutputObjective(weights=weights)
+            else:
+                preprocessing_function = LinearMCObjective(weights=weights)
+        if preprocessing_function is None:
+            if self._is_mo:
+                preprocessing_function = IdentityMCMultiOutputObjective()
+            else:
+                preprocessing_function = IdentityMCObjective()
+        self.preprocessing_function = preprocessing_function
 
     def _prepare_samples(self, samples: Tensor) -> Tensor:
         r"""Prepare samples for risk measure calculations by scalarizing and
@@ -76,15 +113,14 @@ def _prepare_samples(self, samples: Tensor) -> Tensor:
         Returns:
             A `sample_shape x batch_shape x q x n_w`-dim tensor of prepared samples.
         """
-        if samples.shape[-1] > 1 and self.weights is None:
+        if samples.shape[-1] > 1 and isinstance(
+            self.preprocessing_function, IdentityMCObjective
+        ):
             raise RuntimeError(
-                "Multi-output samples require `weights` for scalarization!"
+                "Multi-output samples should be scalarized using a "
+                "`preprocessing_function`."
             )
-        if self.weights is not None:
-            self.weights = self.weights.to(samples)
-            samples = samples @ self.weights
-        else:
-            samples = samples.squeeze(-1)
+        samples = self.preprocessing_function(samples)
         return samples.view(*samples.shape[:-1], -1, self.n_w)
 
     @abstractmethod
@@ -121,17 +157,27 @@ def __init__(
         self,
         alpha: float,
         n_w: int,
+        preprocessing_function: Optional[Callable[[Tensor], Tensor]] = None,
         weights: Optional[Union[List[float], Tensor]] = None,
     ) -> None:
         r"""Transform the posterior samples to samples of a risk measure.
 
         Args:
             alpha: The risk level, float in `(0.0, 1.0]`.
             n_w: The size of the `w_set` to calculate the risk measure over.
+            preprocessing_function: A preprocessing function to apply to the samples
+                before computing the risk measure. This can be used to scalarize
+                multi-output samples before calculating the risk measure.
+                For constrained optimization, this should also apply
+                feasibility-weighting to samples. Given a `batch x m`-dim
+                tensor of samples, this should return a `batch`-dim tensor.
             weights: An optional `m`-dim tensor or list of weights for scalarizing
-                multi-objective samples before calculating the risk measure.
+                multi-output samples before calculating the risk measure.
+                Deprecated, use `preprocessing_function` instead.
         """
-        super().__init__(n_w=n_w, weights=weights)
+        super().__init__(
+            n_w=n_w, preprocessing_function=preprocessing_function, weights=weights
+        )
         if not 0 < alpha <= 1:
             raise ValueError("alpha must be in (0.0, 1.0]")
         self.alpha = alpha
@@ -171,17 +217,30 @@ def __init__(
         self,
         alpha: float,
         n_w: int,
+        preprocessing_function: Optional[Callable[[Tensor], Tensor]] = None,
         weights: Optional[Union[List[float], Tensor]] = None,
     ) -> None:
         r"""Transform the posterior samples to samples of a risk measure.
 
         Args:
             alpha: The risk level, float in `(0.0, 1.0]`.
             n_w: The size of the `w_set` to calculate the risk measure over.
+            preprocessing_function: A preprocessing function to apply to the samples
+                before computing the risk measure. This can be used to scalarize
+                multi-output samples before calculating the risk measure.
+                For constrained optimization, this should also apply
+                feasibility-weighting to samples. Given a `batch x m`-dim
+                tensor of samples, this should return a `batch`-dim tensor.
             weights: An optional `m`-dim tensor or list of weights for scalarizing
-                multi-objective samples before calculating the risk measure.
+                multi-output samples before calculating the risk measure.
+                Deprecated, use `preprocessing_function` instead.
         """
-        super().__init__(n_w=n_w, alpha=alpha, weights=weights)
+        super().__init__(
+            n_w=n_w,
+            alpha=alpha,
+            preprocessing_function=preprocessing_function,
+            weights=weights,
+        )
         self._q = 1 - self.alpha_idx / n_w
 
     def forward(self, samples: Tensor, X: Optional[Tensor] = None) -> Tensor: