make eta configurable #1526

Closed · wants to merge 14 commits
31 changes: 25 additions & 6 deletions botorch/acquisition/multi_objective/monte_carlo.py
@@ -85,6 +85,7 @@ def __init__(
sampler: Optional[MCSampler] = None,
objective: Optional[MCMultiOutputObjective] = None,
constraints: Optional[List[Callable[[Tensor], Tensor]]] = None,
eta: Optional[Union[Tensor, float]] = 1e-3,
X_pending: Optional[Tensor] = None,
) -> None:
r"""Constructor for the MCAcquisitionFunction base class.
@@ -102,6 +103,12 @@ def __init__(
`sample_shape x batch-shape x q x m` to a Tensor of dimension
`sample_shape x batch-shape x q`, where negative values imply
feasibility.
eta: The temperature parameter for the sigmoid function used for the
differentiable approximation of the constraints. In case of a float, the
same eta is used for every constraint in `constraints`. In case of a
tensor, its length must match the number of provided constraints; the
i-th constraint is then estimated with the i-th eta value.
X_pending: A `m x d`-dim Tensor of `m` design points that have been
submitted for function evaluation but have not yet been evaluated.
@@ -128,6 +135,10 @@ def __init__(
)
self.add_module("objective", objective)
self.constraints = constraints
if constraints:
    if not isinstance(eta, Tensor):
        eta = torch.full((len(constraints),), eta)
    self.register_buffer("eta", eta)
self.X_pending = None
if X_pending is not None:
self.set_X_pending(X_pending)
@@ -153,7 +164,7 @@ def __init__(
objective: Optional[MCMultiOutputObjective] = None,
constraints: Optional[List[Callable[[Tensor], Tensor]]] = None,
X_pending: Optional[Tensor] = None,
- eta: float = 1e-3,
+ eta: Optional[Union[Tensor, float]] = 1e-3,
) -> None:
r"""q-Expected Hypervolume Improvement supporting m>=2 outcomes.

@@ -189,7 +200,11 @@ def __init__(
been evaluated. Concatenated into `X` upon forward call. Copied and set
to have no gradient.
eta: The temperature parameter for the sigmoid function used for the
- differentiable approximation of the constraints.
+ differentiable approximation of the constraints. In case of a float, the
+ same eta is used for every constraint in `constraints`. In case of a
+ tensor, its length must match the number of provided constraints; the
+ i-th constraint is then estimated with the i-th eta value.
"""
if len(ref_point) != partitioning.num_outcomes:
raise ValueError(
@@ -207,9 +222,9 @@ def __init__(
sampler=sampler,
objective=objective,
constraints=constraints,
+ eta=eta,
X_pending=X_pending,
)
- self.eta = eta
self.register_buffer("ref_point", ref_point)
cell_bounds = partitioning.get_hypercell_bounds()
self.register_buffer("cell_lower_bounds", cell_bounds[0])
@@ -357,7 +372,7 @@ def __init__(
objective: Optional[MCMultiOutputObjective] = None,
constraints: Optional[List[Callable[[Tensor], Tensor]]] = None,
X_pending: Optional[Tensor] = None,
- eta: float = 1e-3,
+ eta: Optional[Union[Tensor, float]] = 1e-3,
prune_baseline: bool = False,
alpha: float = 0.0,
cache_pending: bool = True,
@@ -400,7 +415,11 @@ def __init__(
have points that have been submitted for function evaluation, but
have not yet been evaluated.
eta: The temperature parameter for the sigmoid function used for the
- differentiable approximation of the constraints.
+ differentiable approximation of the constraints. In case of a float, the
+ same eta is used for every constraint in `constraints`. In case of a
+ tensor, its length must match the number of provided constraints; the
+ i-th constraint is then estimated with the i-th eta value.
prune_baseline: If True, remove points in `X_baseline` that are
highly unlikely to be the pareto optimal and better than the
reference point. This can significantly improve computation time and
@@ -431,6 +450,7 @@ def __init__(
sampler=sampler,
objective=objective,
constraints=constraints,
eta=eta,
)
self._setup(model=model, cache_root=cache_root)

@@ -450,7 +470,6 @@ def __init__(
)
self.register_buffer("ref_point", ref_point)
self.alpha = alpha
- self.eta = eta
self.q_in = -1
self.q_out = -1
self.q_subset_indices = BufferDict()
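As context for the changes above, here is a minimal usage sketch of the new parameter. The `model`, `ref_point`, and `partitioning` objects are assumed to already exist (a fitted multi-output model, a reference point, and a partitioning of the non-dominated outcome space); the constraint callables and eta values are purely illustrative:

```python
import torch
from botorch.acquisition.multi_objective.monte_carlo import (
    qExpectedHypervolumeImprovement,
)

# Two outcome constraints of the form c(Z) <= 0; each maps posterior samples
# of shape `... x m` to shape `...`, with negative values implying feasibility.
constraints = [
    lambda Z: Z[..., 0] - 0.5,
    lambda Z: Z[..., 1] - 1.0,
]

# A float eta applies the same temperature to every constraint.
acqf_shared = qExpectedHypervolumeImprovement(
    model=model,                # assumed: a fitted multi-output model
    ref_point=ref_point,        # assumed: reference point for the hypervolume
    partitioning=partitioning,  # assumed: a partitioning of the outcome space
    constraints=constraints,
    eta=1e-3,
)

# A 1-dim tensor eta assigns the i-th value to the i-th constraint.
acqf_per_constraint = qExpectedHypervolumeImprovement(
    model=model,
    ref_point=ref_point,
    partitioning=partitioning,
    constraints=constraints,
    eta=torch.tensor([1e-3, 1e-2]),
)
```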
9 changes: 7 additions & 2 deletions botorch/acquisition/multi_objective/multi_fidelity.py
@@ -46,9 +46,9 @@ def __init__(
sampler: Optional[MCSampler] = None,
objective: Optional[MCMultiOutputObjective] = None,
constraints: Optional[List[Callable[[Tensor], Tensor]]] = None,
+ eta: Optional[Union[Tensor, float]] = 1e-3,
X_pending: Optional[Tensor] = None,
cost_call: Optional[Callable[[Tensor], Tensor]] = None,
- eta: float = 1e-3,
**kwargs: Any,
) -> None:
r"""MOMF acquisition function supporting m>=2 outcomes.
@@ -98,7 +98,11 @@ def __init__(
`batch_shape x q x m`. Defaults to an AffineCostModel with
`C(s) = 1 + s`.
eta: The temperature parameter for the sigmoid function used for the
- differentiable approximation of the constraints.
+ differentiable approximation of the constraints. In case of a float, the
+ same eta is used for every constraint in `constraints`. In case of a
+ tensor, its length must match the number of provided constraints; the
+ i-th constraint is then estimated with the i-th eta value.
"""

if len(ref_point) != partitioning.num_outcomes:
@@ -119,6 +123,7 @@ def __init__(
sampler=sampler,
objective=objective,
constraints=constraints,
eta=eta,
X_pending=X_pending,
)

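The same float-or-tensor convention carries over to MOMF. A minimal sketch under the same assumptions as above (existing `model`, `ref_point`, and `partitioning`), relying on the default cost model; the constraint and eta value are illustrative:

```python
import torch
from botorch.acquisition.multi_objective.multi_fidelity import MOMF

momf = MOMF(
    model=model,                # assumed: a fitted multi-fidelity, multi-output model
    ref_point=ref_point,        # assumed
    partitioning=partitioning,  # assumed
    constraints=[lambda Z: Z[..., -1] - 1.0],  # feasible when last outcome <= 1
    eta=torch.tensor([1e-2]),   # one-element tensor for the single constraint
)
```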
12 changes: 9 additions & 3 deletions botorch/acquisition/objective.py
@@ -453,7 +453,7 @@ def __init__(
objective: Callable[[Tensor, Optional[Tensor]], Tensor],
constraints: List[Callable[[Tensor], Tensor]],
infeasible_cost: Union[Tensor, float] = 0.0,
- eta: float = 1e-3,
+ eta: Union[Tensor, float] = 1e-3,
) -> None:
r"""
Args:
@@ -468,11 +468,17 @@ def __init__(
infeasible_cost: The cost of a design if all associated samples are
infeasible.
eta: The temperature parameter of the sigmoid function approximating
- the constraint.
+ the constraint. Can be either a float or a 1-dim tensor. In case of a
+ float, the same eta is used for every constraint in `constraints`. In
+ case of a tensor, its length must match the number of provided
+ constraints; the i-th constraint is then estimated with the i-th eta
+ value.
"""
super().__init__(objective=objective)
self.constraints = constraints
self.register_buffer("eta", torch.as_tensor(eta))
if type(eta) != Tensor:
eta = torch.full((len(constraints),), eta)
self.register_buffer("eta", eta)
self.register_buffer("infeasible_cost", torch.as_tensor(infeasible_cost))

def forward(self, samples: Tensor, X: Optional[Tensor] = None) -> Tensor:
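On the single-objective side, `ConstrainedMCObjective` accepts the same float-or-tensor eta. A sketch with made-up outcome indices and thresholds:

```python
import torch
from botorch.acquisition.objective import ConstrainedMCObjective

constrained_obj = ConstrainedMCObjective(
    # maximize the first outcome
    objective=lambda samples, X=None: samples[..., 0],
    # feasible when the callable evaluates to <= 0
    constraints=[
        lambda samples: samples[..., 1],
        lambda samples: samples[..., 2] - 0.25,
    ],
    infeasible_cost=10.0,
    # per-constraint temperatures: a sharper approximation for the first constraint
    eta=torch.tensor([1e-3, 1e-1]),
)
```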
30 changes: 22 additions & 8 deletions botorch/utils/objective.py
@@ -10,7 +10,7 @@

from __future__ import annotations

- from typing import Callable, List, Optional
+ from typing import Callable, List, Optional, Union

import torch
from torch import Tensor
@@ -64,7 +64,7 @@ def apply_constraints_nonnegative_soft(
obj: Tensor,
constraints: List[Callable[[Tensor], Tensor]],
samples: Tensor,
- eta: float,
+ eta: Union[Tensor, float],
) -> Tensor:
r"""Applies constraints to a non-negative objective.

@@ -78,14 +78,24 @@
This callable must support broadcasting. Only relevant for multi-
output models (`m` > 1).
samples: A `n_samples x b x q x m` Tensor of samples drawn from the posterior.
- eta: The temperature parameter for the sigmoid function.
+ eta: The temperature parameter for the sigmoid function. Can be either a
+ float or a 1-dim tensor. In case of a float, the same eta is used for
+ every constraint in `constraints`. In case of a tensor, its length must
+ match the number of provided constraints; the i-th constraint is then
+ estimated with the i-th eta value.

Returns:
A `n_samples x b x q (x m')`-dim tensor of feasibility-weighted objectives.
"""
if not isinstance(eta, Tensor):
    eta = torch.full((len(constraints),), eta)
if len(eta) != len(constraints):
    raise ValueError(
        "Number of provided constraints and number of provided etas do not match."
    )
obj = obj.clamp_min(0)  # Enforce non-negativity with constraints
- for constraint in constraints:
-     constraint_eval = soft_eval_constraint(constraint(samples), eta=eta)
+ for constraint, e in zip(constraints, eta):
+     constraint_eval = soft_eval_constraint(constraint(samples), eta=e)
if obj.dim() == samples.dim():
# Need to unsqueeze to accommodate the outcome dimension.
constraint_eval = constraint_eval.unsqueeze(-1)
@@ -101,7 +111,7 @@ def soft_eval_constraint(lhs: Tensor, eta: float = 1e-3) -> Tensor:
Args:
lhs: The left hand side of the constraint `lhs <= 0`.
eta: The temperature parameter of the sigmoid function. As eta
- grows larger, this approximates the Heaviside step function.
+ decreases, this approximates the Heaviside step function.

Returns:
Element-wise 'soft' feasibility indicator of the same shape as `lhs`.
@@ -118,7 +128,7 @@ def apply_constraints(
constraints: List[Callable[[Tensor], Tensor]],
samples: Tensor,
infeasible_cost: float,
- eta: float = 1e-3,
+ eta: Union[Tensor, float] = 1e-3,
) -> Tensor:
r"""Apply constraints using an infeasible_cost `M` for negative objectives.

@@ -136,7 +146,11 @@
output models (`m` > 1).
samples: A `n_samples x b x q x m` Tensor of samples drawn from the posterior.
infeasible_cost: The infeasible value.
- eta: The temperature parameter of the sigmoid function.
+ eta: The temperature parameter of the sigmoid function. Can be either a
+ float or a 1-dim tensor. In case of a float, the same eta is used for
+ every constraint in `constraints`. In case of a tensor, its length must
+ match the number of provided constraints; the i-th constraint is then
+ estimated with the i-th eta value.

Returns:
A `n_samples x b x q (x m')`-dim tensor of feasibility-weighted objectives.
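The relaxation itself is easy to inspect: as the tests further down exercise, the soft feasibility indicator behaves like `sigmoid(-lhs / eta)`, approaching the hard indicator of `lhs <= 0` as eta shrinks. A quick sketch:

```python
import torch
from botorch.utils.objective import soft_eval_constraint

lhs = torch.tensor([-1.0, 0.0, 1.0])  # negative values are feasible
for eta in (1.0, 1e-1, 1e-3):
    # soft indicator ~= sigmoid(-lhs / eta); sharpens toward a 0/1 step as eta -> 0
    print(eta, soft_eval_constraint(lhs, eta=eta))
```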
102 changes: 99 additions & 3 deletions test/acquisition/multi_objective/test_monte_carlo.py
@@ -517,16 +517,59 @@ def test_constrained_q_expected_hypervolume_improvement(self):
X = torch.zeros(1, 1, **tkwargs)
# test zero slack
for eta in (1e-1, 1e-2):
expected_values = [0.5 * 1.5, 0.5 * 0.5 * 1.5]
for i, constraints in enumerate(
[
[lambda Z: torch.zeros_like(Z[..., -1])],
[
lambda Z: torch.zeros_like(Z[..., -1]),
lambda Z: torch.zeros_like(Z[..., -1]),
],
]
):
acqf = qExpectedHypervolumeImprovement(
model=mm,
ref_point=ref_point,
partitioning=partitioning,
sampler=sampler,
constraints=constraints,
eta=eta,
)
res = acqf(X)
self.assertAlmostEqual(res.item(), expected_values[i], places=4)
# test multiple constraints with one and with multiple etas
constraints = [
lambda Z: torch.ones_like(Z[..., -1]),
lambda Z: torch.ones_like(Z[..., -1]),
]
etas = [1, torch.tensor([1, 10])]
expected_values = [
(
torch.sigmoid(torch.as_tensor(-1.0))
* torch.sigmoid(torch.as_tensor(-1.0))
* 1.5
).item(),
(
torch.sigmoid(torch.as_tensor(-1.0))
* torch.sigmoid(torch.as_tensor(-1.0 / 10.0))
* 1.5
).item(),
]
for eta, expected_value in zip(etas, expected_values):
acqf = qExpectedHypervolumeImprovement(
model=mm,
ref_point=ref_point,
partitioning=partitioning,
sampler=sampler,
- constraints=[lambda Z: torch.zeros_like(Z[..., -1])],
+ constraints=constraints,
eta=eta,
)
res = acqf(X)
- self.assertAlmostEqual(res.item(), 0.5 * 1.5, places=4)
+ self.assertAlmostEqual(
+     res.item(),
+     expected_value,
+     places=4,
+ )
# test feasible
acqf = qExpectedHypervolumeImprovement(
model=mm,
@@ -1074,7 +1117,29 @@ def test_constrained_q_noisy_expected_hypervolume_improvement(self):
)
mm = MockModel(MockPosterior(samples=baseline_samples))
X = torch.zeros(1, 1, **tkwargs)
- # test zero slack
+ # test zero slack with multiple constraints and multiple etas
for eta in [1e-1, 1e-2, torch.tensor([1.0, 10.0])]:
# set the MockPosterior to use samples over baseline points
mm._posterior._samples = baseline_samples
sampler = IIDNormalSampler(sample_shape=torch.Size([1]))
acqf = qNoisyExpectedHypervolumeImprovement(
model=mm,
ref_point=ref_point,
X_baseline=X_baseline,
sampler=sampler,
constraints=[
lambda Z: torch.zeros_like(Z[..., -1]),
lambda Z: torch.zeros_like(Z[..., -1]),
],
eta=eta,
cache_root=False,
)
# set the MockPosterior to use samples over baseline points and new
# candidates
mm._posterior._samples = samples
res = acqf(X)
self.assertAlmostEqual(res.item(), 0.5 * 0.5 * 1.5, places=4)
# test zero slack single constraint
for eta in (1e-1, 1e-2):
# set the MockPosterior to use samples over baseline points
mm._posterior._samples = baseline_samples
@@ -1169,6 +1234,37 @@ def test_constrained_q_noisy_expected_hypervolume_improvement(self):
mm._posterior._samples = samples
res = acqf(X)
self.assertAlmostEqual(res.item(), 1.5, places=4)
# test multiple constraints with one and with multiple etas
# this crashes for large etas, and I do not know why
# set the MockPosterior to use samples over baseline points
etas = [torch.tensor([1.0]), torch.tensor([1.0, 10.0])]
constraints = [
[lambda Z: torch.ones_like(Z[..., -1])],
[
lambda Z: torch.ones_like(Z[..., -1]),
lambda Z: torch.ones_like(Z[..., -1]),
],
]
Comment on lines +1241 to +1247

Contributor (@Balandat): Could make this a bit cleaner, but not really necessary.

Suggested change:
- constraints = [
-     [lambda Z: torch.ones_like(Z[..., -1])],
-     [
-         lambda Z: torch.ones_like(Z[..., -1]),
-         lambda Z: torch.ones_like(Z[..., -1]),
-     ],
- ]
+ def con_func(Z):
+     return torch.ones_like(Z[..., -1])
+
+ constraints = [[con_func], [con_func, con_func]]

Contributor Author: Hi @Balandat, if you want I can change it accordingly. Is this still possible, as the PR has already been imported to Phabricator?

Contributor (@Balandat): Yeah, we can always reimport (not anymore for this one b/c it already got merged). So no worries for this one, as this was really a minor nit.
expected_values = [
(torch.sigmoid(torch.as_tensor(-1.0 / 1)) * 1.5).item(),
(
torch.sigmoid(torch.as_tensor(-1.0 / 1))
* torch.sigmoid(torch.as_tensor(-1.0 / 10))
* 1.5
).item(),
]
for eta, constraint, expected_value in zip(
etas, constraints, expected_values
):
acqf.constraints = constraint
acqf.eta = eta
res = acqf(X)

self.assertAlmostEqual(
res.item(),
expected_value,
places=4,
)
# test infeasible
# set the MockPosterior to use samples over baseline points
mm._posterior._samples = baseline_samples
Expand Down