Skip to content

Commit

Permalink
Categorical deterministic surrogate (#479)
Browse files Browse the repository at this point in the history
* implement categorical deterministic surrogate

* add tests

* modify notebook

* make pyright happy
  • Loading branch information
jduerholt authored Dec 9, 2024
1 parent ee13ae3 commit b840bae
Show file tree
Hide file tree
Showing 8 changed files with 489 additions and 48 deletions.
6 changes: 5 additions & 1 deletion bofire/data_models/surrogates/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
AnyBotorchSurrogate,
BotorchSurrogates,
)
from bofire.data_models.surrogates.deterministic import LinearDeterministicSurrogate
from bofire.data_models.surrogates.deterministic import (
CategoricalDeterministicSurrogate,
LinearDeterministicSurrogate,
)
from bofire.data_models.surrogates.empirical import EmpiricalSurrogate
from bofire.data_models.surrogates.fully_bayesian import SaasSingleTaskGPSurrogate
from bofire.data_models.surrogates.linear import LinearSurrogate
Expand Down Expand Up @@ -55,6 +58,7 @@
PolynomialSurrogate,
TanimotoGPSurrogate,
LinearDeterministicSurrogate,
CategoricalDeterministicSurrogate,
MultiTaskGPSurrogate,
SingleTaskIBNNSurrogate,
PiecewiseLinearGPSurrogate,
Expand Down
6 changes: 5 additions & 1 deletion bofire/data_models/surrogates/botorch_surrogates.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@

from bofire.data_models.base import BaseModel
from bofire.data_models.domain.api import Inputs, Outputs
from bofire.data_models.surrogates.deterministic import LinearDeterministicSurrogate
from bofire.data_models.surrogates.deterministic import (
CategoricalDeterministicSurrogate,
LinearDeterministicSurrogate,
)
from bofire.data_models.surrogates.empirical import EmpiricalSurrogate
from bofire.data_models.surrogates.fully_bayesian import SaasSingleTaskGPSurrogate
from bofire.data_models.surrogates.linear import LinearSurrogate
Expand Down Expand Up @@ -39,6 +42,7 @@
LinearSurrogate,
PolynomialSurrogate,
LinearDeterministicSurrogate,
CategoricalDeterministicSurrogate,
MultiTaskGPSurrogate,
PiecewiseLinearGPSurrogate,
]
Expand Down
53 changes: 53 additions & 0 deletions bofire/data_models/surrogates/deterministic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,66 @@

from bofire.data_models.features.api import (
AnyOutput,
CategoricalInput,
ContinuousInput,
ContinuousOutput,
DiscreteInput,
)
from bofire.data_models.surrogates.botorch import BotorchSurrogate


class CategoricalDeterministicSurrogate(BotorchSurrogate):
    """Surrogate that maps the categories of one categorical input to fixed floats.

    Each category of the single categorical input feature is assigned a
    deterministic continuous value. This is useful if one wants to penalize
    certain categories of an input feature more than others during the
    optimization process.

    Attributes:
        mapping: A dictionary mapping categories to deterministic float values.
            Its keys have to be exactly the categories of the input feature.
    """

    type: Literal["CategoricalDeterministicSurrogate"] = (
        "CategoricalDeterministicSurrogate"
    )
    mapping: Annotated[Dict[str, float], Field(min_length=2)]

    # NOTE: this validator is declared before `validate_mapping` on purpose;
    # `validate_mapping` reads `.categories` from the first input and therefore
    # relies on every input already being known to be categorical.
    @model_validator(mode="after")
    def validate_input_types(self):
        """Ensure that every input feature is a categorical input.

        Raises:
            ValueError: If at least one input is not a `CategoricalInput`.
        """
        if len(self.inputs.get([CategoricalInput])) != len(self.inputs):
            raise ValueError(
                "Only categorical are supported for the `CategoricalDeterministicSurrogate`",
            )
        return self

    @classmethod
    def is_output_implemented(cls, my_type: Type[AnyOutput]) -> bool:
        """Check whether an output feature type is supported by this surrogate.

        Args:
            my_type: The output feature class (not an instance) to check.

        Returns:
            bool: True only if `my_type` is `ContinuousOutput` or a subclass of
                it, False otherwise.
        """
        # `isinstance(my_type, type(ContinuousOutput))` compares against the
        # metaclass and is therefore true for every class created by that
        # metaclass, not only for continuous outputs. Compare the class
        # hierarchy instead; the `isinstance(..., type)` guard keeps the call
        # from raising a TypeError when something other than a class is passed.
        return isinstance(my_type, type) and issubclass(my_type, ContinuousOutput)

    @model_validator(mode="after")
    def validate_mapping(self):
        """Validate the mapping keys match the categories of the input feature.

        Raises:
            ValueError: If more than one input feature is present.
            ValueError: If the mapping keys do not match the categories of the
                input feature.
        """
        if len(self.inputs) != 1:
            raise ValueError(
                "Only one input is supported for the `CategoricalDeterministicSurrogate`"
            )
        # Order-insensitive comparison: only the set of keys has to agree.
        if sorted(self.inputs[0].categories) != sorted(self.mapping.keys()):
            raise ValueError("Mapping keys do not match input feature keys.")
        return self


class LinearDeterministicSurrogate(BotorchSurrogate):
type: Literal["LinearDeterministicSurrogate"] = "LinearDeterministicSurrogate"
coefficients: Annotated[Dict[str, float], Field(min_length=1)]
Expand Down
27 changes: 25 additions & 2 deletions bofire/surrogates/deterministic.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
import torch
from botorch.models.deterministic import AffineDeterministicModel

from bofire.data_models.surrogates.api import LinearDeterministicSurrogate as DataModel
from bofire.data_models.surrogates.api import (
CategoricalDeterministicSurrogate as CategoricalDeterministicSurrogateDataModel,
)
from bofire.data_models.surrogates.api import (
LinearDeterministicSurrogate as LinearDeterministicSurrogateDataModel,
)
from bofire.surrogates.botorch import BotorchSurrogate
from bofire.utils.torch_tools import tkwargs


class LinearDeterministicSurrogate(BotorchSurrogate):
def __init__(
self,
data_model: DataModel,
data_model: LinearDeterministicSurrogateDataModel,
**kwargs,
):
self.intercept = data_model.intercept
Expand All @@ -23,3 +28,21 @@ def __init__(
.to(**tkwargs)
.unsqueeze(-1),
)


class CategoricalDeterministicSurrogate(BotorchSurrogate):
    """Surrogate that wraps a fixed category-to-value mapping in a botorch model.

    The mapping values are collected in the order of the input feature's
    categories and handed to an `AffineDeterministicModel` as its `a` weights,
    with the offset `b` fixed to zero.

    NOTE(review): this presumably relies on the input being one-hot encoded in
    the same category order when the model is evaluated - confirm against the
    preprocessing used by `BotorchSurrogate`.
    """

    def __init__(
        self,
        data_model: CategoricalDeterministicSurrogateDataModel,
        **kwargs,
    ):
        """Build the deterministic model from the data model's mapping.

        Args:
            data_model: Data model that provides the inputs and the
                category-to-value mapping.
            **kwargs: Forwarded unchanged to `BotorchSurrogate.__init__`.
        """
        # Stored before the parent initializer runs, as in the original code.
        self.mapping = data_model.mapping
        super().__init__(data_model=data_model, **kwargs)

        # One weight per category, following the order of the input's categories.
        categories = self.inputs[0].categories
        weights = torch.tensor(
            [data_model.mapping[category] for category in categories],
        )
        # `unsqueeze(-1)` turns the weights into a single-column matrix.
        self.model = AffineDeterministicModel(
            b=0.0,
            a=weights.to(**tkwargs).unsqueeze(-1),
        )
6 changes: 5 additions & 1 deletion bofire/surrogates/mapper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from typing import Dict, Type

from bofire.data_models.surrogates import api as data_models
from bofire.surrogates.deterministic import LinearDeterministicSurrogate
from bofire.surrogates.deterministic import (
CategoricalDeterministicSurrogate,
LinearDeterministicSurrogate,
)
from bofire.surrogates.empirical import EmpiricalSurrogate
from bofire.surrogates.fully_bayesian import SaasSingleTaskGPSurrogate
from bofire.surrogates.mixed_single_task_gp import MixedSingleTaskGPSurrogate
Expand Down Expand Up @@ -32,6 +35,7 @@
data_models.MultiTaskGPSurrogate: MultiTaskGPSurrogate,
data_models.SingleTaskIBNNSurrogate: SingleTaskGPSurrogate,
data_models.PiecewiseLinearGPSurrogate: PiecewiseLinearGPSurrogate,
data_models.CategoricalDeterministicSurrogate: CategoricalDeterministicSurrogate,
}


Expand Down
63 changes: 63 additions & 0 deletions tests/bofire/data_models/specs/surrogates.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,69 @@
},
)

# Valid spec for `CategoricalDeterministicSurrogate`: a single categorical
# input "x_cat" whose categories ("a", "b", "c") are exactly the keys of
# `mapping`, together with one continuous output.
specs.add_valid(
models.CategoricalDeterministicSurrogate,
lambda: {
"inputs": Inputs(
features=[
CategoricalInput(key="x_cat", categories=["a", "b", "c"]),
],
).model_dump(),
"outputs": Outputs(
features=[
ContinuousOutput(key="y_cat"),
],
).model_dump(),
"input_preprocessing_specs": {"x_cat": CategoricalEncodingEnum.ONE_HOT},
"mapping": {"a": 0.1, "b": 0.2, "c": 1.0},
"dump": None,
},
)


# Invalid spec: two categorical inputs ("x_cat" and "x_cat2") are passed.
# Construction is expected to fail with a ValueError carrying the
# "Only one input is supported" message given below.
specs.add_invalid(
models.CategoricalDeterministicSurrogate,
lambda: {
"inputs": Inputs(
features=[
CategoricalInput(key="x_cat", categories=["a", "b", "c"]),
CategoricalInput(key="x_cat2", categories=["a", "b", "c"]),
],
).model_dump(),
"outputs": Outputs(
features=[
ContinuousOutput(key="y_cat"),
],
).model_dump(),
"input_preprocessing_specs": {"x_cat": CategoricalEncodingEnum.ONE_HOT},
"mapping": {"a": 0.1, "b": 0.2, "c": 1.0},
"dump": None,
},
error=ValueError,
message="Only one input is supported for the `CategoricalDeterministicSurrogate`",
)

# Invalid spec: the mapping contains the key "d", which is not a category of
# "x_cat", while the category "c" has no mapping entry. Construction is
# expected to fail with a ValueError carrying the message given below.
specs.add_invalid(
models.CategoricalDeterministicSurrogate,
lambda: {
"inputs": Inputs(
features=[
CategoricalInput(key="x_cat", categories=["a", "b", "c"]),
],
).model_dump(),
"outputs": Outputs(
features=[
ContinuousOutput(key="y_cat"),
],
).model_dump(),
"input_preprocessing_specs": {"x_cat": CategoricalEncodingEnum.ONE_HOT},
"mapping": {"a": 0.1, "b": 0.2, "d": 1.0},
"dump": None,
},
error=ValueError,
message="Mapping keys do not match input feature keys.",
)

specs.add_valid(
models.LinearDeterministicSurrogate,
lambda: {
Expand Down
27 changes: 25 additions & 2 deletions tests/bofire/surrogates/test_deterministic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,15 @@

import bofire.surrogates.api as surrogates
from bofire.data_models.domain.api import Inputs, Outputs
from bofire.data_models.features.api import ContinuousInput, ContinuousOutput
from bofire.data_models.surrogates.api import LinearDeterministicSurrogate
from bofire.data_models.features.api import (
CategoricalInput,
ContinuousInput,
ContinuousOutput,
)
from bofire.data_models.surrogates.api import (
CategoricalDeterministicSurrogate,
LinearDeterministicSurrogate,
)


def test_linear_deterministic_surrogate():
Expand All @@ -24,3 +31,19 @@ def test_linear_deterministic_surrogate():
experiments = pd.DataFrame(data={"a": [1.0, 2.0], "b": [0.5, 4.0]})
preds = surrogate.predict(experiments)
assert_frame_equal(preds, pd.DataFrame(data={"y_pred": [1.5, 10.0], "y_sd": 0.0}))


def test_categorical_deterministic_surrogate():
    """Predictions equal the mapped value of each category with zero std."""
    inputs = Inputs(
        features=[
            CategoricalInput(key="a", categories=["A", "B"]),
        ],
    )
    outputs = Outputs(features=[ContinuousOutput(key="y")])
    data_model = CategoricalDeterministicSurrogate(
        inputs=inputs,
        outputs=outputs,
        mapping={"A": 1.0, "B": 2.0},
    )

    # Map the data model to its functional surrogate and predict one row per category.
    surrogate = surrogates.map(data_model)
    candidates = pd.DataFrame(data={"a": ["A", "B"]})
    predictions = surrogate.predict(candidates)

    expected = pd.DataFrame(data={"y_pred": [1.0, 2.0], "y_sd": 0.0})
    assert_frame_equal(predictions, expected)
Loading

0 comments on commit b840bae

Please sign in to comment.