Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor the fractional factorial strategy #403

Merged
merged 6 commits into from
Jun 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 4 additions & 85 deletions bofire/data_models/strategies/fractional_factorial.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import re
import string
from itertools import combinations
from typing import Annotated, Literal, Optional, Type
from typing import Annotated, Literal, Type

from pydantic import Field, model_validator

from bofire.data_models.constraints.api import Constraint
from bofire.data_models.features.api import ContinuousInput, ContinuousOutput, Feature
from bofire.data_models.strategies.strategy import Strategy
from bofire.utils.doe import get_generator, validate_generator


class FractionalFactorialStrategy(Strategy):
Expand All @@ -30,88 +28,9 @@ def is_feature_implemented(cls, my_type: Type[Feature]) -> bool:
@model_validator(mode="after")
def validate(self):
if len(self.generator) > 0:
self.validate_generator(len(self.domain.inputs), self.generator)
validate_generator(len(self.domain.inputs), self.generator)
else:
self.get_generator(
get_generator(
n_factors=len(self.domain.inputs), n_generators=self.n_generators
)
return self

@staticmethod
def validate_generator(n_factors: int, generator: str) -> str:
if len(generator.split(" ")) != n_factors:
raise ValueError("Generator does not match the number of factors.")
# clean it and transform it into a list
generators = [item for item in re.split(r"\-|\s|\+", generator) if item]
lengthes = [len(i) for i in generators]

# Indices of single letters (main factors)
idx_main = [i for i, item in enumerate(lengthes) if item == 1]

# Check that single letters (main factors) are unique
if len(idx_main) != len({generators[i] for i in idx_main}):
raise ValueError("Main factors are confounded with each other.")

# Check that single letters (main factors) follow the alphabet
if (
"".join(sorted([generators[i] for i in idx_main]))
!= string.ascii_lowercase[: len(idx_main)]
):
raise ValueError(
f'Use the letters `{" ".join(string.ascii_lowercase[: len(idx_main)])}` for the main factors.'
)

# Indices of letter combinations.
idx_combi = [i for i, item in enumerate(generators) if item != 1]

# Check that letter combinations are unique
if len(idx_combi) != len({generators[i] for i in idx_combi}):
raise ValueError("Generators are not unique.")

# Check that only letters are used in the combinations that are also single letters (main factors)
if not all(
set(item).issubset({generators[i] for i in idx_main})
for item in [generators[i] for i in idx_combi]
):
raise ValueError("Generators are not valid.")

return generator

@staticmethod
def get_generator(
n_factors: int, n_generators: int, seed: Optional[int] = None
) -> str:
if n_generators == 0:
return " ".join(list(string.ascii_lowercase[:n_factors]))
n_base_factors = n_factors - n_generators
if n_generators == 1:
if n_base_factors == 1:
raise ValueError(
"Design not possible, as main factors are confounded with each other."
)
return " ".join(
list(string.ascii_lowercase[:n_base_factors])
+ [string.ascii_lowercase[:n_base_factors]]
)
n_base_factors = n_factors - n_generators
if n_base_factors - 1 < 2:
raise ValueError(
"Design not possible, as main factors are confounded with each other."
)
generators = [
"".join(i)
for i in (
combinations(
string.ascii_lowercase[:n_base_factors], n_base_factors - 1
)
)
]
if len(generators) > n_generators:
generators = generators[:n_generators]
elif (n_generators - len(generators) == 1) and (n_base_factors > 1):
generators += [string.ascii_lowercase[:n_base_factors]]
elif n_generators - len(generators) >= 1:
raise ValueError(
"Design not possible, as main factors are confounded with each other."
)
return " ".join(list(string.ascii_lowercase[:n_base_factors]) + generators)
4 changes: 2 additions & 2 deletions bofire/strategies/fractional_factorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

import numpy as np
import pandas as pd
from pyDOE3 import fracfact

from bofire.data_models.strategies.api import FractionalFactorialStrategy as DataModel
from bofire.strategies.strategy import Strategy
from bofire.utils.doe import fracfact, get_generator


class FractionalFactorialStrategy(Strategy):
Expand All @@ -27,7 +27,7 @@ def _ask(self, candidate_count: Optional[int] = None) -> pd.DataFrame:
"The strategy automatically determines how many candidates to "
"propose."
)
gen = self.generator or DataModel.get_generator(
gen = self.generator or get_generator(
n_factors=len(self.domain.inputs), n_generators=self.n_generators
)
design = pd.DataFrame(fracfact(gen=gen), columns=self.domain.inputs.get_keys())
Expand Down
212 changes: 211 additions & 1 deletion bofire/utils/doe.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import itertools
import re
import string
import warnings
from typing import List, Optional

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

from bofire.data_models.domain.api import Inputs
from bofire.data_models.features.api import CategoricalInput, ContinuousInput
Expand All @@ -30,6 +32,8 @@ def get_confounding_matrix(
Returns:
_type_: _description_
"""
from sklearn.preprocessing import MinMaxScaler

if len(inputs.get(CategoricalInput)) > 0:
warnings.warn("Categorical input features will be ignored.")

Expand Down Expand Up @@ -58,3 +62,209 @@ def get_confounding_matrix(
scaled_design[":".join(combi)] = scaled_design[list(combi)].prod(axis=1)

return scaled_design.corr()


def ff2n(n_factors: int) -> np.ndarray:
    """Build the two-level full factorial design.

    Args:
        n_factors: Number of factors, i.e. columns of the design.

    Returns:
        Array with one row per combination of the levels -1 and 1, in the
        order produced by ``itertools.product`` (last factor varies fastest).
    """
    level_combinations = itertools.product((-1, 1), repeat=n_factors)
    return np.array(list(level_combinations))


def validate_generator(n_factors: int, generator: str) -> str:
    """Validate a fractional factorial generator and raise if it is invalid.

    The generator is a space separated list of terms. A term is a run of
    letters, optionally prefixed with ``+`` or ``-``. One-letter terms are
    main factors; longer terms are combinations of main factors.

    Args:
        n_factors: Number of space separated terms the generator must have.
        generator: The generator string, e.g. ``"a b c -abc"``.

    Returns:
        The generator string that was passed in.

    Raises:
        ValueError: If the number of terms differs from ``n_factors``, if
            there is no main factor, if a main factor is repeated, if the
            main factors are not the first letters of the alphabet, if a
            combination appears before a main factor, if a combination is
            repeated, or if a combination uses a letter that is not a main
            factor.
    """
    if len(generator.split(" ")) != n_factors:
        raise ValueError("Generator does not match the number of factors.")

    # Drop signs and whitespace; what remains are the bare letter terms.
    generators = [item for item in re.split(r"\-|\s|\+", generator) if item]
    lengthes = [len(item) for item in generators]

    # Positions of single letters (main factors).
    idx_main = [i for i, length in enumerate(lengthes) if length == 1]
    if not idx_main:
        raise ValueError("At least one unconfounded main factor is needed.")

    main_factors = [generators[i] for i in idx_main]
    if len(main_factors) != len(set(main_factors)):
        raise ValueError("Main factors are confounded with each other.")

    expected_letters = string.ascii_lowercase[: len(idx_main)]
    if "".join(sorted(main_factors)) != expected_letters:
        raise ValueError(
            f'Use the letters `{" ".join(expected_letters)}` for the main factors.'
        )

    # Positions of letter combinations. This must look at the term lengths:
    # comparing the terms themselves (strings) with 1 selects every term.
    idx_combi = [i for i, length in enumerate(lengthes) if length > 1]

    # A generator without combinations is a full factorial and trivially
    # ordered; otherwise no combination may precede a main factor.
    if idx_combi and min(idx_combi) < max(idx_main):
        raise ValueError("Main factors have to come before combinations.")

    combinations = [generators[i] for i in idx_combi]
    if len(combinations) != len(set(combinations)):
        raise ValueError("Generators are not unique.")

    # Combinations may only be built from letters that are main factors.
    allowed_letters = set(main_factors)
    if not all(set(item).issubset(allowed_letters) for item in combinations):
        raise ValueError("Generators are not valid.")

    return generator


def fracfact(gen) -> np.ndarray:
    """Compute the two-level fractional factorial design for a generator.

    Args:
        gen: The generator string: space separated letter terms, each
            optionally prefixed with ``+`` or ``-``.

    Returns:
        The design as a float array with one column per term of the
        generator. Each column is the product of the full factorial columns
        of the letters in its term; columns whose term starts with ``-``
        are negated.
    """
    gen = validate_generator(n_factors=gen.count(" ") + 1, generator=gen)

    # Bare letter terms with signs and whitespace removed.
    terms = [token for token in re.split(r"\-|\s|\+", gen) if token]
    n_main = sum(1 for term in terms if len(term) == 1)

    # Positions whose space separated entry carries a leading "-".
    negated = [pos for pos, entry in enumerate(gen.split(" ")) if entry[0] == "-"]

    # Full factorial over the main factors; column j belongs to letter j.
    base = ff2n(n_main)
    design = np.zeros((base.shape[0], len(terms)))

    # Every column, main factors included, is the product of the base
    # columns of its letters ("a" -> 0, "b" -> 1, ...).
    for col, term in enumerate(terms):
        letter_cols = np.array([ord(char) for char in term]) - 97
        design[:, col] = np.prod(base[:, letter_cols], axis=1)

    if negated:
        design[:, negated] *= -1

    return design


def get_alias_structure(gen: str, order: int = 4) -> List[str]:
    """Compute the alias structure of the design built from a generator.

    Effects whose contrast columns have the same sign pattern are grouped
    together. Works only for generators with positive signs.

    Args:
        gen: The generator.
        order: The highest interaction order to include. Defaults to 4.

    Returns:
        One string per group of aliased effects, with the effects joined by
        ``" = "``. The identity is written as ``I``.
    """
    design = fracfact(gen)
    n_experiments, n_factors = design.shape

    # Index 26, one past the last letter, stands for the identity "I".
    labels = string.ascii_lowercase + "I"

    # Sign pattern of a contrast -> effects sharing it, seeded with the
    # all-plus pattern of the identity.
    aliases = {"+" * n_experiments: [(26,)]}
    for size in range(1, min(n_factors, order) + 1):
        for combination in itertools.combinations(range(n_factors), size):
            contrast = np.prod(design[:, combination], axis=1)
            pattern = "".join(np.where(contrast == 1, "+", "-").tolist())
            aliases.setdefault(pattern, []).append(combination)  # type: ignore

    # Within a group: shortest effect first, then by factor indices.
    groups = [
        sorted(group, key=lambda effect: (len(effect), effect))
        for group in aliases.values()
    ]
    # Between groups: by the lengths of their effects, then by the effects.
    groups.sort(key=lambda group: ([len(effect) for effect in group], group))

    return [
        " = ".join("".join(labels[factor] for factor in effect) for effect in group)
        for group in groups
    ]


def get_generator(n_factors: int, n_generators: int) -> str:
    """Compute a generator for a given number of factors and generators.

    The first ``n_factors - n_generators`` letters of the alphabet are used
    as main factors. The generated factors are combinations of those
    letters.

    Args:
        n_factors: The total number of factors (terms in the generator).
        n_generators: The number of generated (confounded) factors.

    Returns:
        The space separated generator string, e.g. ``"a b c ab ac"``.

    Raises:
        ValueError: If ``n_generators`` is negative or if there are too few
            main factors to build the requested number of generators.
    """
    confounded_msg = (
        "Design not possible, as main factors are confounded with each other."
    )
    if n_generators < 0:
        raise ValueError("Number of generators has to be non-negative.")
    if n_generators == 0:
        return " ".join(string.ascii_lowercase[:n_factors])

    n_base_factors = n_factors - n_generators
    base_letters = string.ascii_lowercase[:n_base_factors]

    if n_generators == 1:
        # Fewer than two main factors (including zero or a negative count,
        # which would otherwise slice the alphabet from the end) cannot
        # carry a generated factor.
        if n_base_factors < 2:
            raise ValueError(confounded_msg)
        return " ".join([*base_letters, base_letters])

    if n_base_factors < 3:
        raise ValueError(confounded_msg)

    # Candidates: every combination that leaves out exactly one main factor.
    generators = [
        "".join(letters)
        for letters in itertools.combinations(base_letters, n_base_factors - 1)
    ]
    n_missing = n_generators - len(generators)
    if n_missing < 0:
        generators = generators[:n_generators]
    elif n_missing == 1:
        # One more is available: the product of all main factors.
        generators.append(base_letters)
    elif n_missing > 1:
        raise ValueError(confounded_msg)
    return " ".join([*base_letters, *generators])
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
"cloudpickle>=2.0.0",
"sympy>=1.12",
"cvxpy[CLARABEL]",
"pyDOE3",
sklearn_dependency,
],
"entmoot": ["entmoot>=2.0", "lightgbm==4.0.0", "pyomo==6.7.1", "gurobipy"],
Expand Down
Loading
Loading