From 200349fa2aa3ab9f89ac516ec225e7351f10f2a3 Mon Sep 17 00:00:00 2001 From: Elizabeth Santorella Date: Mon, 26 Aug 2024 18:05:25 -0700 Subject: [PATCH] Compute ground-truth optimization trace on BenchmarkProblem (#2704) Summary: Pull Request resolved: https://github.com/facebook/Ax/pull/2704 This currently doesn't change behavior, but enables more flexibility: the trace computation can now be customized by overriding `BenchmarkProblem.get_opt_trace` or `BenchmarkRunner.get_Y_true`. This is currently duplicative with the tracking metrics setup, which will be reaped in D61432000; these should be combined. For now, this diff runs both ways of computing the optimization trace, along with an assertion that they give the same results. Reviewed By: saitcakmak Differential Revision: D61404056 --- ax/benchmark/benchmark.py | 3 + ax/benchmark/benchmark_problem.py | 111 +++++++++++++----- ax/benchmark/runners/botorch_test.py | 35 +++--- .../problems/synthetic/hss/test_jenatton.py | 5 +- .../tests/problems/test_surrogate_problems.py | 2 +- ax/utils/testing/benchmark_stubs.py | 43 +++++-- 6 files changed, 139 insertions(+), 60 deletions(-) diff --git a/ax/benchmark/benchmark.py b/ax/benchmark/benchmark.py index bbbe9b1149d..14dca5c998e 100644 --- a/ax/benchmark/benchmark.py +++ b/ax/benchmark/benchmark.py @@ -151,6 +151,9 @@ def benchmark_replication( scheduler.get_trace(optimization_config=analysis_opt_config) ) + new_optimization_trace = problem.get_opt_trace(experiment=experiment) + np.testing.assert_allclose(optimization_trace, new_optimization_trace) + try: # Catch any errors that may occur during score computation, such as errors # while accessing "steps" in node based generation strategies. 
The error diff --git a/ax/benchmark/benchmark_problem.py b/ax/benchmark/benchmark_problem.py index eac5009572f..77f5dabee69 100644 --- a/ax/benchmark/benchmark_problem.py +++ b/ax/benchmark/benchmark_problem.py @@ -8,10 +8,16 @@ from dataclasses import dataclass, field from typing import Any, Optional, Union +import numpy as np +import pandas as pd + from ax.benchmark.metrics.base import BenchmarkMetricBase from ax.benchmark.metrics.benchmark import BenchmarkMetric +from ax.benchmark.runners.base import BenchmarkRunner from ax.benchmark.runners.botorch_test import BotorchTestProblemRunner +from ax.core.data import Data +from ax.core.experiment import Experiment from ax.core.objective import MultiObjective, Objective from ax.core.optimization_config import ( MultiObjectiveOptimizationConfig, @@ -20,9 +26,9 @@ ) from ax.core.outcome_constraint import OutcomeConstraint from ax.core.parameter import ParameterType, RangeParameter -from ax.core.runner import Runner from ax.core.search_space import SearchSpace from ax.core.types import ComparisonOp +from ax.service.utils.best_point_mixin import BestPointMixin from ax.utils.common.base import Base from ax.utils.common.typeutils import checked_cast from botorch.test_functions.base import ( @@ -31,6 +37,7 @@ MultiObjectiveTestProblem, ) from botorch.test_functions.synthetic import SyntheticTestFunction +from pyre_extensions import assert_is_instance def _get_name( @@ -89,9 +96,52 @@ class BenchmarkProblem(Base): optimal_value: float search_space: SearchSpace = field(repr=False) - runner: Runner = field(repr=False) + runner: BenchmarkRunner = field(repr=False) is_noiseless: bool + def get_oracle_experiment(self, experiment: Experiment) -> Experiment: + records = [] + + new_experiment = Experiment( + search_space=self.search_space, optimization_config=self.optimization_config + ) + for trial_index, trial in experiment.trials.items(): + for arm in trial.arms: + for metric_name, metric_value in zip( + self.runner.outcome_names, 
self.runner.evaluate_oracle(arm=arm) + ): + records.append( + { + "arm_name": arm.name, + "metric_name": metric_name, + "mean": metric_value.item(), + "sem": 0.0, + "trial_index": trial_index, + } + ) + + new_experiment.attach_trial( + parameterizations=[arm.parameters for arm in trial.arms], + arm_names=[arm.name for arm in trial.arms], + ) + for trial in new_experiment.trials.values(): + trial.mark_completed() + + data = Data(df=pd.DataFrame.from_records(records)) + new_experiment.attach_data(data=data, overwrite_existing_data=True) + return new_experiment + + def get_opt_trace(self, experiment: Experiment) -> np.ndarray: + """Evaluate the optimization trace of a list of Trials.""" + oracle_experiment = self.get_oracle_experiment(experiment=experiment) + + return np.array( + BestPointMixin._get_trace( + experiment=oracle_experiment, + optimization_config=self.optimization_config, + ) + ) + # TODO: Support constrained MOO problems. def get_soo_config_and_outcome_names( @@ -141,6 +191,20 @@ def get_soo_config_and_outcome_names( return opt_config, outcome_names +def get_continuous_search_space(bounds: list[tuple[float, float]]) -> SearchSpace: + return SearchSpace( + parameters=[ + RangeParameter( + name=f"x{i}", + parameter_type=ParameterType.FLOAT, + lower=lower, + upper=upper, + ) + for i, (lower, upper) in enumerate(bounds) + ] + ) + + def create_single_objective_problem_from_botorch( test_problem_class: type[SyntheticTestFunction], test_problem_kwargs: dict[str, Any], @@ -169,17 +233,7 @@ def create_single_objective_problem_from_botorch( test_problem = test_problem_class(**test_problem_kwargs) is_constrained = isinstance(test_problem, ConstrainedBaseTestProblem) - search_space = SearchSpace( - parameters=[ - RangeParameter( - name=f"x{i}", - parameter_type=ParameterType.FLOAT, - lower=lower, - upper=upper, - ) - for i, (lower, upper) in enumerate(test_problem._bounds) - ] - ) + search_space = get_continuous_search_space(test_problem._bounds) dim = 
test_problem_kwargs.get("dim", None) name = _get_name( @@ -249,18 +303,11 @@ def create_multi_objective_problem_from_botorch( # pyre-fixme [45]: Invalid class instantiation test_problem = test_problem_class(**test_problem_kwargs) - problem = create_single_objective_problem_from_botorch( - # pyre-fixme [6]: Passing a multi-objective problem where a - # single-objective problem is expected. - test_problem_class=test_problem_class, - test_problem_kwargs=test_problem_kwargs, - lower_is_better=True, # Seems like we always assume minimization for MOO? - num_trials=num_trials, - observe_noise_sd=observe_noise_sd, + dim = test_problem_kwargs.get("dim", None) + name = _get_name( + test_problem=test_problem, observe_noise_sd=observe_noise_sd, dim=dim ) - name = problem.name - n_obj = test_problem.num_objectives if not observe_noise_sd: noise_sds = [None] * n_obj @@ -292,15 +339,25 @@ def create_multi_objective_problem_from_botorch( for i, metric in enumerate(metrics) ], ) + runner = BotorchTestProblemRunner( + test_problem_class=test_problem_class, + test_problem_kwargs=test_problem_kwargs, + outcome_names=[ + objective.metric.name + for objective in assert_is_instance( + optimization_config.objective, MultiObjective + ).objectives + ], + ) return MultiObjectiveBenchmarkProblem( name=name, - search_space=problem.search_space, + search_space=get_continuous_search_space(test_problem._bounds), optimization_config=optimization_config, - runner=problem.runner, + runner=runner, num_trials=num_trials, - is_noiseless=problem.is_noiseless, + is_noiseless=test_problem.noise_std in (None, 0.0), observe_noise_stds=observe_noise_sd, - has_ground_truth=problem.has_ground_truth, + has_ground_truth=True, optimal_value=test_problem.max_hv, ) diff --git a/ax/benchmark/runners/botorch_test.py b/ax/benchmark/runners/botorch_test.py index 90fddbdefda..3a2fadb4d32 100644 --- a/ax/benchmark/runners/botorch_test.py +++ b/ax/benchmark/runners/botorch_test.py @@ -17,10 +17,7 @@ from 
ax.utils.common.base import Base from ax.utils.common.equality import equality_typechecker from ax.utils.common.serialization import TClassDecoderRegistry, TDecoderRegistry -from botorch.test_functions.synthetic import ( - ConstrainedSyntheticTestFunction, - SyntheticTestFunction, -) +from botorch.test_functions.synthetic import BaseTestProblem, ConstrainedBaseTestProblem from botorch.utils.transforms import normalize, unnormalize from pyre_extensions import assert_is_instance from torch import Tensor @@ -60,22 +57,22 @@ def __eq__(self, other: Any) -> bool: class SyntheticProblemRunner(BenchmarkRunner, ABC): """A Runner for evaluating synthetic problems, either BoTorch - `SyntheticTestFunction`s or Ax benchmarking `ParamBasedTestProblem`s. + `BaseTestProblem`s or Ax benchmarking `ParamBasedTestProblem`s. Given a trial, the Runner will evaluate the problem noiselessly for each arm in the trial, as well as return some metadata about the underlying problem such as the noise_std. """ - test_problem: Union[SyntheticTestFunction, ParamBasedTestProblem] + test_problem: Union[BaseTestProblem, ParamBasedTestProblem] _is_constrained: bool - _test_problem_class: type[Union[SyntheticTestFunction, ParamBasedTestProblem]] + _test_problem_class: type[Union[BaseTestProblem, ParamBasedTestProblem]] _test_problem_kwargs: Optional[dict[str, Any]] def __init__( self, *, - test_problem_class: type[Union[SyntheticTestFunction, ParamBasedTestProblem]], + test_problem_class: type[Union[BaseTestProblem, ParamBasedTestProblem]], test_problem_kwargs: dict[str, Any], outcome_names: list[str], modified_bounds: Optional[list[tuple[float, float]]] = None, @@ -83,7 +80,7 @@ def __init__( """Initialize the test problem runner. Args: - test_problem_class: A BoTorch `SyntheticTestFunction` class or Ax + test_problem_class: A BoTorch `BaseTestProblem` class or Ax `ParamBasedTestProblem` class. test_problem_kwargs: The keyword arguments used for initializing the test problem. 
@@ -105,12 +102,12 @@ def __init__( # abstract class with abstract method `evaluate_true`. test_problem_class(**test_problem_kwargs) ) - if isinstance(self.test_problem, SyntheticTestFunction): + if isinstance(self.test_problem, BaseTestProblem): self.test_problem = self.test_problem.to(dtype=torch.double) - # A `ConstrainedSyntheticTestFunction` is a type of `SyntheticTestFunction`; a + # A `ConstrainedBaseTestProblem` is a type of `BaseTestProblem`; a # `ParamBasedTestProblem` is never constrained. self._is_constrained: bool = isinstance( - self.test_problem, ConstrainedSyntheticTestFunction + self.test_problem, ConstrainedBaseTestProblem ) self._is_moo: bool = self.test_problem.num_objectives > 1 self.outcome_names = outcome_names @@ -202,10 +199,10 @@ def deserialize_init_args( class BotorchTestProblemRunner(SyntheticProblemRunner): """ - A `SyntheticProblemRunner` for BoTorch `SyntheticTestFunction`s. + A `SyntheticProblemRunner` for BoTorch `BaseTestProblem`s. Args: - test_problem_class: A BoTorch `SyntheticTestFunction` class. + test_problem_class: A BoTorch `BaseTestProblem` class. test_problem_kwargs: The keyword arguments used for initializing the test problem. outcome_names: The names of the outcomes returned by the problem. 
@@ -223,7 +220,7 @@ class BotorchTestProblemRunner(SyntheticProblemRunner): def __init__( self, *, - test_problem_class: type[SyntheticTestFunction], + test_problem_class: type[BaseTestProblem], test_problem_kwargs: dict[str, Any], outcome_names: list[str], modified_bounds: Optional[list[tuple[float, float]]] = None, @@ -234,11 +231,9 @@ def __init__( outcome_names=outcome_names, modified_bounds=modified_bounds, ) - self.test_problem: SyntheticTestFunction = self.test_problem.to( - dtype=torch.double - ) + self.test_problem: BaseTestProblem = self.test_problem.to(dtype=torch.double) self._is_constrained: bool = isinstance( - self.test_problem, ConstrainedSyntheticTestFunction + self.test_problem, ConstrainedBaseTestProblem ) def get_Y_true(self, arm: Arm) -> Tensor: @@ -274,7 +269,7 @@ def get_Y_true(self, arm: Arm) -> Tensor: X = unnormalize(unit_X, self.test_problem.bounds) Y_true = self.test_problem.evaluate_true(X).view(-1) - # `SyntheticTestFunction.evaluate_true()` does not negate the outcome + # `BaseTestProblem.evaluate_true()` does not negate the outcome if self.test_problem.negate: Y_true = -Y_true diff --git a/ax/benchmark/tests/problems/synthetic/hss/test_jenatton.py b/ax/benchmark/tests/problems/synthetic/hss/test_jenatton.py index 11a634ce26e..5f8ee63db30 100644 --- a/ax/benchmark/tests/problems/synthetic/hss/test_jenatton.py +++ b/ax/benchmark/tests/problems/synthetic/hss/test_jenatton.py @@ -14,7 +14,6 @@ get_jenatton_benchmark_problem, jenatton_test_function, ) -from ax.benchmark.runners.base import BenchmarkRunner from ax.benchmark.runners.botorch_test import ParamBasedTestProblemRunner from ax.core.arm import Arm from ax.core.data import Data @@ -96,9 +95,7 @@ def test_jenatton_test_function(self) -> None: value, ) self.assertAlmostEqual( - assert_is_instance(benchmark_problem.runner, BenchmarkRunner) - .get_Y_true(arm) - .item(), + benchmark_problem.runner.evaluate_oracle(arm).item(), value, places=6, ) diff --git 
a/ax/benchmark/tests/problems/test_surrogate_problems.py b/ax/benchmark/tests/problems/test_surrogate_problems.py index c9c2a334096..7295d617f5f 100644 --- a/ax/benchmark/tests/problems/test_surrogate_problems.py +++ b/ax/benchmark/tests/problems/test_surrogate_problems.py @@ -34,7 +34,7 @@ def test_repr(self) -> None: "SOOSurrogateBenchmarkProblem(name='test', " "optimization_config=OptimizationConfig(objective=Objective(metric_name=" '"branin", ' - "minimize=False), " + "minimize=True), " "outcome_constraints=[]), num_trials=6, " "observe_noise_stds=True, has_ground_truth=True, " "tracking_metrics=[], optimal_value=0.0, is_noiseless=True)" diff --git a/ax/utils/testing/benchmark_stubs.py b/ax/utils/testing/benchmark_stubs.py index e1b390c8a15..fa681ea61a9 100644 --- a/ax/utils/testing/benchmark_stubs.py +++ b/ax/utils/testing/benchmark_stubs.py @@ -18,6 +18,7 @@ MultiObjectiveBenchmarkProblem, ) from ax.benchmark.benchmark_result import AggregatedBenchmarkResult, BenchmarkResult +from ax.benchmark.metrics.benchmark import BenchmarkMetric from ax.benchmark.problems.surrogate import ( MOOSurrogateBenchmarkProblem, SOOSurrogateBenchmarkProblem, @@ -25,6 +26,7 @@ from ax.benchmark.runners.botorch_test import ParamBasedTestProblem from ax.benchmark.runners.surrogate import SurrogateRunner from ax.core.experiment import Experiment +from ax.core.objective import MultiObjective, Objective from ax.core.optimization_config import ( MultiObjectiveOptimizationConfig, OptimizationConfig, @@ -36,7 +38,6 @@ from ax.models.torch.botorch_modular.surrogate import Surrogate from ax.service.scheduler import SchedulerOptions from ax.utils.common.constants import Keys -from ax.utils.common.typeutils import checked_cast from ax.utils.testing.core_stubs import ( get_branin_experiment, get_branin_experiment_with_multi_objective, @@ -113,14 +114,21 @@ def get_soo_surrogate() -> SOOSurrogateBenchmarkProblem: outcome_names=["branin"], get_surrogate_and_datasets=lambda: (surrogate, []), ) + 
+ observe_noise_sd = True + objective = Objective( + metric=BenchmarkMetric( + name="branin", lower_is_better=True, observe_noise_sd=observe_noise_sd + ), + ) + optimization_config = OptimizationConfig(objective=objective) + return SOOSurrogateBenchmarkProblem( name="test", search_space=experiment.search_space, - optimization_config=checked_cast( - OptimizationConfig, experiment.optimization_config - ), + optimization_config=optimization_config, num_trials=6, - observe_noise_stds=True, + observe_noise_stds=observe_noise_sd, optimal_value=0.0, runner=runner, is_noiseless=runner.is_noiseless, @@ -143,12 +151,31 @@ def get_moo_surrogate() -> MOOSurrogateBenchmarkProblem: outcome_names=["branin_a", "branin_b"], get_surrogate_and_datasets=lambda: (surrogate, []), ) + observe_noise_sd = True + optimization_config = MultiObjectiveOptimizationConfig( + objective=MultiObjective( + objectives=[ + Objective( + metric=BenchmarkMetric( + name="branin_a", + lower_is_better=True, + observe_noise_sd=observe_noise_sd, + ), + ), + Objective( + metric=BenchmarkMetric( + name="branin_b", + lower_is_better=True, + observe_noise_sd=observe_noise_sd, + ), + ), + ], + ) + ) return MOOSurrogateBenchmarkProblem( name="test", search_space=experiment.search_space, - optimization_config=checked_cast( - MultiObjectiveOptimizationConfig, experiment.optimization_config - ), + optimization_config=optimization_config, num_trials=10, observe_noise_stds=True, optimal_value=1.0,