From 200349fa2aa3ab9f89ac516ec225e7351f10f2a3 Mon Sep 17 00:00:00 2001
From: Elizabeth Santorella <santorella@meta.com>
Date: Mon, 26 Aug 2024 18:05:25 -0700
Subject: [PATCH] Compute ground-truth optimization trace on BenchmarkProblem
 (#2704)

Summary:
Pull Request resolved: https://github.com/facebook/Ax/pull/2704

This currently doesn't change behavior, but it enables more flexibility: the ground-truth optimization trace can be customized by redefining `Problem.get_ground_truth_opt_trace` or `BenchmarkRunner.get_Y_true`. This currently duplicates the tracking metrics setup, which will be reaped in D61432000; the two should be combined. In this diff, both ways of computing the optimization trace run, along with an assertion that they give the same results.

Reviewed By: saitcakmak

Differential Revision: D61404056
---
 ax/benchmark/benchmark.py                     |   3 +
 ax/benchmark/benchmark_problem.py             | 111 +++++++++++++-----
 ax/benchmark/runners/botorch_test.py          |  35 +++---
 .../problems/synthetic/hss/test_jenatton.py   |   5 +-
 .../tests/problems/test_surrogate_problems.py |   2 +-
 ax/utils/testing/benchmark_stubs.py           |  43 +++++--
 6 files changed, 139 insertions(+), 60 deletions(-)

diff --git a/ax/benchmark/benchmark.py b/ax/benchmark/benchmark.py
index bbbe9b1149d..14dca5c998e 100644
--- a/ax/benchmark/benchmark.py
+++ b/ax/benchmark/benchmark.py
@@ -151,6 +151,9 @@ def benchmark_replication(
         scheduler.get_trace(optimization_config=analysis_opt_config)
     )
 
+    new_optimization_trace = problem.get_opt_trace(experiment=experiment)
+    np.testing.assert_allclose(optimization_trace, new_optimization_trace)
+
     try:
         # Catch any errors that may occur during score computation, such as errors
         # while accessing "steps" in node based generation strategies. The error
diff --git a/ax/benchmark/benchmark_problem.py b/ax/benchmark/benchmark_problem.py
index eac5009572f..77f5dabee69 100644
--- a/ax/benchmark/benchmark_problem.py
+++ b/ax/benchmark/benchmark_problem.py
@@ -8,10 +8,16 @@
 from dataclasses import dataclass, field
 from typing import Any, Optional, Union
 
+import numpy as np
+import pandas as pd
+
 from ax.benchmark.metrics.base import BenchmarkMetricBase
 
 from ax.benchmark.metrics.benchmark import BenchmarkMetric
+from ax.benchmark.runners.base import BenchmarkRunner
 from ax.benchmark.runners.botorch_test import BotorchTestProblemRunner
+from ax.core.data import Data
+from ax.core.experiment import Experiment
 from ax.core.objective import MultiObjective, Objective
 from ax.core.optimization_config import (
     MultiObjectiveOptimizationConfig,
@@ -20,9 +26,9 @@
 )
 from ax.core.outcome_constraint import OutcomeConstraint
 from ax.core.parameter import ParameterType, RangeParameter
-from ax.core.runner import Runner
 from ax.core.search_space import SearchSpace
 from ax.core.types import ComparisonOp
+from ax.service.utils.best_point_mixin import BestPointMixin
 from ax.utils.common.base import Base
 from ax.utils.common.typeutils import checked_cast
 from botorch.test_functions.base import (
@@ -31,6 +37,7 @@
     MultiObjectiveTestProblem,
 )
 from botorch.test_functions.synthetic import SyntheticTestFunction
+from pyre_extensions import assert_is_instance
 
 
 def _get_name(
@@ -89,9 +96,52 @@ class BenchmarkProblem(Base):
     optimal_value: float
 
     search_space: SearchSpace = field(repr=False)
-    runner: Runner = field(repr=False)
+    runner: BenchmarkRunner = field(repr=False)
     is_noiseless: bool
 
+    def get_oracle_experiment(self, experiment: Experiment) -> Experiment:
+        records = []
+
+        new_experiment = Experiment(
+            search_space=self.search_space, optimization_config=self.optimization_config
+        )
+        for trial_index, trial in experiment.trials.items():
+            for arm in trial.arms:
+                for metric_name, metric_value in zip(
+                    self.runner.outcome_names, self.runner.evaluate_oracle(arm=arm)
+                ):
+                    records.append(
+                        {
+                            "arm_name": arm.name,
+                            "metric_name": metric_name,
+                            "mean": metric_value.item(),
+                            "sem": 0.0,
+                            "trial_index": trial_index,
+                        }
+                    )
+
+            new_experiment.attach_trial(
+                parameterizations=[arm.parameters for arm in trial.arms],
+                arm_names=[arm.name for arm in trial.arms],
+            )
+        for trial in new_experiment.trials.values():
+            trial.mark_completed()
+
+        data = Data(df=pd.DataFrame.from_records(records))
+        new_experiment.attach_data(data=data, overwrite_existing_data=True)
+        return new_experiment
+
+    def get_opt_trace(self, experiment: Experiment) -> np.ndarray:
+        """Compute the ground-truth optimization trace of an experiment's trials."""
+        oracle_experiment = self.get_oracle_experiment(experiment=experiment)
+
+        return np.array(
+            BestPointMixin._get_trace(
+                experiment=oracle_experiment,
+                optimization_config=self.optimization_config,
+            )
+        )
+
 
 # TODO: Support constrained MOO problems.
 def get_soo_config_and_outcome_names(
@@ -141,6 +191,20 @@ def get_soo_config_and_outcome_names(
     return opt_config, outcome_names
 
 
+def get_continuous_search_space(bounds: list[tuple[float, float]]) -> SearchSpace:
+    return SearchSpace(
+        parameters=[
+            RangeParameter(
+                name=f"x{i}",
+                parameter_type=ParameterType.FLOAT,
+                lower=lower,
+                upper=upper,
+            )
+            for i, (lower, upper) in enumerate(bounds)
+        ]
+    )
+
+
 def create_single_objective_problem_from_botorch(
     test_problem_class: type[SyntheticTestFunction],
     test_problem_kwargs: dict[str, Any],
@@ -169,17 +233,7 @@ def create_single_objective_problem_from_botorch(
     test_problem = test_problem_class(**test_problem_kwargs)
     is_constrained = isinstance(test_problem, ConstrainedBaseTestProblem)
 
-    search_space = SearchSpace(
-        parameters=[
-            RangeParameter(
-                name=f"x{i}",
-                parameter_type=ParameterType.FLOAT,
-                lower=lower,
-                upper=upper,
-            )
-            for i, (lower, upper) in enumerate(test_problem._bounds)
-        ]
-    )
+    search_space = get_continuous_search_space(test_problem._bounds)
 
     dim = test_problem_kwargs.get("dim", None)
     name = _get_name(
@@ -249,18 +303,11 @@ def create_multi_objective_problem_from_botorch(
     # pyre-fixme [45]: Invalid class instantiation
     test_problem = test_problem_class(**test_problem_kwargs)
 
-    problem = create_single_objective_problem_from_botorch(
-        # pyre-fixme [6]: Passing a multi-objective problem where a
-        # single-objective problem is expected.
-        test_problem_class=test_problem_class,
-        test_problem_kwargs=test_problem_kwargs,
-        lower_is_better=True,  # Seems like we always assume minimization for MOO?
-        num_trials=num_trials,
-        observe_noise_sd=observe_noise_sd,
+    dim = test_problem_kwargs.get("dim", None)
+    name = _get_name(
+        test_problem=test_problem, observe_noise_sd=observe_noise_sd, dim=dim
     )
 
-    name = problem.name
-
     n_obj = test_problem.num_objectives
     if not observe_noise_sd:
         noise_sds = [None] * n_obj
@@ -292,15 +339,25 @@ def create_multi_objective_problem_from_botorch(
             for i, metric in enumerate(metrics)
         ],
     )
+    runner = BotorchTestProblemRunner(
+        test_problem_class=test_problem_class,
+        test_problem_kwargs=test_problem_kwargs,
+        outcome_names=[
+            objective.metric.name
+            for objective in assert_is_instance(
+                optimization_config.objective, MultiObjective
+            ).objectives
+        ],
+    )
 
     return MultiObjectiveBenchmarkProblem(
         name=name,
-        search_space=problem.search_space,
+        search_space=get_continuous_search_space(test_problem._bounds),
         optimization_config=optimization_config,
-        runner=problem.runner,
+        runner=runner,
         num_trials=num_trials,
-        is_noiseless=problem.is_noiseless,
+        is_noiseless=test_problem.noise_std in (None, 0.0),
         observe_noise_stds=observe_noise_sd,
-        has_ground_truth=problem.has_ground_truth,
+        has_ground_truth=True,
         optimal_value=test_problem.max_hv,
     )
diff --git a/ax/benchmark/runners/botorch_test.py b/ax/benchmark/runners/botorch_test.py
index 90fddbdefda..3a2fadb4d32 100644
--- a/ax/benchmark/runners/botorch_test.py
+++ b/ax/benchmark/runners/botorch_test.py
@@ -17,10 +17,7 @@
 from ax.utils.common.base import Base
 from ax.utils.common.equality import equality_typechecker
 from ax.utils.common.serialization import TClassDecoderRegistry, TDecoderRegistry
-from botorch.test_functions.synthetic import (
-    ConstrainedSyntheticTestFunction,
-    SyntheticTestFunction,
-)
+from botorch.test_functions.synthetic import BaseTestProblem, ConstrainedBaseTestProblem
 from botorch.utils.transforms import normalize, unnormalize
 from pyre_extensions import assert_is_instance
 from torch import Tensor
@@ -60,22 +57,22 @@ def __eq__(self, other: Any) -> bool:
 
 class SyntheticProblemRunner(BenchmarkRunner, ABC):
     """A Runner for evaluating synthetic problems, either BoTorch
-    `SyntheticTestFunction`s or Ax benchmarking `ParamBasedTestProblem`s.
+    `BaseTestProblem`s or Ax benchmarking `ParamBasedTestProblem`s.
 
     Given a trial, the Runner will evaluate the problem noiselessly for each
     arm in the trial, as well as return some metadata about the underlying
     problem such as the noise_std.
     """
 
-    test_problem: Union[SyntheticTestFunction, ParamBasedTestProblem]
+    test_problem: Union[BaseTestProblem, ParamBasedTestProblem]
     _is_constrained: bool
-    _test_problem_class: type[Union[SyntheticTestFunction, ParamBasedTestProblem]]
+    _test_problem_class: type[Union[BaseTestProblem, ParamBasedTestProblem]]
     _test_problem_kwargs: Optional[dict[str, Any]]
 
     def __init__(
         self,
         *,
-        test_problem_class: type[Union[SyntheticTestFunction, ParamBasedTestProblem]],
+        test_problem_class: type[Union[BaseTestProblem, ParamBasedTestProblem]],
         test_problem_kwargs: dict[str, Any],
         outcome_names: list[str],
         modified_bounds: Optional[list[tuple[float, float]]] = None,
@@ -83,7 +80,7 @@ def __init__(
         """Initialize the test problem runner.
 
         Args:
-            test_problem_class: A BoTorch `SyntheticTestFunction` class or Ax
+            test_problem_class: A BoTorch `BaseTestProblem` class or Ax
                 `ParamBasedTestProblem` class.
             test_problem_kwargs: The keyword arguments used for initializing the
                 test problem.
@@ -105,12 +102,12 @@ def __init__(
             # abstract class with abstract method `evaluate_true`.
             test_problem_class(**test_problem_kwargs)
         )
-        if isinstance(self.test_problem, SyntheticTestFunction):
+        if isinstance(self.test_problem, BaseTestProblem):
             self.test_problem = self.test_problem.to(dtype=torch.double)
-        # A `ConstrainedSyntheticTestFunction` is a type of `SyntheticTestFunction`; a
+        # A `ConstrainedBaseTestProblem` is a type of `BaseTestProblem`; a
         # `ParamBasedTestProblem` is never constrained.
         self._is_constrained: bool = isinstance(
-            self.test_problem, ConstrainedSyntheticTestFunction
+            self.test_problem, ConstrainedBaseTestProblem
         )
         self._is_moo: bool = self.test_problem.num_objectives > 1
         self.outcome_names = outcome_names
@@ -202,10 +199,10 @@ def deserialize_init_args(
 
 class BotorchTestProblemRunner(SyntheticProblemRunner):
     """
-    A `SyntheticProblemRunner` for BoTorch `SyntheticTestFunction`s.
+    A `SyntheticProblemRunner` for BoTorch `BaseTestProblem`s.
 
     Args:
-        test_problem_class: A BoTorch `SyntheticTestFunction` class.
+        test_problem_class: A BoTorch `BaseTestProblem` class.
         test_problem_kwargs: The keyword arguments used for initializing the
             test problem.
         outcome_names: The names of the outcomes returned by the problem.
@@ -223,7 +220,7 @@ class BotorchTestProblemRunner(SyntheticProblemRunner):
     def __init__(
         self,
         *,
-        test_problem_class: type[SyntheticTestFunction],
+        test_problem_class: type[BaseTestProblem],
         test_problem_kwargs: dict[str, Any],
         outcome_names: list[str],
         modified_bounds: Optional[list[tuple[float, float]]] = None,
@@ -234,11 +231,9 @@ def __init__(
             outcome_names=outcome_names,
             modified_bounds=modified_bounds,
         )
-        self.test_problem: SyntheticTestFunction = self.test_problem.to(
-            dtype=torch.double
-        )
+        self.test_problem: BaseTestProblem = self.test_problem.to(dtype=torch.double)
         self._is_constrained: bool = isinstance(
-            self.test_problem, ConstrainedSyntheticTestFunction
+            self.test_problem, ConstrainedBaseTestProblem
         )
 
     def get_Y_true(self, arm: Arm) -> Tensor:
@@ -274,7 +269,7 @@ def get_Y_true(self, arm: Arm) -> Tensor:
             X = unnormalize(unit_X, self.test_problem.bounds)
 
         Y_true = self.test_problem.evaluate_true(X).view(-1)
-        # `SyntheticTestFunction.evaluate_true()` does not negate the outcome
+        # `BaseTestProblem.evaluate_true()` does not negate the outcome
         if self.test_problem.negate:
             Y_true = -Y_true
 
diff --git a/ax/benchmark/tests/problems/synthetic/hss/test_jenatton.py b/ax/benchmark/tests/problems/synthetic/hss/test_jenatton.py
index 11a634ce26e..5f8ee63db30 100644
--- a/ax/benchmark/tests/problems/synthetic/hss/test_jenatton.py
+++ b/ax/benchmark/tests/problems/synthetic/hss/test_jenatton.py
@@ -14,7 +14,6 @@
     get_jenatton_benchmark_problem,
     jenatton_test_function,
 )
-from ax.benchmark.runners.base import BenchmarkRunner
 from ax.benchmark.runners.botorch_test import ParamBasedTestProblemRunner
 from ax.core.arm import Arm
 from ax.core.data import Data
@@ -96,9 +95,7 @@ def test_jenatton_test_function(self) -> None:
                 value,
             )
             self.assertAlmostEqual(
-                assert_is_instance(benchmark_problem.runner, BenchmarkRunner)
-                .get_Y_true(arm)
-                .item(),
+                benchmark_problem.runner.evaluate_oracle(arm).item(),
                 value,
                 places=6,
             )
diff --git a/ax/benchmark/tests/problems/test_surrogate_problems.py b/ax/benchmark/tests/problems/test_surrogate_problems.py
index c9c2a334096..7295d617f5f 100644
--- a/ax/benchmark/tests/problems/test_surrogate_problems.py
+++ b/ax/benchmark/tests/problems/test_surrogate_problems.py
@@ -34,7 +34,7 @@ def test_repr(self) -> None:
             "SOOSurrogateBenchmarkProblem(name='test', "
             "optimization_config=OptimizationConfig(objective=Objective(metric_name="
             '"branin", '
-            "minimize=False), "
+            "minimize=True), "
             "outcome_constraints=[]), num_trials=6, "
             "observe_noise_stds=True, has_ground_truth=True, "
             "tracking_metrics=[], optimal_value=0.0, is_noiseless=True)"
diff --git a/ax/utils/testing/benchmark_stubs.py b/ax/utils/testing/benchmark_stubs.py
index e1b390c8a15..fa681ea61a9 100644
--- a/ax/utils/testing/benchmark_stubs.py
+++ b/ax/utils/testing/benchmark_stubs.py
@@ -18,6 +18,7 @@
     MultiObjectiveBenchmarkProblem,
 )
 from ax.benchmark.benchmark_result import AggregatedBenchmarkResult, BenchmarkResult
+from ax.benchmark.metrics.benchmark import BenchmarkMetric
 from ax.benchmark.problems.surrogate import (
     MOOSurrogateBenchmarkProblem,
     SOOSurrogateBenchmarkProblem,
@@ -25,6 +26,7 @@
 from ax.benchmark.runners.botorch_test import ParamBasedTestProblem
 from ax.benchmark.runners.surrogate import SurrogateRunner
 from ax.core.experiment import Experiment
+from ax.core.objective import MultiObjective, Objective
 from ax.core.optimization_config import (
     MultiObjectiveOptimizationConfig,
     OptimizationConfig,
@@ -36,7 +38,6 @@
 from ax.models.torch.botorch_modular.surrogate import Surrogate
 from ax.service.scheduler import SchedulerOptions
 from ax.utils.common.constants import Keys
-from ax.utils.common.typeutils import checked_cast
 from ax.utils.testing.core_stubs import (
     get_branin_experiment,
     get_branin_experiment_with_multi_objective,
@@ -113,14 +114,21 @@ def get_soo_surrogate() -> SOOSurrogateBenchmarkProblem:
         outcome_names=["branin"],
         get_surrogate_and_datasets=lambda: (surrogate, []),
     )
+
+    observe_noise_sd = True
+    objective = Objective(
+        metric=BenchmarkMetric(
+            name="branin", lower_is_better=True, observe_noise_sd=observe_noise_sd
+        ),
+    )
+    optimization_config = OptimizationConfig(objective=objective)
+
     return SOOSurrogateBenchmarkProblem(
         name="test",
         search_space=experiment.search_space,
-        optimization_config=checked_cast(
-            OptimizationConfig, experiment.optimization_config
-        ),
+        optimization_config=optimization_config,
         num_trials=6,
-        observe_noise_stds=True,
+        observe_noise_stds=observe_noise_sd,
         optimal_value=0.0,
         runner=runner,
         is_noiseless=runner.is_noiseless,
@@ -143,12 +151,31 @@ def get_moo_surrogate() -> MOOSurrogateBenchmarkProblem:
         outcome_names=["branin_a", "branin_b"],
         get_surrogate_and_datasets=lambda: (surrogate, []),
     )
+    observe_noise_sd = True
+    optimization_config = MultiObjectiveOptimizationConfig(
+        objective=MultiObjective(
+            objectives=[
+                Objective(
+                    metric=BenchmarkMetric(
+                        name="branin_a",
+                        lower_is_better=True,
+                        observe_noise_sd=observe_noise_sd,
+                    ),
+                ),
+                Objective(
+                    metric=BenchmarkMetric(
+                        name="branin_b",
+                        lower_is_better=True,
+                        observe_noise_sd=observe_noise_sd,
+                    ),
+                ),
+            ],
+        )
+    )
     return MOOSurrogateBenchmarkProblem(
         name="test",
         search_space=experiment.search_space,
-        optimization_config=checked_cast(
-            MultiObjectiveOptimizationConfig, experiment.optimization_config
-        ),
+        optimization_config=optimization_config,
         num_trials=10,
         observe_noise_stds=True,
         optimal_value=1.0,