From 36cfa53df01c177ff0fd99fd2df2f7e83cb6a1a6 Mon Sep 17 00:00:00 2001
From: Qing Feng
Date: Wed, 21 Jun 2023 17:37:37 -0700
Subject: [PATCH] move sparseBO to OSS (#1676)

Summary:
Pull Request resolved: https://github.com/facebook/Ax/pull/1676

Move SEBO-L0/L1 to OSS
- Move homotopy from botorch/fb to botorch
- Move SEBO from ax/fb to ax
- Move deterministic_models from ax/fb to ax
- Update and move storage from ax/fb to ax

Differential Revision: D46528626

fbshipit-source-id: 49dd854fbc8d3e8c962b8e147864929e0dd43b02
---
 ax/models/tests/test_deterministic_metric.py | 234 +++++++++++
 ax/models/torch/botorch_modular/sebo.py      | 396 ++++++++++++++++++
 ax/models/torch/deterministic_metric.py      | 101 +++++
 ax/models/torch/tests/test_sebo.py           | 334 +++++++++++++++
 ax/storage/botorch_modular_registry.py       |   4 +
 .../json_store/tests/test_json_store.py      |   2 +
 ax/utils/testing/core_stubs.py               |   5 +
 7 files changed, 1076 insertions(+)
 create mode 100644 ax/models/tests/test_deterministic_metric.py
 create mode 100644 ax/models/torch/botorch_modular/sebo.py
 create mode 100644 ax/models/torch/deterministic_metric.py
 create mode 100644 ax/models/torch/tests/test_sebo.py

diff --git a/ax/models/tests/test_deterministic_metric.py b/ax/models/tests/test_deterministic_metric.py
new file mode 100644
index 00000000000..61f67c1e5c7
--- /dev/null
+++ b/ax/models/tests/test_deterministic_metric.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import functools
+from typing import Any, Dict
+from unittest import mock
+
+import torch
+from ax.core.search_space import SearchSpaceDigest
+from ax.models.torch.botorch_defaults import get_NEI
+from ax.models.torch.botorch_moo import MultiObjectiveBotorchModel
+from ax.models.torch.deterministic_metric import (
+    get_and_fit_model_list_det,
+    L1_norm_func,
+)
+from ax.models.torch_base import TorchOptConfig
+from ax.utils.common.testutils import TestCase
+from ax.utils.testing.mock import fast_botorch_optimize
+from botorch.models import ModelList
+from botorch.utils.datasets import FixedNoiseDataset
+from botorch.utils.multi_objective.scalarization import get_chebyshev_scalarization
+
+
+FIT_MODEL_MO_PATH = "ax.models.torch.deterministic_metric.fit_gpytorch_mll"
+CHEBYSHEV_SCALARIZATION_PATH = (
+    "ax.models.torch.botorch_defaults.get_chebyshev_scalarization"
+)
+
+
+# pyre-fixme[3]: Return type must be annotated.
+def _get_torch_test_data(
+    dtype: torch.dtype = torch.float,
+    cuda: bool = False,
+    constant_noise: bool = True,
+    # pyre-fixme[2]: Parameter must be annotated.
+    task_features=None,
+):
+    device = torch.device("cuda") if cuda else torch.device("cpu")
+    Xs = [torch.tensor([[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]], dtype=dtype, device=device)]
+    Ys = [torch.tensor([[3.0], [4.0]], dtype=dtype, device=device)]
+    Yvars = [torch.tensor([[0.0], [2.0]], dtype=dtype, device=device)]
+    if constant_noise:
+        Yvars[0].fill_(1.0)
+    bounds = [(0.0, 1.0), (1.0, 4.0), (2.0, 5.0)]
+    feature_names = ["x1", "x2", "x3"]
+    task_features = [] if task_features is None else task_features
+    metric_names = ["y", "r", "d"]
+    return Xs, Ys, Yvars, bounds, task_features, feature_names, metric_names
+
+
+class BotorchDeterministicMetricMOOModelTest(TestCase):
+    def test_l1_norm_func(self, cuda: bool = False) -> None:
+        device = torch.device("cuda") if cuda else torch.device("cpu")
+        for dtype in (torch.float, torch.double):
+            init_point = torch.tensor([1.0, 1.0, 1.0], device=device, dtype=dtype)
+            # testing a batch of two points
+            sample_point = torch.tensor(
+                [[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]], device=device, dtype=dtype
+            )
+            real_values = torch.norm(
+                (sample_point - init_point), p=1, dim=-1, keepdim=True
+            )
+            computed_values = L1_norm_func(X=sample_point, init_point=init_point)
+            self.assertTrue(torch.equal(real_values, computed_values))
+
+    def test_l1_norm_func_cuda(self) -> None:
+        if torch.cuda.is_available():
+            self.test_l1_norm_func(cuda=True)
+
+    @fast_botorch_optimize
+    def test_deterministic_metric_BotorchMOOModel_with_cheby_scalarization(
+        self,
+        dtype: torch.dtype = torch.float,
+        cuda: bool = False,
+    ) -> None:
+        tkwargs: Dict[str, Any] = {
+            "device": torch.device("cuda") if cuda else torch.device("cpu"),
+            "dtype": dtype,
+        }
+        Xs1, Ys1, Yvars1, bounds, tfs, fns, mns = _get_torch_test_data(
+            dtype=dtype, cuda=cuda, constant_noise=True
+        )
+        Xs2, Ys2, Yvars2, _, _, _, _ = _get_torch_test_data(
+            dtype=dtype, cuda=cuda, constant_noise=True
+        )
+        Xs3, Ys3, Yvars3, _, _, _, _ = _get_torch_test_data(
+            dtype=dtype, cuda=cuda, constant_noise=True
+        )
+        n = 3
+        objective_weights = torch.tensor([1.0, 1.0, 1.0], **tkwargs)
+        obj_t = torch.tensor([1.0, 1.0, 1.0], **tkwargs)
+
+        L1_norm_penalty = functools.partial(
+            L1_norm_func,
+            init_point=torch.zeros(3),
+        )
+
+        # test when det_metric_names does not match any of the metric names
+        det_metric_names = ["wrong_name"]
+        det_metric_funcs = {"wrong_name": L1_norm_penalty}
+        model = MultiObjectiveBotorchModel(
+            # pyre-fixme[6]: For 1st param expected `(List[Tensor], List[Tensor], Lis...
+            model_constructor=get_and_fit_model_list_det,
+            # pyre-fixme[6]: For 2nd param expected `(Model, Tensor, Optional[Tuple[T...
+            acqf_constructor=get_NEI,
+            det_metric_names=det_metric_names,
+            det_metric_funcs=det_metric_funcs,
+        )
+        datasets = [
+            FixedNoiseDataset(X=X, Y=Y, Yvar=Yvar)
+            for X, Y, Yvar in zip(
+                Xs1 + Xs2 + Xs3, Ys1 + Ys2 + Ys3, Yvars1 + Yvars2 + Yvars3
+            )
+        ]
+        with self.assertRaises(ValueError):
+            model.fit(
+                datasets=datasets,
+                metric_names=mns,
+                search_space_digest=SearchSpaceDigest(
+                    feature_names=fns,
+                    bounds=bounds,
+                    task_features=tfs,
+                ),
+            )
+        # test when det_metric_names matches
+        det_metric_names = ["d"]
+        det_metric_funcs = {"d": L1_norm_penalty}
+        model = MultiObjectiveBotorchModel(
+            # pyre-fixme[6]: For 1st param expected `(List[Tensor], List[Tensor], Lis...
+            model_constructor=get_and_fit_model_list_det,
+            # pyre-fixme[6]: For 2nd param expected `(Model, Tensor, Optional[Tuple[T...
+            acqf_constructor=get_NEI,
+            det_metric_names=det_metric_names,
+            det_metric_funcs=det_metric_funcs,
+        )
+        # test that task_features are not supported
+        with self.assertRaises(NotImplementedError):
+            model.fit(
+                datasets=datasets,
+                metric_names=mns,
+                search_space_digest=SearchSpaceDigest(
+                    feature_names=fns,
+                    bounds=bounds,
+                    task_features=[0],
+                ),
+            )
+        # test that fidelity_features are not supported
+        with self.assertRaises(NotImplementedError):
+            model.fit(
+                datasets=datasets,
+                metric_names=mns,
+                search_space_digest=SearchSpaceDigest(
+                    feature_names=fns,
+                    bounds=bounds,
+                    fidelity_features=[0],
+                ),
+            )
+        # test fitting
+        search_space_digest = SearchSpaceDigest(
+            feature_names=fns,
+            bounds=bounds,
+            task_features=tfs,
+        )
+        with mock.patch(FIT_MODEL_MO_PATH) as _mock_fit_model:
+            model.fit(
+                datasets=datasets,
+                metric_names=mns,
+                search_space_digest=search_space_digest,
+            )
+            # expect to fit only 2 GPs for the 3 objectives
+            self.assertEqual(_mock_fit_model.call_count, 2)
+
+        # test passing state_dict
+        # pyre-fixme[16]: Optional type has no attribute `state_dict`
+        state_dict = model.model.state_dict()
+        with mock.patch(FIT_MODEL_MO_PATH) as _mock_fit_model:
+            model.model_constructor(  # pyre-ignore: [28]
+                Xs=model.Xs,
+                Ys=model.Ys,
+                Yvars=model.Yvars,
+                task_features=model.task_features,
+                state_dict=state_dict,
+                fidelity_features=model.fidelity_features,
+                metric_names=model.metric_names,
+                refit_model=False,
+                use_input_warping=model.use_input_warping,
+                use_loocv_pseudo_likelihood=model.use_loocv_pseudo_likelihood,
+                **model._kwargs,
+            )
+            # load the state dict without fitting
+            self.assertEqual(_mock_fit_model.call_count, 0)
+
+        # test with use_loocv_pseudo_likelihood = True
+        model = MultiObjectiveBotorchModel(
+            # pyre-fixme[6]: For 1st param expected `(List[Tensor], List[Tensor], Lis...
+            model_constructor=get_and_fit_model_list_det,
+            # pyre-fixme[6]: For 2nd param expected `(Model, Tensor, Optional[Tuple[T...
+            acqf_constructor=get_NEI,
+            det_metric_names=det_metric_names,
+            det_metric_funcs=det_metric_funcs,
+            use_loocv_pseudo_likelihood=True,
+        )
+        self.assertTrue(model.use_loocv_pseudo_likelihood)
+        with mock.patch(FIT_MODEL_MO_PATH) as _mock_fit_model:
+            model.fit(
+                datasets=datasets,
+                metric_names=mns,
+                search_space_digest=search_space_digest,
+            )
+        self.assertIsInstance(model.model, ModelList)
+        # pyre-ignore
+        self.assertEqual(len(model.model.models), 3)
+
+        with mock.patch(
+            CHEBYSHEV_SCALARIZATION_PATH, wraps=get_chebyshev_scalarization
+        ) as _mock_chebyshev_scalarization:
+            model.gen(
+                n,
+                search_space_digest=search_space_digest,
+                torch_opt_config=TorchOptConfig(
+                    objective_weights=objective_weights,
+                    objective_thresholds=obj_t,
+                    model_gen_options={
+                        "acquisition_function_kwargs": {
+                            "chebyshev_scalarization": True,
+                        },
+                    },
+                ),
+            )
+        # get_chebyshev_scalarization should be called once per generated candidate.
+        self.assertEqual(n, _mock_chebyshev_scalarization.call_count)

diff --git a/ax/models/torch/botorch_modular/sebo.py b/ax/models/torch/botorch_modular/sebo.py
new file mode 100644
index 00000000000..573af210888
--- /dev/null
+++ b/ax/models/torch/botorch_modular/sebo.py
@@ -0,0 +1,396 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+import functools
+from copy import deepcopy
+from functools import partial
+from typing import Any, Callable, Dict, List, Optional, Tuple, Type
+
+import torch
+from ax.core.search_space import SearchSpaceDigest
+from ax.models.torch.botorch_modular.acquisition import Acquisition
+from ax.models.torch.botorch_modular.optimizer_argparse import optimizer_argparse
+from ax.models.torch.botorch_modular.surrogate import Surrogate
+from ax.models.torch.deterministic_metric import L1_norm_func
+from ax.models.torch_base import TorchOptConfig
+from ax.utils.common.constants import Keys
+from botorch.acquisition.acquisition import AcquisitionFunction
+from botorch.acquisition.multi_objective.monte_carlo import (
+    qExpectedHypervolumeImprovement,
+)
+from botorch.acquisition.penalized import L0Approximation
+from botorch.models.deterministic import GenericDeterministicModel
+from botorch.models.model import ModelList
+from botorch.optim import (
+    Homotopy,
+    HomotopyParameter,
+    LogLinearHomotopySchedule,
+    optimize_acqf_homotopy,
+)
+from botorch.utils.datasets import SupervisedDataset
+from botorch.utils.multi_objective.pareto import is_non_dominated
+from torch import Tensor
+from torch.quasirandom import SobolEngine
+
+CLAMP_TOL = 0.01
+
+
+class SEBOAcquisition(Acquisition):
+    """
+    Implement the acquisition function of Sparsity Exploring Bayesian
+    Optimization (SEBO).
+
+    SEBO is a hyperparameter-free method that simultaneously maximizes a target
+    objective and sparsity. When the L0 norm is used, SEBO employs a novel
+    differentiable relaxation based on homotopy continuation to efficiently
+    optimize for sparsity.
+    """
+
+    def __init__(
+        self,
+        surrogates: Dict[str, Surrogate],
+        search_space_digest: SearchSpaceDigest,
+        torch_opt_config: TorchOptConfig,
+        botorch_acqf_class: Type[AcquisitionFunction],
+        options: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        if len(surrogates) > 1:
+            raise ValueError("SEBO does not support multiple surrogates.")
+        surrogate = surrogates[Keys.ONLY_SURROGATE]
+
+        tkwargs = {"dtype": surrogate.dtype, "device": surrogate.device}
+        options = options or {}
+        self.penalty_name: str = options.get("penalty", "L0_norm")
+        self.target_point: Tensor = options.get("target_point", None)
+        if self.target_point is None:
+            raise ValueError("please provide target point.")
+        self.target_point.to(**tkwargs)  # pyre-ignore
+        self.sparsity_threshold: int = options.get(
+            "sparsity_threshold", surrogate.Xs[0].shape[-1]
+        )
+        # construct a deterministic model for the penalty term
+        # pyre-fixme[4]: Attribute must be annotated.
+        self.deterministic_model = self._construct_penalty()
+
+        surrogate_f = deepcopy(surrogate)
+        # update the training data in the new surrogate
+        surrogate_f._training_data.append(  # pyre-ignore
+            SupervisedDataset(
+                surrogate_f.Xs[0],
+                self.deterministic_model(surrogate_f.Xs[0]),
+                # append Yvar as zero for the penalty term
+                torch.zeros(surrogate_f.Xs[0].shape[0], 1, **tkwargs),  # pyre-ignore
+            )
+        )
+        # update the model in the new surrogate
+        surrogate_f._model = ModelList(surrogate.model, self.deterministic_model)
+        self.det_metric_indx = -1
+
+        # update objective weights and thresholds in the torch config
+        torch_opt_config_sebo = self._transform_torch_config(
+            torch_opt_config, **tkwargs
+        )
+
+        # instantiate botorch_acqf_class
+        super().__init__(
+            surrogates={"sebo": surrogate_f},
+            search_space_digest=search_space_digest,
+            torch_opt_config=torch_opt_config_sebo,
+            botorch_acqf_class=botorch_acqf_class,
+            options=options,
+        )
+        if not isinstance(self.acqf, qExpectedHypervolumeImprovement):
+            raise ValueError("botorch_acqf_class must be qEHVI to use SEBO")
+
+        # update the objective threshold for the deterministic model (penalty term)
+        self.acqf.ref_point[-1] = self.sparsity_threshold * -1
+        # pyre-ignore
+        self._objective_thresholds[-1] = self.sparsity_threshold
+
+        Y_pareto = torch.cat(
+            # pyre-ignore[16]
+            [d.Y.values for d in self.surrogates["sebo"].training_data],
+            dim=-1,
+        )
+        ind_pareto = is_non_dominated(Y_pareto * self._full_objective_weights)
+        # pyre-ignore
+        self.X_pareto = self.surrogates["sebo"].Xs[0][ind_pareto].clone()
+
+    def _construct_penalty(self) -> GenericDeterministicModel:
+        """Construct a penalty term as a deterministic model to be included in
+        the SEBO acquisition function. Currently only the L0 and L1 penalties
+        are supported.
+        """
+        if self.penalty_name == "L0_norm":
+            L0 = L0Approximation(target_point=self.target_point)
+            return GenericDeterministicModel(f=L0)
+        elif self.penalty_name == "L1_norm":
+            L1 = functools.partial(
+                L1_norm_func,
+                init_point=self.target_point,
+            )
+            return GenericDeterministicModel(f=L1)
+        else:
+            raise NotImplementedError(
+                f"{self.penalty_name} is not currently implemented."
+            )
+
+    def _transform_torch_config(
+        self,
+        torch_opt_config: TorchOptConfig,
+        **tkwargs: Any,
+    ) -> TorchOptConfig:
+        """Transform the torch config to include the penalty term (deterministic
+        model) as an additional outcome in the BoTorch model.
+        """
+        # update objective weights by appending the weight (-1) of the penalty
+        # term at the end
+        ow_sebo = torch.cat(
+            [torch_opt_config.objective_weights, torch.tensor([-1], **tkwargs)]
+        )
+        if torch_opt_config.outcome_constraints is not None:
+            # update the shape of the A matrix in outcome_constraints
+            oc_sebo = (
+                torch.cat(
+                    [
+                        torch_opt_config.outcome_constraints[0],
+                        torch.zeros(
+                            # pyre-ignore
+                            torch_opt_config.outcome_constraints[0].shape[0],
+                            1,
+                            **tkwargs,
+                        ),
+                    ],
+                    dim=1,
+                ),
+                torch_opt_config.outcome_constraints[1],
+            )
+        else:
+            oc_sebo = None
+        if torch_opt_config.objective_thresholds is not None:
+            # append the sparsity threshold at the end if objective_thresholds
+            # is not None
+            ot_sebo = torch.cat(
+                [
+                    torch_opt_config.objective_thresholds,
+                    torch.tensor([self.sparsity_threshold], **tkwargs),
+                ]
+            )
+        else:
+            ot_sebo = None
+
+        # update pending observations (if not None) by appending an observation
+        # for the new penalty outcome
+        pending_observations = torch_opt_config.pending_observations
+        if torch_opt_config.pending_observations is not None:
+            pending_observations = torch_opt_config.pending_observations + [
+                torch_opt_config.pending_observations[0]
+            ]
+
+        return TorchOptConfig(
+            objective_weights=ow_sebo,
+            outcome_constraints=oc_sebo,
+            objective_thresholds=ot_sebo,
+            linear_constraints=torch_opt_config.linear_constraints,
+            fixed_features=torch_opt_config.fixed_features,
+            pending_observations=pending_observations,
+            model_gen_options=torch_opt_config.model_gen_options,
+            rounding_func=torch_opt_config.rounding_func,
+            opt_config_metrics=torch_opt_config.opt_config_metrics,
+            is_moo=torch_opt_config.is_moo,
+        )
+
+    def optimize(
+        self,
+        n: int,
+        search_space_digest: SearchSpaceDigest,
+        inequality_constraints: Optional[List[Tuple[Tensor, Tensor, float]]] = None,
+        fixed_features: Optional[Dict[int, float]] = None,
+        rounding_func: Optional[Callable[[Tensor], Tensor]] = None,
+        optimizer_options: Optional[Dict[str, Any]] = None,
+    ) -> Tuple[Tensor, Tensor]:
+        """Generate a set of candidates via multi-start optimization. Obtains
+        candidates and their associated acquisition function values.
+
+        Args:
+            n: The number of candidates to generate.
+            search_space_digest: A ``SearchSpaceDigest`` object containing search space
+                properties, e.g. ``bounds`` for optimization.
+            inequality_constraints: A list of tuples (indices, coefficients, rhs),
+                with each tuple encoding an inequality constraint of the form
+                ``sum_i (X[indices[i]] * coefficients[i]) >= rhs``.
+            fixed_features: A map `{feature_index: value}` for features that
+                should be fixed to a particular value during generation.
+            rounding_func: A function that post-processes an optimization
+                result appropriately (i.e., according to `round-trip`
+                transformations).
+            optimizer_options: Options for the optimizer function, e.g. ``sequential``
+                or ``raw_samples``.
+        """
+        if self.penalty_name == "L0_norm":
+            if inequality_constraints is not None:
+                raise NotImplementedError(
+                    "Homotopy does not support optimization with inequality "
+                    + "constraints. Use L1 penalty norm instead."
+                )
+            candidates, expected_acquisition_value = self._optimize_with_homotopy(
+                n=n,
+                search_space_digest=search_space_digest,
+                fixed_features=fixed_features,
+                rounding_func=rounding_func,
+                optimizer_options=optimizer_options,
+            )
+        else:
+            # if the L1 norm is used, run the standard MOO optimization
+            candidates, expected_acquisition_value = super().optimize(
+                n=n,
+                search_space_digest=search_space_digest,
+                inequality_constraints=inequality_constraints,
+                fixed_features=fixed_features,
+                rounding_func=rounding_func,
+                optimizer_options=optimizer_options,
+            )
+
+        # make sure the clamping applies only to the sparse dimensions
+        candidates = clamp_candidates(
+            X=candidates,
+            target_point=self.target_point,
+            clamp_tol=CLAMP_TOL,
+            device=self.device,
+            dtype=self.dtype,
+        )
+        return candidates, expected_acquisition_value
+
+    def _optimize_with_homotopy(
+        self,
+        n: int,
+        search_space_digest: SearchSpaceDigest,
+        fixed_features: Optional[Dict[int, float]] = None,
+        rounding_func: Optional[Callable[[Tensor], Tensor]] = None,
+        optimizer_options: Optional[Dict[str, Any]] = None,
+    ) -> Tuple[Tensor, Tensor]:
+        """Optimize the SEBO acquisition function with the L0 norm using homotopy."""
+        # TODO: extend to support a fixed (no-homotopy) schedule
+        _tensorize = partial(torch.tensor, dtype=self.dtype, device=self.device)
+        ssd = search_space_digest
+        bounds = _tensorize(ssd.bounds).t()
+
+        homotopy_schedule = LogLinearHomotopySchedule(start=0.1, end=1e-3, num_steps=30)
+
+        # Prepare arguments for optimizer
+        optimizer_options_with_defaults = optimizer_argparse(
+            self.acqf,
+            bounds=bounds,
+            q=n,
+            optimizer_options=optimizer_options,
+        )
+
+        def callback():  # pyre-ignore
+            if (
+                self.acqf.cache_pending
+            ):  # If true, pending points are concatenated with X_baseline
+                if self.acqf._max_iep != 0:
+                    raise ValueError(
+                        "The maximum number of pending points (max_iep) must be 0"
+                    )
+                X_baseline = self.acqf._X_baseline_and_pending.clone()  # pyre-ignore
+                self.acqf.__init__(  # pyre-ignore
+                    X_baseline=X_baseline,
+                    model=self.surrogates["sebo"].model,
+                    ref_point=self.acqf.ref_point,
+                    objective=self.acqf.objective,
+                )
+            else:  # We can directly get the pending points here
+                X_pending = self.acqf.X_pending
+                self.acqf.__init__(  # pyre-ignore
+                    X_baseline=self.X_observed,
+                    model=self.surrogates["sebo"].model,
+                    ref_point=self.acqf.ref_point,
+                    objective=self.acqf.objective,
+                )
+                self.acqf.set_X_pending(X_pending)  # pyre-ignore
+
+        homotopy = Homotopy(
+            homotopy_parameters=[
+                HomotopyParameter(
+                    parameter=self.deterministic_model._f.a,
+                    schedule=homotopy_schedule,
+                )
+            ],
+            callbacks=[callback],
+        )
+        # the initial conditions need to exploit the sparse dimensions
+        batch_initial_conditions = get_batch_initial_conditions(
+            acq_function=self.acqf,
+            raw_samples=optimizer_options_with_defaults["raw_samples"],
+            X_pareto=self.X_pareto,
+            target_point=self.target_point,
+            num_restarts=optimizer_options_with_defaults["num_restarts"],
+            **{"device": self.device, "dtype": self.dtype},
+        )
+        candidates, expected_acquisition_value = optimize_acqf_homotopy(
+            q=n,
+            acq_function=self.acqf,
+            bounds=bounds,
+            homotopy=homotopy,
+            num_restarts=optimizer_options_with_defaults["num_restarts"],
+            raw_samples=optimizer_options_with_defaults["raw_samples"],
+            post_processing_func=rounding_func,
+            fixed_features=fixed_features,
+            batch_initial_conditions=batch_initial_conditions,
+        )
+
+        return candidates, expected_acquisition_value
+
+
+def clamp_candidates(
+    X: Tensor, target_point: Tensor, clamp_tol: float, **tkwargs: Any
+) -> Tensor:
+    """Clamp generated candidates that are within ``clamp_tol`` of the
+    target point to the target point."""
+    clamp_mask = (X - target_point).abs() < clamp_tol
+    X[clamp_mask] = (
+        target_point.clone().repeat(*X.shape[:-1], 1).to(**tkwargs)[clamp_mask]
+    )
+    return X
+
+
+def get_batch_initial_conditions(
+    acq_function: AcquisitionFunction,
+    raw_samples: int,
+    X_pareto: Tensor,
+    target_point: Tensor,
+    num_restarts: int = 20,
+    **tkwargs: Any,
+) -> Tensor:
+    """Generate starting points for the SEBO acquisition function optimization."""
+    dim = X_pareto.shape[-1]  # dimension
+    # (1) Global Sobol points
+    X_cand1 = SobolEngine(dimension=dim, scramble=True).draw(raw_samples).to(**tkwargs)
+    X_cand1 = X_cand1[
+        acq_function(X_cand1.unsqueeze(1)).topk(num_restarts // 5).indices
+    ]
+    # (2) Global Sobol points with a Bernoulli mask
+    X_cand2 = SobolEngine(dimension=dim, scramble=True).draw(raw_samples).to(**tkwargs)
+    mask = torch.rand(X_cand2.shape, **tkwargs) < 0.5
+    X_cand2[mask] = target_point.repeat(len(X_cand2), 1).to(**tkwargs)[mask]
+    X_cand2 = X_cand2[
+        acq_function(X_cand2.unsqueeze(1)).topk(num_restarts // 5).indices
+    ]
+    # (3) Perturbations of points on the Pareto frontier (as done by TuRBO and Spearmint)
+    X_cand3 = X_pareto.clone()[torch.randint(high=len(X_pareto), size=(raw_samples,))]
+    mask = X_cand3 != target_point
+    X_cand3[mask] += 0.2 * torch.randn(*X_cand3.shape, **tkwargs)[mask]
+    X_cand3 = torch.clamp(X_cand3, min=0.0, max=1.0)
+    X_cand3 = X_cand3[
+        acq_function(X_cand3.unsqueeze(1)).topk(num_restarts // 5).indices
+    ]
+    # (4) Apply a Bernoulli mask to points on the Pareto frontier
+    X_cand4 = X_pareto.clone()[torch.randint(high=len(X_pareto), size=(raw_samples,))]
+    mask = torch.rand(X_cand4.shape, **tkwargs) < 0.5
+    X_cand4[mask] = target_point.repeat(len(X_cand4), 1).to(**tkwargs)[mask].clone()
+    X_cand4 = X_cand4[
+        acq_function(X_cand4.unsqueeze(1)).topk(num_restarts // 5).indices
+    ]
+    return torch.cat((X_cand1, X_cand2, X_cand3, X_cand4), dim=0).unsqueeze(1)

diff --git a/ax/models/torch/deterministic_metric.py b/ax/models/torch/deterministic_metric.py
new file mode 100644
index 00000000000..7f431157b1c
--- /dev/null
+++ b/ax/models/torch/deterministic_metric.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Any, Callable, Dict, List, Optional
+
+import torch
+from ax.models.torch.botorch_defaults import _get_model
+from botorch.fit import fit_gpytorch_mll
+from botorch.models import ModelList
+from botorch.models.deterministic import GenericDeterministicModel
+from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood
+from gpytorch.mlls.leave_one_out_pseudo_likelihood import LeaveOneOutPseudoLikelihood
+from torch import Tensor
+
+
+def get_and_fit_model_list_det(
+    Xs: List[Tensor],
+    Ys: List[Tensor],
+    Yvars: List[Tensor],
+    task_features: List[int],
+    fidelity_features: List[int],
+    metric_names: List[str],
+    det_metric_names: List[str],
+    det_metric_funcs: Dict[str, Callable[[Tensor], Tensor]],
+    state_dict: Optional[Dict[str, Tensor]] = None,
+    refit_model: bool = True,
+    use_input_warping: bool = False,
+    use_loocv_pseudo_likelihood: bool = False,
+    **kwargs: Any,
+) -> ModelList:
+    r"""Instantiates and fits a botorch ModelList using the given data.
+
+    Args:
+        Xs: List of X data, one tensor per outcome.
+        Ys: List of Y data, one tensor per outcome.
+        Yvars: List of observed variances of Ys.
+        task_features: List of columns of X that are tasks.
+        fidelity_features: List of columns of X that are fidelity parameters.
+        metric_names: Names of each outcome Y in Ys.
+        det_metric_names: Names of the deterministic outcomes.
+        det_metric_funcs: Dict of deterministic metric function callables.
+        state_dict: If provided, will set model parameters to this state
+            dictionary. Otherwise, will fit the model.
+        refit_model: Flag for refitting model.
+
+    Returns:
+        A fitted ModelList.
+    """
+
+    if len(fidelity_features) > 0 or len(task_features) > 0:
+        raise NotImplementedError(
+            "fidelity_features and task_features are not currently supported!"
+        )
+    if any(m not in metric_names for m in det_metric_names):
+        raise ValueError(
+            "All deterministic metric names must be among the metric names."
+        )
+
+    models = []
+    for i, metric in enumerate(metric_names):
+        if metric in det_metric_names:
+            models.append(GenericDeterministicModel(det_metric_funcs[metric]))
+        else:
+            # use a single-task GP for each metric except the deterministic metrics
+            models.append(
+                _get_model(
+                    X=Xs[i],
+                    Y=Ys[i],
+                    Yvar=Yvars[i],
+                    use_input_warping=use_input_warping,
+                    **kwargs,
+                )
+            )
+
+    model = ModelList(*models)
+    model.to(Xs[0])
+
+    if state_dict is not None:
+        model.load_state_dict(state_dict)
+    if state_dict is None or refit_model:
+        # TODO: Add bounds for optimization stability - requires revamp upstream
+        bounds = {}
+        if use_loocv_pseudo_likelihood:
+            mll_cls = LeaveOneOutPseudoLikelihood
+        else:
+            mll_cls = ExactMarginalLogLikelihood
+        for metric, single_model in zip(metric_names, model.models):
+            # No GP fitting for the deterministic metrics
+            if metric not in det_metric_names:
+                mll = mll_cls(single_model.likelihood, single_model)
+                mll = fit_gpytorch_mll(mll, bounds=bounds)
+    return model
+
+
+def L1_norm_func(X: Tensor, init_point: Tensor) -> Tensor:
+    r"""Maps a `batch_shape x n x d`-dim input tensor `X`
+    to a `batch_shape x n x 1`-dim tensor of L1 norms. To be used
+    for constructing a GenericDeterministicModel.
+    """
+    return torch.norm((X - init_point), p=1, dim=-1, keepdim=True)

diff --git a/ax/models/torch/tests/test_sebo.py b/ax/models/torch/tests/test_sebo.py
new file mode 100644
index 00000000000..e12e943c990
--- /dev/null
+++ b/ax/models/torch/tests/test_sebo.py
@@ -0,0 +1,334 @@
+#!/usr/bin/env fbpython
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+from __future__ import annotations
+
+import dataclasses
+import functools
+from typing import Any, Dict
+from unittest import mock
+from unittest.mock import Mock
+
+import torch
+from ax.core.search_space import SearchSpaceDigest
+from ax.models.torch.botorch_modular.acquisition import Acquisition
+from ax.models.torch.botorch_modular.sebo import SEBOAcquisition
+from ax.models.torch.botorch_modular.surrogate import Surrogate
+from ax.models.torch.deterministic_metric import L1_norm_func
+from ax.models.torch_base import TorchOptConfig
+from ax.utils.common.constants import Keys
+from ax.utils.common.testutils import TestCase
+from botorch.acquisition import PosteriorMean
+from botorch.acquisition.monte_carlo import qNoisyExpectedImprovement
+from botorch.acquisition.multi_objective.monte_carlo import (
+    qNoisyExpectedHypervolumeImprovement,
+)
+from botorch.acquisition.penalized import L0Approximation
+from botorch.models.deterministic import GenericDeterministicModel
+from botorch.models.gp_regression import SingleTaskGP
+from botorch.models.model import ModelList
+from botorch.optim import Homotopy, HomotopyParameter, LinearHomotopySchedule
+from botorch.utils.datasets import SupervisedDataset
+from torch.nn import Parameter
+
+
+SEBOACQUISITION_PATH: str = SEBOAcquisition.__module__
+ACQUISITION_PATH: str = Acquisition.__module__
+CURRENT_PATH: str = __name__
+SURROGATE_PATH: str = Surrogate.__module__
+
+
+class TestSebo(TestCase):
+    def setUp(self) -> None:
+        tkwargs: Dict[str, Any] = {"dtype": torch.double}
+        self.botorch_model_class = SingleTaskGP
+        self.surrogates = Surrogate(botorch_model_class=self.botorch_model_class)
+        self.X = torch.tensor([[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]], **tkwargs)
+        self.target_point = torch.tensor([1.0, 1.0, 1.0], **tkwargs)
+        self.Y = torch.tensor([[3.0], [4.0]], **tkwargs)
+        self.Yvar = torch.tensor([[0.0], [2.0]], **tkwargs)
+        self.training_data = [SupervisedDataset(X=self.X, Y=self.Y)]
+        self.fidelity_features = [2]
+        self.surrogates.construct(
+            datasets=self.training_data,
+            metric_names=["m1"],
+            fidelity_features=self.fidelity_features,
+        )
+        self.surrogates._outcomes = ["m1"]
+        self.search_space_digest = SearchSpaceDigest(
+            feature_names=["a", "b", "c"],
+            bounds=[(0.0, 10.0), (0.0, 10.0), (0.0, 10.0)],
+            target_fidelities={2: 1.0},
+        )
+        self.botorch_acqf_class = qNoisyExpectedHypervolumeImprovement
+        self.objective_weights = torch.tensor([1.0], **tkwargs)
+        # new transformed objective weights
+        self.objective_weights_sebo = torch.tensor([1.0, -1.0], **tkwargs)
+        self.objective_thresholds = torch.tensor([1.0], **tkwargs)
+        self.objective_thresholds_sebo = torch.tensor([1.0, 3.0], **tkwargs)
+
+        self.pending_observations = [torch.tensor([[1.0, 3.0, 4.0]], **tkwargs)]
+        self.outcome_constraints = (
+            torch.tensor([[1.0]], **tkwargs),
+            torch.tensor([[0.5]], **tkwargs),
+        )
+        self.outcome_constraints_sebo = (
+            torch.tensor([[1.0, 0.0]], **tkwargs),
+            torch.tensor([[0.5]], **tkwargs),
+        )
+        self.linear_constraints = None
+        self.fixed_features = {1: 2.0}
+        self.options = {"best_f": 0.0, "target_point": self.target_point}
+        self.inequality_constraints = [
+            (torch.tensor([0, 1], **tkwargs), torch.tensor([-1.0, 1.0], **tkwargs), 1)
+        ]
+        self.rounding_func = lambda x: x
+        self.optimizer_options = {Keys.NUM_RESTARTS: 40, Keys.RAW_SAMPLES: 1024}
+        self.tkwargs = tkwargs
+        self.torch_opt_config = TorchOptConfig(
+            objective_weights=self.objective_weights,
+            objective_thresholds=self.objective_thresholds,
+            pending_observations=self.pending_observations,
+            outcome_constraints=self.outcome_constraints,
+            linear_constraints=self.linear_constraints,
+            fixed_features=self.fixed_features,
+        )
+
+    # pyre-fixme[3]: Return type must be annotated.
+    # pyre-fixme[2]: Parameter must be annotated.
+    def get_acquisition_function(self, fixed_features=None, options=None):
+        return SEBOAcquisition(
+            botorch_acqf_class=qNoisyExpectedHypervolumeImprovement,
+            surrogates={Keys.ONLY_SURROGATE: self.surrogates},
+            search_space_digest=self.search_space_digest,
+            torch_opt_config=dataclasses.replace(
+                self.torch_opt_config, fixed_features=fixed_features or {}
+            ),
+            options=options or self.options,
+        )
+
+    def test_init(self) -> None:
+        acquisition1 = self.get_acquisition_function(
+            options={"target_point": self.target_point},
+        )
+        # Check that the deterministic metric is added to the surrogate
+        surrogate = acquisition1.surrogates["sebo"]
+        self.assertIsInstance(surrogate._model, ModelList)
+        self.assertIsInstance(surrogate._model.models[0], SingleTaskGP)
+        self.assertIsInstance(surrogate._model.models[1], GenericDeterministicModel)
+        self.assertEqual(acquisition1.det_metric_indx, -1)
+
+        # Check that the right penalty term is instantiated
+        self.assertEqual(acquisition1.penalty_name, "L0_norm")
+        self.assertIsInstance(surrogate._model.models[1]._f, L0Approximation)
+
+        # Check the transformed objective threshold
+        self.assertTrue(
+            torch.equal(
+                acquisition1.acqf.ref_point[-1], -1 * self.objective_thresholds_sebo[-1]
+            )
+        )
+        self.assertTrue(
+            torch.equal(
+                acquisition1.objective_thresholds, self.objective_thresholds_sebo
+            )
+        )
+        self.assertEqual(acquisition1.sparsity_threshold, self.X.shape[-1])
+
+        # Check using a non-default penalty
+        acquisition2 = self.get_acquisition_function(
+            options={"penalty": "L1_norm", "target_point": self.target_point},
+        )
+        self.assertEqual(acquisition2.penalty_name, "L1_norm")
+        self.assertIsInstance(
+            acquisition2.surrogates["sebo"]._model.models[1]._f, functools.partial
+        )
+        self.assertIs(
+            acquisition2.surrogates["sebo"]._model.models[1]._f.func, L1_norm_func
+        )
+
+        # assert an error is raised when constructing non-L0/L1 penalty terms
+        with self.assertRaisesRegex(
+            NotImplementedError, "L2_norm is not currently implemented."
+        ):
+            self.get_acquisition_function(
+                fixed_features=self.fixed_features,
+                options={"penalty": "L2_norm", "target_point": self.target_point},
+            )
+
+        # assert an error is raised if multiple surrogates are given
+        with self.assertRaisesRegex(
+            ValueError, "SEBO does not support multiple surrogates."
+        ):
+            SEBOAcquisition(
+                botorch_acqf_class=qNoisyExpectedHypervolumeImprovement,
+                surrogates={
+                    Keys.ONLY_SURROGATE: self.surrogates,
+                    "sebo2": self.surrogates,
+                },
+                search_space_digest=self.search_space_digest,
+                torch_opt_config=self.torch_opt_config,
+                options=self.options,
+            )
+
+        # assert an error is raised if the target point is not given
+        with self.assertRaisesRegex(ValueError, "please provide target point."):
+            self.get_acquisition_function(options={"penalty": "L1_norm"})
+
+    @mock.patch(f"{ACQUISITION_PATH}.optimize_acqf")
+    def test_optimize_l1(self, mock_optimize_acqf: Mock) -> None:
+        mock_optimize_acqf.return_value = (
+            # pyre-fixme[6]: For 2nd param expected `bool` but got `dtype`.
+            torch.tensor([[1.0, 1.0, 1.0], [2.0, 2.0, 2.0]], **{"dtype": torch.double}),
+            # pyre-fixme[6]: For 2nd param expected `bool` but got `dtype`.
+            torch.tensor([1.0, 2.0], **{"dtype": torch.double}),
+        )
+        acquisition = self.get_acquisition_function(
+            fixed_features=self.fixed_features,
+            options={"penalty": "L1_norm", "target_point": self.target_point},
+        )
+        acquisition.optimize(
+            n=2,
+            search_space_digest=self.search_space_digest,
+            inequality_constraints=self.inequality_constraints,
+            fixed_features=self.fixed_features,
+            rounding_func=self.rounding_func,
+            optimizer_options=self.optimizer_options,
+        )
+
+        args, kwargs = mock_optimize_acqf.call_args
+        self.assertEqual(kwargs["acq_function"], acquisition.acqf)
+        self.assertEqual(kwargs["q"], 2)
+        self.assertEqual(kwargs["inequality_constraints"], self.inequality_constraints)
+        self.assertEqual(kwargs["post_processing_func"], self.rounding_func)
+        self.assertEqual(kwargs["num_restarts"], self.optimizer_options["num_restarts"])
+        self.assertEqual(kwargs["raw_samples"], self.optimizer_options["raw_samples"])
+
+    @mock.patch(
+        f"{SEBOACQUISITION_PATH}.get_batch_initial_conditions", return_value=None
+    )
+    @mock.patch(f"{SEBOACQUISITION_PATH}.Homotopy")
+    def test_optimize_l0_homotopy(
+        self,
+        mock_homotopy: Mock,
+        mock_get_batch_initial_conditions: Mock,
+    ) -> None:
+        tkwargs = {"dtype": torch.double}
+        acquisition = self.get_acquisition_function(
+            fixed_features=self.fixed_features,
+            options={"penalty": "L0_norm", "target_point": self.target_point},
+        )
+        # overwrite acqf to validate homotopy
+        model = GenericDeterministicModel(f=lambda x: 5 - (x - p) ** 2)
+        acqf = PosteriorMean(model=model)
+        acquisition.acqf = acqf
+
+        p = Parameter(-2 * torch.ones(1, **tkwargs))  # pyre-ignore
+        hp = HomotopyParameter(
+            parameter=p,
+            schedule=LinearHomotopySchedule(start=4, end=0, num_steps=5),
+        )
+        mock_homotopy.return_value = Homotopy(homotopy_parameters=[hp])
+
+        search_space_digest = SearchSpaceDigest(
+            feature_names=["a"],
+            bounds=[(-10.0, 5.0)],
+        )
+        candidate, acqf_val = acquisition._optimize_with_homotopy(
+            n=1,
+            search_space_digest=search_space_digest,
+            optimizer_options={
+                "num_restarts": 2,
+                "sequential": True,
+                "raw_samples": 16,
+            },
+        )
+        self.assertEqual(candidate, torch.zeros(1, **tkwargs))  # pyre-ignore
+        self.assertEqual(acqf_val, 5 * torch.ones(1, **tkwargs))  # pyre-ignore
+
+    @mock.patch(f"{SEBOACQUISITION_PATH}.optimize_acqf_homotopy")
+    def test_optimize_l0(self, mock_optimize_acqf_homotopy: Mock) -> None:
+        mock_optimize_acqf_homotopy.return_value = (
+            # pyre-fixme[6]: For 2nd param expected `bool` but got `dtype`.
+            torch.tensor([[1.0, 1.0, 1.0], [2.0, 2.0, 2.0]], **{"dtype": torch.double}),
+            # pyre-fixme[6]: For 2nd param expected `bool` but got `dtype`.
+            torch.tensor([1.0, 2.0], **{"dtype": torch.double}),
+        )
+        acquisition = self.get_acquisition_function(
+            fixed_features=self.fixed_features,
+            options={"penalty": "L0_norm", "target_point": self.target_point},
+        )
+        acquisition.optimize(
+            n=2,
+            search_space_digest=self.search_space_digest,
+            # inequality constraints are not supported with homotopy yet
+            # inequality_constraints=self.inequality_constraints,
+            fixed_features=self.fixed_features,
+            rounding_func=self.rounding_func,
+            optimizer_options=self.optimizer_options,
+        )
+
+        args, kwargs = mock_optimize_acqf_homotopy.call_args
+        self.assertEqual(kwargs["acq_function"], acquisition.acqf)
+        self.assertEqual(kwargs["q"], 2)
+        self.assertEqual(kwargs["post_processing_func"], self.rounding_func)
+        self.assertEqual(kwargs["num_restarts"], self.optimizer_options["num_restarts"])
+        self.assertEqual(kwargs["raw_samples"], self.optimizer_options["raw_samples"])
+
+        # set self.acqf.cache_pending to False
+        acquisition2 = self.get_acquisition_function(
+            fixed_features=self.fixed_features,
+            options={"penalty": "L0_norm", "target_point": self.target_point},
+        )
+        acquisition2.acqf.cache_pending = torch.tensor(False)
+        acquisition2.optimize(
+            n=2,
+            search_space_digest=self.search_space_digest,
+            # inequality constraints are not supported with homotopy yet
+            # inequality_constraints=self.inequality_constraints,
+            fixed_features=self.fixed_features,
+            rounding_func=self.rounding_func,
+            optimizer_options=self.optimizer_options,
+        )
+        args, kwargs = mock_optimize_acqf_homotopy.call_args
+        self.assertEqual(kwargs["acq_function"], acquisition2.acqf)
+        self.assertEqual(kwargs["q"], 2)
+        self.assertEqual(kwargs["post_processing_func"], self.rounding_func)
+        self.assertEqual(kwargs["num_restarts"], self.optimizer_options["num_restarts"])
+        self.assertEqual(kwargs["raw_samples"], self.optimizer_options["raw_samples"])
+
+        # assert an error is raised with inequality_constraints input
+        acquisition = self.get_acquisition_function(
+            fixed_features=self.fixed_features,
+            options={"penalty": "L0_norm", "target_point": self.target_point},
+        )
+        with self.assertRaisesRegex(
+            NotImplementedError,
+            "Homotopy does not support optimization with inequality "
+            + "constraints. Use L1 penalty norm instead.",
+        ):
+            acquisition.optimize(
+                n=2,
+                search_space_digest=self.search_space_digest,
+                inequality_constraints=self.inequality_constraints,
+                fixed_features=self.fixed_features,
+                rounding_func=self.rounding_func,
+                optimizer_options=self.optimizer_options,
+            )
+
+        # assert an error is raised when using a wrong botorch_acqf_class
+        with self.assertRaisesRegex(
+            ValueError, "botorch_acqf_class must be qEHVI to use SEBO"
+        ):
+            acquisition = SEBOAcquisition(
+                botorch_acqf_class=qNoisyExpectedImprovement,
+                surrogates={Keys.ONLY_SURROGATE: self.surrogates},
+                search_space_digest=self.search_space_digest,
+                torch_opt_config=dataclasses.replace(
+                    self.torch_opt_config, fixed_features=self.fixed_features
+                ),
+                options=self.options,
+            )

diff --git a/ax/storage/botorch_modular_registry.py b/ax/storage/botorch_modular_registry.py
index 2ede4b3cf57..d2579cc1eb5 100644
--- a/ax/storage/botorch_modular_registry.py
+++ b/ax/storage/botorch_modular_registry.py
@@ -10,6 +10,7 @@
 
 # Ax `Acquisition` imports
 from ax.models.torch.botorch_modular.acquisition import Acquisition
+from ax.models.torch.botorch_modular.sebo import SEBOAcquisition
 
 # BoTorch `AcquisitionFunction` imports
 from botorch.acquisition.acquisition import AcquisitionFunction
@@ -216,3 +217,6 @@ def register_acquisition_function(acqf_class: Type[AcquisitionFunction]) -> None
     class_name = acqf_class.__name__
     CLASS_TO_REGISTRY[AcquisitionFunction].update({acqf_class: class_name})
     CLASS_TO_REVERSE_REGISTRY[AcquisitionFunction].update({class_name: acqf_class})
+
+
+register_acquisition(SEBOAcquisition)
diff --git a/ax/storage/json_store/tests/test_json_store.py b/ax/storage/json_store/tests/test_json_store.py
index 4b300cee73d..2d4adc12fb8 100644
--- a/ax/storage/json_store/tests/test_json_store.py
+++ b/ax/storage/json_store/tests/test_json_store.py
@@ -105,6 +105,7 @@
     get_scalarized_objective,
     get_scheduler_options_batch_trial,
     get_search_space,
+    get_sebo_acquisition_class,
     get_sum_constraint1,
     get_sum_constraint2,
     get_surrogate,
@@ -209,6 +210,7 @@
     ("ThresholdEarlyStoppingStrategy", get_threshold_early_stopping_strategy),
     ("Trial", get_trial),
     ("WinsorizationConfig", get_winsorization_config),
+    ("SEBOAcquisition", get_sebo_acquisition_class),
 ]
diff --git a/ax/utils/testing/core_stubs.py b/ax/utils/testing/core_stubs.py
index ff53f109c0e..8ef598d3a73 100644
--- a/ax/utils/testing/core_stubs.py
+++ b/ax/utils/testing/core_stubs.py
@@ -91,6 +91,7 @@
 from ax.modelbridge.factory import Cont_X_trans, get_factorial, get_sobol
 from ax.models.torch.botorch_modular.acquisition import Acquisition
 from ax.models.torch.botorch_modular.model import BoTorchModel, SurrogateSpec
+from ax.models.torch.botorch_modular.sebo import SEBOAcquisition
 from ax.models.torch.botorch_modular.surrogate import Surrogate
 from ax.models.winsorization_config import WinsorizationConfig
 from ax.runners.synthetic import SyntheticRunner
@@ -2006,6 +2007,10 @@ def get_acquisition_function_type() -> Type[AcquisitionFunction]:
     return qExpectedImprovement
 
 
+def get_sebo_acquisition_class() -> Type[SEBOAcquisition]:
+    return SEBOAcquisition
+
+
 def get_winsorization_config() -> WinsorizationConfig:
     return WinsorizationConfig(
         lower_quantile_margin=0.2,
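The deterministic-metric pattern introduced by `deterministic_metric.py` can be exercised on its own: a known penalty is wrapped in a `GenericDeterministicModel`, which then sits alongside fitted GPs in a `ModelList` and contributes zero-variance "observations". A minimal sketch using only code added in this patch (the tensor values are illustrative):

```python
import functools

import torch
from ax.models.torch.deterministic_metric import L1_norm_func
from botorch.models.deterministic import GenericDeterministicModel

# Penalty "metric": L1 distance from a sparsity target (here the origin).
target_point = torch.zeros(3, dtype=torch.double)
penalty = GenericDeterministicModel(
    f=functools.partial(L1_norm_func, init_point=target_point)
)

X = torch.tensor([[0.0, 0.5, 1.0]], dtype=torch.double)
# Deterministic posterior: the mean is exactly |x - target|_1 = 1.5 and the
# variance is zero, which is why get_and_fit_model_list_det skips GP fitting
# for these outcomes.
print(penalty.posterior(X).mean)
```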
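For orientation, this is how the new pieces fit together end to end: a single `Surrogate` plus a `target_point` option yields a `SEBOAcquisition` whose internal `ModelList` carries the penalty as an extra outcome with weight -1. The sketch below is distilled from `test_sebo.py`'s `setUp` and `test_init`; it assumes the Ax/BoTorch versions this patch targets, and touching the private `_outcomes` attribute simply mirrors the test setup rather than a public API:

```python
import torch
from ax.core.search_space import SearchSpaceDigest
from ax.models.torch.botorch_modular.sebo import SEBOAcquisition
from ax.models.torch.botorch_modular.surrogate import Surrogate
from ax.models.torch_base import TorchOptConfig
from ax.utils.common.constants import Keys
from botorch.acquisition.multi_objective.monte_carlo import (
    qNoisyExpectedHypervolumeImprovement,
)
from botorch.models.deterministic import GenericDeterministicModel
from botorch.models.gp_regression import SingleTaskGP
from botorch.utils.datasets import SupervisedDataset

X = torch.tensor([[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]], dtype=torch.double)
Y = torch.tensor([[3.0], [4.0]], dtype=torch.double)
surrogate = Surrogate(botorch_model_class=SingleTaskGP)
surrogate.construct(datasets=[SupervisedDataset(X=X, Y=Y)], metric_names=["m1"])
surrogate._outcomes = ["m1"]  # mirrors the test setup

acqf = SEBOAcquisition(
    surrogates={Keys.ONLY_SURROGATE: surrogate},
    search_space_digest=SearchSpaceDigest(
        feature_names=["a", "b", "c"],
        bounds=[(0.0, 10.0)] * 3,
    ),
    torch_opt_config=TorchOptConfig(
        objective_weights=torch.tensor([1.0], dtype=torch.double),
        objective_thresholds=torch.tensor([1.0], dtype=torch.double),
    ),
    botorch_acqf_class=qNoisyExpectedHypervolumeImprovement,
    # "L0_norm" (the default) routes optimize() through homotopy;
    # "L1_norm" uses the standard MOO path.
    options={
        "penalty": "L0_norm",
        "target_point": torch.zeros(3, dtype=torch.double),
    },
)
# The penalty is appended as the last model / outcome.
assert isinstance(acqf.surrogates["sebo"].model.models[1], GenericDeterministicModel)
```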
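Finally, the homotopy continuation itself: `L0Approximation` replaces the discontinuous L0 count with a smooth relaxation controlled by a scale parameter `a`, and `_optimize_with_homotopy` anneals `a` along `LogLinearHomotopySchedule(start=0.1, end=1e-3, num_steps=30)`, so early steps optimize an easy, smooth landscape and later steps approach the true L0 penalty. A small sketch of the relaxation tightening (printed values are approximate):

```python
import torch
from botorch.acquisition.penalized import L0Approximation

target = torch.zeros(3, dtype=torch.double)
l0 = L0Approximation(target_point=target)  # `a` starts at its default of 1.0
X = torch.tensor([[0.0, 0.0, 2.0]], dtype=torch.double)  # one "active" dimension

# sebo.py anneals `a` from 0.1 down to 1e-3; Homotopy mutates this same
# tensor in place via HomotopyParameter.
for a in (1.0, 0.1, 1e-3):
    l0.a.fill_(a)
    print(a, l0(X).item())  # ~0.86, then ~1.0: approaches the exact L0 norm
```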