Move consolidate_duplicates to BoTorch and consolidate duplicates in PairwiseGP (facebook#1536)

Summary:
Pull Request resolved: facebook#1536

X-link: pytorch/botorch#1754

# Context
One problem for GP models is that evaluating points that are close together is likely to trigger numerical issues resulting from a non-PSD covariance matrix. The problem is particularly pronounced, and hard to bypass, when doing optimization (either BOPE or preferential BO), as we need to repeatedly compare points against the incumbent.
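As an illustration (not part of this diff), here is a minimal sketch of the failure mode: with a stationary kernel, two numerically identical rows make the kernel matrix rank-deficient, so the Cholesky factorization used during fitting fails.

```python
import torch

# Illustration only: duplicated inputs make a stationary kernel matrix
# rank-deficient, so Cholesky factorization fails.
X = torch.tensor([[1.0, 2.0], [1.0, 2.0]])  # two numerically identical points
K = torch.exp(-torch.cdist(X, X) ** 2)      # RBF-style kernel -> [[1., 1.], [1., 1.]]
try:
    torch.linalg.cholesky(K)
except torch.linalg.LinAlgError as e:
    print(f"not positive-definite in practice: {e}")
```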

To improve preference learning stability, we can automatically consolidate identical (or numerically near-identical) points into a single point. For example, training data `datapoints = [[1, 2], [3, 4], [1, 2], [5, 6]]` with `comparisons = [[0, 1], [2, 3]]` will be turned into the consolidated `datapoints = [[1, 2], [3, 4], [5, 6]]` and `comparisons = [[0, 1], [0, 2]]`. This shouldn't lead to any change in model fitting, as the likelihood remains the same.
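A minimal sketch of that example using the `consolidate_duplicates` helper this diff imports; the three-value return follows its usage in the pairwise.py diff below, and the printed outputs are the expected values from the example above.

```python
import torch
from botorch.models.utils.assorted import consolidate_duplicates

datapoints = torch.tensor([[1.0, 2.0], [3.0, 4.0], [1.0, 2.0], [5.0, 6.0]])
comparisons = torch.tensor([[0, 1], [2, 3]], dtype=torch.long)

# Row 2 duplicates row 0, so it is dropped and the comparison indices are remapped.
consolidated_X, consolidated_Y, _ = consolidate_duplicates(datapoints, comparisons)
print(consolidated_X)  # tensor([[1., 2.], [3., 4.], [5., 6.]])
print(consolidated_Y)  # tensor([[0, 1], [0, 2]])
```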

# Code changes
To implement this, the following changes are made:
- Upstreamed the `consolidate_duplicates` and related helper functions from Ax to BoTorch.
- Implicitly replace `datapoints` and `comparisons` in `PairwiseGP` with the consolidated ones.
- Added `unconsolidated_datapoints`, `unconsolidated_comparisons`, and `unconsolidated_utility` in case the user would like to access the original data and the corresponding utility directly from the model (see the sketch after this list).
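A hedged sketch of accessing these attributes; the attribute names come from the summary above, and their exact semantics are defined in the linked BoTorch change (pytorch/botorch#1754).

```python
import torch
from botorch.models.pairwise_gp import PairwiseGP

datapoints = torch.tensor([[1.0, 2.0], [3.0, 4.0], [1.0, 2.0], [5.0, 6.0]])
comparisons = torch.tensor([[0, 1], [2, 3]], dtype=torch.long)

# Consolidation happens implicitly; the model fits on the deduplicated data.
model = PairwiseGP(datapoints, comparisons)

model.datapoints                  # consolidated inputs used for fitting
model.unconsolidated_datapoints   # original 4-row inputs, duplicate included
model.unconsolidated_comparisons  # original comparison indices
model.unconsolidated_utility      # utility values aligned with the original rows
```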

Reviewed By: Balandat

Differential Revision: D44126864

fbshipit-source-id: dc0b25d53a859f27226192d3efa61036c4948cc1
ItsMrLin authored and facebook-github-bot committed Mar 21, 2023
1 parent 41c0306 commit a9ffe5c
Showing 2 changed files with 6 additions and 51 deletions.
ax/modelbridge/modelbridge_utils.py: 26 changes (0 additions & 26 deletions)

@@ -1342,32 +1342,6 @@ def observation_features_to_array(
     return np.array([[of.parameters[p] for p in parameters] for of in obsf])
 
 
-def detect_duplicates(
-    X: Tensor,
-    rtol: float = 1e-5,
-    atol: float = 1e-8,
-) -> Iterator[Tuple[int, int]]:
-    """Returns an iterator over index pairs `(duplicate index, original index)` for all
-    duplicate entries of `X`.
-    """
-    tols = atol
-    if rtol:
-        rval = X.abs().max(dim=-1, keepdim=True).values
-        tols = tols + rtol * rval.max(rval.transpose(-1, -2))
-
-    n = X.shape[-2]
-    dist = torch.full((n, n), float("inf"), device=X.device, dtype=X.dtype)
-    dist[torch.triu_indices(n, n, offset=1).unbind()] = torch.nn.functional.pdist(
-        X, p=float("inf")
-    )
-    return (
-        (i, int(j))
-        # pyre-fixme[19]: Expected 1 positional argument.
-        for diff, j, i in zip(*(dist - tols).min(dim=-2), range(n))
-        if diff < 0
-    )
-
-
 def feasible_hypervolume(  # pragma: no cover
     optimization_config: MultiObjectiveOptimizationConfig, values: Dict[str, np.ndarray]
 ) -> np.ndarray:
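For reference, a sketch of how the deleted helper behaves; per the summary it was upstreamed alongside `consolidate_duplicates`, so the BoTorch import path below is an assumption.

```python
import torch
from botorch.models.utils.assorted import detect_duplicates  # assumed new home

X = torch.tensor([[1.0, 2.0], [3.0, 4.0], [1.0, 2.0]])
# Yields (duplicate index, original index) pairs; row 2 duplicates row 0.
print(list(detect_duplicates(X=X)))  # [(2, 0)]
```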
ax/modelbridge/pairwise.py: 31 changes (6 additions & 25 deletions)

@@ -12,8 +12,8 @@
 import torch
 from ax.core.observation import ObservationData, ObservationFeatures
 from ax.core.types import TCandidateMetadata
-from ax.modelbridge.modelbridge_utils import detect_duplicates
 from ax.modelbridge.torch import TorchModelBridge
+from botorch.models.utils.assorted import consolidate_duplicates
 from botorch.utils.containers import SliceContainer
 from botorch.utils.datasets import RankingDataset, SupervisedDataset
 from torch import Tensor

@@ -94,36 +94,17 @@ def _binary_pref_to_comp_pair(Y: Tensor) -> Tensor:
 
 
 def _consolidate_comparisons(X: Tensor, Y: Tensor) -> Tuple[Tensor, Tensor]:
     """Drop duplicated Xs and update the indices in Ys accordingly"""
     if len(X.shape) != 2:
         raise ValueError("X must have 2 dimensions.")  # pragma: no cover
-    if len(Y.shape) != 2:
-        raise ValueError("Y must have 2 dimensions.")  # pragma: no cover
-    if Y.shape[-1] != 2:
-        raise ValueError(  # pragma: no cover
-            "The last dimension of Y must contain 2 elements "
-            "representing the pairwise comparison."
-        )
-
-    n = X.shape[-2]
-    dupplicates = list(detect_duplicates(X=X))
-    if len(dupplicates) != 0:
-        dup_indices, kept_indices = zip(*dupplicates)
-        unique_indices = set(range(n)) - set(dup_indices)
-
-        # After dropping the duplicates,
-        # the kept ones' indices may also change by being shifted up
-        new_idx_map = dict(zip(unique_indices, range(len(unique_indices))))
-        new_indices_for_dup = (new_idx_map[idx] for idx in kept_indices)
-        new_idx_map.update(dict(zip(dup_indices, new_indices_for_dup)))
-
-        consolidated_X = X[list(unique_indices), :]
-        consolidated_Y = torch.tensor(
-            [(new_idx_map[y1.item()], new_idx_map[y2.item()]) for y1, y2 in Y],
-            dtype=torch.long,
-        )
-        return consolidated_X, consolidated_Y
-    else:
-        return X, Y
+    if len(Y.shape) != 2:
+        raise ValueError("Y must have 2 dimensions.")
+
+    X, Y, _ = consolidate_duplicates(X, Y)
+    return X, Y
 
 
 def _validate_Y_values(Y: Tensor) -> None:
