Skip to content

Commit

Permalink
PairwiseGP modularization and PairwiseLogitLikelihood (#1193)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #1193

Modularize PairwiseGP's likelihood and add logit likelihood support in addition to the original probit likelihood. Fixed PairwiseGP's conditioning gradient issue.

Reviewed By: Balandat

Differential Revision: D35921953

fbshipit-source-id: ff121f654b7e0e2fabb67ec002b2e88481a575c9
  • Loading branch information
ItsMrLin authored and facebook-github-bot committed May 11, 2022
1 parent 3750514 commit 0514dfc
Show file tree
Hide file tree
Showing 8 changed files with 558 additions and 287 deletions.
16 changes: 16 additions & 0 deletions botorch/models/likelihoods/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from botorch.models.likelihoods.pairwise import (
PairwiseProbitLikelihood,
PairwiseLogitLikelihood,
)


__all__ = [
"PairwiseProbitLikelihood",
"PairwiseLogitLikelihood",
]
204 changes: 204 additions & 0 deletions botorch/models/likelihoods/pairwise.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

r"""
Pairwise likelihood for pairwise preference model (e.g., PairwiseGP).
"""

from __future__ import annotations

import math
from abc import ABC, abstractmethod
from typing import Tuple, Any

import torch
from gpytorch.likelihoods import Likelihood
from torch import Tensor
from torch.distributions import Bernoulli


class PairwiseLikelihood(Likelihood, ABC):
"""Pairwise likelihood base class for pairwise preference GP (e.g., PairwiseGP)"""

def forward(self, utility: Tensor, D: Tensor, **kwargs: Any) -> Bernoulli:
"""Given the difference in (estimated) utility util_diff = f(v) - f(u),
return a Bernoulli distribution object representing the likelihood of
the user prefer v over u.
Note that this is not used by the `PairwiseGP` model,
"""
return Bernoulli(probs=self.p(utility=utility, D=D))

@abstractmethod
def p(self, utility: Tensor, D: Tensor) -> Tensor:
"""Given the difference in (estimated) utility util_diff = f(v) - f(u),
return the probability of the user prefer v over u.
Args:
utility: A Tensor of shape `(batch_size) x n`, the utility at MAP point
D: D is `(batch_size x) m x n` matrix with all elements being zero in last
dimension except at two positions D[..., i] = 1 and D[..., j] = -1
respectively, representing item i is preferred over item j.
log: if true, return log probability
"""

def log_p(self, utility: Tensor, D: Tensor) -> Tensor:
"""return the log of p"""
return torch.log(self.p(utility=utility, D=D))

def negative_log_gradient_sum(self, utility: Tensor, D: Tensor) -> Tensor:
"""Calculate the sum of negative log gradient with respect to each item's latent
utility values. Useful for models using laplace approximation.
Args:
utility: A Tensor of shape `(batch_size x) n`, the utility at MAP point
D: D is `(batch_size x) m x n` matrix with all elements being zero in last
dimension except at two positions D[..., i] = 1 and D[..., j] = -1
respectively, representing item i is preferred over item j.
Returns:
A `(batch_size x) n` Tensor representing the sum of negative log gradient
values of the likelihood over all comparisons (i.e., the m dimension)
with respect to each item.
"""
raise NotImplementedError

def negative_log_hessian_sum(self, utility: Tensor, D: Tensor) -> Tensor:
"""Calculate the sum of negative log hessian with respect to each item's latent
utility values. Useful for models using laplace approximation.
Args:
utility: A Tensor of shape `(batch_size) x n`, the utility at MAP point
D: D is `(batch_size x) m x n` matrix with all elements being zero in last
dimension except at two positions D[..., i] = 1 and D[..., j] = -1
respectively, representing item i is preferred over item j.
Returns:
A `(batch_size x) n x n` Tensor representing the sum of negative log hessian
values of the likelihood over all comparisons (i.e., the m dimension) with
respect to each item.
"""
raise NotImplementedError


class PairwiseProbitLikelihood(PairwiseLikelihood):
"""Pairwise likelihood using probit function
Given two items v and u with utilities f(v) and f(u), the probability that we
prefer v over u with probability std_normal_cdf((f(v) - f(u))/sqrt(2)). Note
that this formulation implicitly assume the noise term is fixed at 1.
"""

# Clamping z values for better numerical stability. See self._calc_z for detail
# norm_cdf(z=3) ~= 0.999, top 0.1% percent
_zlim = 3

def _calc_z(self, utility: Tensor, D: Tensor) -> Tensor:
"""Calculate the z score given estimated utility values and
the comparison matrix D.
"""
scaled_util = (utility / math.sqrt(2)).unsqueeze(-1)
z = D.to(scaled_util) @ scaled_util
z = z.clamp(-self._zlim, self._zlim).squeeze(-1)
return z

def _calc_z_derived(self, z: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
"""Calculate auxiliary statistics derived from z, including log pdf,
log cdf, and the hazard function (pdf divided by cdf)"""
std_norm = torch.distributions.normal.Normal(
torch.zeros(1, dtype=z.dtype, device=z.device),
torch.ones(1, dtype=z.dtype, device=z.device),
)
z_logpdf = std_norm.log_prob(z)
z_cdf = std_norm.cdf(z)
z_logcdf = torch.log(z_cdf)
hazard = torch.exp(z_logpdf - z_logcdf)
return z_logpdf, z_logcdf, hazard

def p(self, utility: Tensor, D: Tensor, log: bool = False) -> Tensor:
z = self._calc_z(utility=utility, D=D)
std_norm = torch.distributions.normal.Normal(
torch.zeros(1, dtype=z.dtype, device=z.device),
torch.ones(1, dtype=z.dtype, device=z.device),
)
return std_norm.cdf(z)

def negative_log_gradient_sum(self, utility: Tensor, D: Tensor) -> Tensor:
# Compute the sum over of grad. of negative Log-LH wrt utility f.
# Original grad should be of dimension m x n, as in (6) from
# [Chu2005preference]_. The sum over the m dimension of grad. of
# negative log likelihood with respect to the utility
z = self._calc_z(utility, D)
_, _, h = self._calc_z_derived(z)
h_factor = h / math.sqrt(2)
grad = (h_factor.unsqueeze(-2) @ (-D)).squeeze(-2)

return grad

def negative_log_hessian_sum(self, utility: Tensor, D: Tensor) -> Tensor:
# Original hess should be of dimension m x n x n, as in (7) from
# [Chu2005preference]_ Sum over the first dimension and return a tensor of
# shape n x n.
# The sum over the m dimension of hessian of negative log likelihood
# with respect to the utility
DT = D.transpose(-1, -2)
z = self._calc_z(utility, D)
_, _, h = self._calc_z_derived(z)
mul_factor = h * (h + z) / 2
mul_factor = mul_factor.unsqueeze(-2).expand(*DT.size())
# multiply the hessian value by preference signs
# (+1 if preferred or -1 otherwise) and sum over the m dimension
hess = DT * mul_factor @ D
return hess


class PairwiseLogitLikelihood(PairwiseLikelihood):
"""Pairwise likelihood using logistic (i.e., sigmoid) function
Given two items v and u with utilities f(v) and f(u), the probability that we
prefer v over u with probability sigmoid(f(v) - f(u)). Note
that this formulation implicitly assume the beta term in logistic function is
fixed at 1.
"""

# Clamping logit values for better numerical stability.
# See self._calc_logit for detail logistic(8) ~= 0.9997, top 0.03% percent
_logit_lim = 8

def _calc_logit(self, utility: Tensor, D: Tensor) -> Tensor:
logit = D.to(utility) @ utility.unsqueeze(-1)
logit = logit.clamp(-self._logit_lim, self._logit_lim).squeeze(-1)
return logit

def log_p(self, utility: Tensor, D: Tensor) -> Tensor:
logit = self._calc_logit(utility=utility, D=D)
return torch.nn.functional.logsigmoid(logit)

def p(self, utility: Tensor, D: Tensor) -> Tensor:
logit = self._calc_logit(utility=utility, D=D)
return torch.sigmoid(logit)

def negative_log_gradient_sum(self, utility: Tensor, D: Tensor) -> Tensor:
indices_shape = utility.shape[:-1] + (-1,)
winner_indices = (D == 1).nonzero(as_tuple=True)[-1].reshape(indices_shape)
loser_indices = (D == -1).nonzero(as_tuple=True)[-1].reshape(indices_shape)
ex = torch.exp(torch.gather(utility, -1, winner_indices))
ey = torch.exp(torch.gather(utility, -1, loser_indices))
unsigned_grad = ey / (ex + ey)
grad = (unsigned_grad.unsqueeze(-2) @ (-D)).squeeze(-2)
return grad

def negative_log_hessian_sum(self, utility: Tensor, D: Tensor) -> Tensor:
DT = D.transpose(-1, -2)
# calculating f(v) - f(u) given u > v information in D
neg_logit = -(D @ utility.unsqueeze(-1)).squeeze(-1)
term = torch.sigmoid(neg_logit)
mul_factor = term - (term) ** 2
mul_factor = mul_factor.unsqueeze(-2).expand(*DT.size())
# multiply the hessian value by preference signs
# (+1 if preferred or -1 otherwise) and sum over the m dimension
hess = DT * mul_factor @ D
return hess
Loading

0 comments on commit 0514dfc

Please sign in to comment.