Improving test coverage of UnifiedSkewNormal code (#1408)

Summary: Pull Request resolved: #1408. This commit improves the test coverage of the code located in botorch/utils/probability. For the current coverage without this commit, [see here](https://app.codecov.io/gh/pytorch/botorch/pull/1394). Differential Revision: D39556258. fbshipit-source-id: 4e012054abf4ece1f1739dfc13eb051f09be828d
pytorch · Oct 6, 2022 · 16d0bde · 16d0bde
1 parent 4cab890
commit 16d0bde
Show file tree

Hide file tree

Showing 12 changed files with 458 additions and 73 deletions.
diff --git a/botorch/utils/probability/bvn.py b/botorch/utils/probability/bvn.py
@@ -7,18 +7,10 @@
 r"""
 Methods for computing bivariate normal probabilities and statistics.
 
-.. [Drezner1990computation]
-    Z. Drezner and G. O. Wesolowsky. On the computation of the bivariate normal
-    integral. Journal of Statistical Computation and Simulation, 1990.
-
 .. [Genz2004bvnt]
     A. Genz. Numerical computation of rectangular bivariate and trivariate normal and
     t probabilities. Statistics and Computing, 2004.
 
-.. [Rosenbaum1961moments]
-    S. Rosenbaum. Moments of a Truncated Bivariate Normal Distribution. Journal of the
-    Royal Statistical Society (Series B), 1961.
-
 .. [Muthen1990moments]
     B. Muthen. Moments of the censored and truncated bivariate normal distribution.
     British Journal of Mathematical and Statistical Psychology, 1990.
@@ -100,16 +92,17 @@ def bvn(r: Tensor, xl: Tensor, yl: Tensor, xu: Tensor, yu: Tensor) -> Tensor:
 def bvnu(r: Tensor, h: Tensor, k: Tensor) -> Tensor:
     r"""Solves for `P(x > h, y > k)` where `x` and `y` are standard bivariate normal
     random variables with correlation coefficient `r`. In [Genz2004bvnt]_, this is (1)
-    ```
-    L(h, k, r) = P(x < -h, y < -k)
-               = 1/(a 2\pi) \int_{h}^{\infty} \int_{k}^{\infty} f(x, y, r) dy dx,
-    ```
+
+        `L(h, k, r) = P(x < -h, y < -k) \
+        = 1/(a 2\pi) \int_{h}^{\infty} \int_{k}^{\infty} f(x, y, r) dy dx,`
+
     where `f(x, y, r) = e^{-1/(2a^2) (x^2 - 2rxy + y^2)}` and `a = (1 - r^2)^{1/2}`.
 
     [Genz2004bvnt]_ report the following integation scheme incurs a maximum of 5e-16
-    error when run in double precision. For strongly correlated variables |r| >= 0.925,
-    use a 20-point quadrature rule on a 5th order Taylor expansion. Elsewhere,
-    numerically integrate in polar coordinates using no more than 20 quadrature points.
+    error when run in double precision. For strongly correlated variables with
+    `|r| >= 0.925`, use a 20-point quadrature rule on a 5th order Taylor expansion.
+    Elsewhere, numerically integrate in polar coordinates using no more than
+    20 quadrature points.
 
     Args:
         r: Tensor of correlation coefficients.
@@ -149,6 +142,7 @@ def _bvnu_polar(
         L(h, k, r) = \Phi(-h)\Phi(-k) + 1/(2\pi) \int_{0}^{sin^{-1}(r)} f(t) dt
         f(t) = e^{-0.5 cos(t)^{-2} (h^2 + k^2 - 2hk sin(t))}
     ```
+
     For details, see Section 2.2 of [Genz2004bvnt]_.
     """
     if num_points is None:
@@ -175,14 +169,19 @@ def _bvnu_polar(
 def _bvnu_taylor(r: Tensor, h: Tensor, k: Tensor, num_points: int = 20) -> Tensor:
     r"""Solves for `P(x > h, y > k)` via Taylor expansion.
 
-    Following [Drezner1990computation], the standard BVN problem may be rewritten as
+    Following [Drezner1990computation]_, the standard BVN problem may be rewritten as
     ```
         L(h, k, r) = L(h, k, s) - s/(2\pi) \int_{0}^{a} f(x) dx
         f(x) = (1 - x^2){-1/2} e^{-0.5 ((h - sk)/ x)^2} e^{-shk/(1 + (1 - x^2)^{1/2})},
     ```
+
     where `s = sign(r)` and `a = sqrt(1 - r^{2})`. The term `L(h, k, s)` is analytic.
     The second integral is approximated via Taylor expansion. See Sections 2.3 and
     2.4 of [Genz2004bvnt]_.
+
+    .. [Drezner1990computation]
+        Z. Drezner and G. O. Wesolowsky. On the computation of the bivariate normal
+        integral. Journal of Statistical Computation and Simulation, 1990.
     """
     _0, _1, _ni2, _i2pi, _sq2pi = get_constants_like(
         values=(0, 1, -0.5, _inv_2pi, _sqrt_2pi), ref=r
@@ -255,13 +254,13 @@ def bvnmom(
     r"""Computes the expected values of truncated, bivariate normal random variables.
 
     Let `x` and `y` be a pair of standard bivariate normal random variables having
-    correlation `r`. This function computes `E([x,y] | [xl,yl] < [x,y] < [xu,yu])`.
+    correlation `r`. This function computes `E([x,y] \| [xl,yl] < [x,y] < [xu,yu])`.
 
     Following [Muthen1990moments]_ equations (4) and (5), we have
-    ```
-    E(x | [xl, yl] < [x, y] < [xu, yu])
-        = Z^{-1} \phi(xl) P(yl < y < yu | x=xl) - \phi(xu) P(yl < y < yu | x=xu)
-    ```
+
+        `E(x \| [xl, yl] < [x, y] < [xu, yu]) \
+        = Z^{-1} \phi(xl) P(yl < y < yu \| x=xl) - \phi(xu) P(yl < y < yu \| x=xu),`
+
     where `Z = P([xl, yl] < [x, y] < [xu, yu])` and `\phi` is the standard normal PDF.
 
     Args:
@@ -273,7 +272,8 @@ def bvnmom(
         p: Tensor of probabilities `P(xl < x < xu, yl < y < yu)`, same shape as `r`.
 
     Returns:
-        `E(x | [xl, yl] < [x, y] < [xu, yu])` and `E(y | [xl, yl] < [x, y] < [xu, yu])`.
+        `E(x \| [xl, yl] < [x, y] < [xu, yu])` and
+        `E(y \| [xl, yl] < [x, y] < [xu, yu])`.
     """
     if not (r.shape == xl.shape == xu.shape == yl.shape == yu.shape):
         raise UnsupportedError("Arguments to `bvn` must have the same shape.")

diff --git a/botorch/utils/probability/lin_ess.py b/botorch/utils/probability/lin_ess.py
@@ -94,13 +94,13 @@ def __init__(
             try:
                 covariance_root = torch.linalg.cholesky(covariance_matrix)
             except RuntimeError as e:
-                if "positive-definite" in str(e):
-                    raise ValueError(
+                raise_e = e
+                if "positive-definite" in str(raise_e):
+                    raise_e = ValueError(
                         "Covariance matrix is not positive definite. "
                         "Currently only non-degenerate distributions are supported."
                     )
-                else:
-                    raise e
+                raise raise_e
         self._covariance_root = covariance_root
         self._x = self.x0.clone()  # state of the sampler ("current point")
         # We will need the following repeatedly, let's allocate them once
@@ -216,11 +216,12 @@ def _find_active_intersections(self, nu: Tensor) -> Tensor:
             nu=nu, theta=theta, delta_theta=_delta_theta
         )
         theta_active = theta[active_directions.nonzero()]
-
+        delta_theta = _delta_theta
         while theta_active.numel() % 2 == 1:
             # Almost tangential ellipses, reduce delta_theta
+            delta_theta /= 10
             active_directions = self._index_active(
-                theta=theta, nu=nu, delta_theta=0.1 * _delta_theta
+                theta=theta, nu=nu, delta_theta=delta_theta
             )
             theta_active = theta[active_directions.nonzero()]
 
@@ -236,6 +237,9 @@ def _find_intersection_angles(self, nu: Tensor) -> Tensor:
         """Compute all of the up to 2*n_ineq_con intersections of the ellipse
         and the linear constraints.
 
+        For background, see equation (2) in
+        http://proceedings.mlr.press/v108/gessner20a/gessner20a.pdf
+
         Args:
             nu: A `d x 1`-dim tensor (the "new" direction, drawn from N(0, I)).
 
@@ -264,7 +268,7 @@ def _find_intersection_angles(self, nu: Tensor) -> Tensor:
         return torch.sort(theta).values
 
     def _index_active(
-        self, nu: Tensor, theta: Tensor, delta_theta: float = 1e-4
+        self, nu: Tensor, theta: Tensor, delta_theta: float = _delta_theta
     ) -> Tensor:
         r"""Determine active indices.
 

diff --git a/botorch/utils/probability/linalg.py b/botorch/utils/probability/linalg.py
@@ -50,9 +50,9 @@ def augment_cholesky(
         raise ValueError("One and only one of `Kba` or `Lba` must be provided.")
 
     if jitter is not None:
-        diag = Kbb.diagonal(dim1=-2, dim2=-1)
+        diag_indices = range(Kbb.shape[-1])
         Kbb = Kbb.clone()
-        Kbb.fill_diagonal_(diag + jitter)
+        Kbb[..., diag_indices, diag_indices] += jitter
 
     if Lba is None:
         Lba = torch.linalg.solve_triangular(
@@ -62,7 +62,7 @@ def augment_cholesky(
     Lbb, info = torch.linalg.cholesky_ex(Kbb - Lba @ Lba.transpose(-2, -1))
     if info.any():
         raise NotPSDError(
-            "Schur complement of `K` with respect to `Kaa` not PSD for the given"
+            "Schur complement of `K` with respect to `Kaa` not PSD for the given "
             "Cholesky factor `Laa`"
             f"{'.' if jitter is None else f' and nugget jitter={jitter}.'}"
         )
@@ -85,19 +85,19 @@ def __post_init__(self, validate_init: bool = True):
 
         if self.tril.shape[-2] != self.tril.shape[-1]:
             raise ValueError(
-                f"Expected square matrices but `matrix` has shape {self.tril.shape}."
+                f"Expected square matrices but `matrix` has shape `{self.tril.shape}`."
             )
 
         if self.perm.shape != self.tril.shape[:-1]:
             raise ValueError(
                 f"`perm` of shape `{self.perm.shape}` incompatible with "
-                f"`matrix` of shape `{self.tril.shape}."
+                f"`matrix` of shape `{self.tril.shape}`."
             )
 
         if self.diag is not None and self.diag.shape != self.tril.shape[:-1]:
             raise ValueError(
                 f"`diag` of shape `{self.diag.shape}` incompatible with "
-                f"`matrix` of shape `{self.tril.shape}."
+                f"`matrix` of shape `{self.tril.shape}`."
             )
 
     def __getitem__(self, key: Any) -> PivotedCholesky:
@@ -135,9 +135,8 @@ def pivot_(self, pivot: LongTensor) -> None:
         # Perform basic swaps
         for key in ("perm", "diag"):
             tnsr = getattr(self, key, None)
-            if tnsr is None:
-                continue
-            swap_along_dim_(tnsr, i=self.step, j=pivot, dim=pivot.ndim)
+            if tnsr is not None:
+                swap_along_dim_(tnsr, i=self.step, j=pivot, dim=tnsr.ndim - 1)
 
         # Perform matrix swaps; prealloacte buffers for row/column linear indices
         size2 = size**2

diff --git a/botorch/utils/probability/truncated_multivariate_normal.py b/botorch/utils/probability/truncated_multivariate_normal.py
@@ -145,4 +145,4 @@ def expand(
         return new
 
     def __repr__(self) -> str:
-        return super().__repr__()[:-1] + f"bounds: {self.bounds.shape})"
+        return super().__repr__()[:-1] + f", bounds: {self.bounds.shape})"
diff --git a/botorch/utils/probability/unified_skew_normal.py b/botorch/utils/probability/unified_skew_normal.py
@@ -7,14 +7,16 @@
 from __future__ import annotations
 
 from inspect import getmembers
-from typing import Optional, Sequence
+from typing import Optional, Sequence, Union
 
 import torch
 from botorch.utils.probability.linalg import augment_cholesky, block_matrix_concat
 from botorch.utils.probability.mvnxpb import MVNXPB
 from botorch.utils.probability.truncated_multivariate_normal import (
     TruncatedMultivariateNormal,
 )
+from linear_operator.operators import LinearOperator
+from linear_operator.utils.errors import NotPSDError
 from torch import Tensor
 from torch.distributions.multivariate_normal import Distribution, MultivariateNormal
 from torch.distributions.utils import lazy_property
@@ -28,7 +30,7 @@ def __init__(
         self,
         trunc: TruncatedMultivariateNormal,
         gauss: MultivariateNormal,
-        cross_covariance_matrix: Tensor,
+        cross_covariance_matrix: Union[Tensor, LinearOperator],
         validate_args: Optional[bool] = None,
     ):
         r"""Unified Skew Normal distribution of `Y | a < X < b` for jointly Gaussian
@@ -52,7 +54,8 @@ def __init__(
                 f"{len(trunc.event_shape)}-dimensional `trunc` incompatible with"
                 f"{len(gauss.event_shape)}-dimensional `gauss`."
             )
-
+        if isinstance(cross_covariance_matrix, LinearOperator):
+            cross_covariance_matrix = cross_covariance_matrix.to_dense()
         try:
             batch_shape = torch.broadcast_shapes(trunc.batch_shape, gauss.batch_shape)
         except RuntimeError as e:
@@ -66,13 +69,21 @@ def __init__(
         self.trunc = trunc
         self.gauss = gauss
         self.cross_covariance_matrix = cross_covariance_matrix
-        if validate_args:
+        if self._validate_args:
             try:
+                # calling _orthogonalized_gauss first makes the following call
+                # _orthogonalized_gauss.scale_tril which is used by self.rsample
                 self._orthogonalized_gauss
                 self.scale_tril
-            except RuntimeError as e:
-                if "positive-definite" in str(e):
-                    raise ValueError(
+            except Exception as e:
+                # error could be thrown by linalg.augment_cholesky (NotPSDError)
+                # or torch.linalg.cholesky (with "positive-definite" in the message)
+                if (
+                    isinstance(e, NotPSDError)
+                    or "positive-definite" in str(e)
+                    or "PositiveDefinite" in str(e)
+                ):
+                    e = ValueError(
                         "UnifiedSkewNormal is only well-defined for positive definite"
                         " joint covariance matrices."
                     )
@@ -158,7 +169,10 @@ def expand(
             elif isinstance(obj, Distribution):
                 new_obj = obj.expand(batch_shape=batch_shape)
             else:
-                raise TypeError
+                raise TypeError(
+                    f"Type {type(obj)} of UnifiedSkewNormal's lazy property "
+                    f"{name} not supported."
+                )
 
             setattr(new, name, new_obj)
         return new
@@ -203,12 +217,6 @@ def _orthogonalized_gauss(self) -> MultivariateNormal:
             parameters["covariance_matrix"] = (
                 self.gauss.covariance_matrix - beta.transpose(-1, -2) @ beta
             )
-            return MultivariateNormal(
-                loc=torch.zeros_like(self.gauss.loc),
-                scale_tril=self.scale_tril[..., -n:, -n:],
-                validate_args=self._validate_args,
-            )
-
         return MultivariateNormal(**parameters, validate_args=self._validate_args)
 
     @lazy_property

diff --git a/sphinx/source/acquisition.rst b/sphinx/source/acquisition.rst
@@ -141,32 +141,32 @@ Utilities
 -------------------------------------------
 
 Fixed Feature Acquisition Function
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.fixed_feature
     :members:
 
 Constructors for Acquisition Function Input Arguments
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.input_constructors
     :members:
 
 Penalized Acquisition Function Wrapper
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.penalized
     :members:
 
 Proximal Acquisition Function Wrapper
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.proximal
     :members:
 
 General Utilities for Acquisition Functions
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.utils
     :members:
 
 
 Multi-Objective Utilities for Acquisition Functions
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.multi_objective.utils
     :members: