diff --git a/botorch/utils/probability/bvn.py b/botorch/utils/probability/bvn.py
index 1188e78f40..e46fdeae15 100644
--- a/botorch/utils/probability/bvn.py
+++ b/botorch/utils/probability/bvn.py
@@ -7,18 +7,10 @@
 r"""
 Methods for computing bivariate normal probabilities and statistics.
 
-.. [Drezner1990computation]
-    Z. Drezner and G. O. Wesolowsky. On the computation of the bivariate normal
-    integral. Journal of Statistical Computation and Simulation, 1990.
-
 .. [Genz2004bvnt]
     A. Genz. Numerical computation of rectangular bivariate and trivariate normal
     and t probabilities. Statistics and Computing, 2004.
 
-.. [Rosenbaum1961moments]
-    S. Rosenbaum. Moments of a Truncated Bivariate Normal Distribution. Journal of the
-    Royal Statistical Society (Series B), 1961.
-
 .. [Muthen1990moments]
     B. Muthen. Moments of the censored and truncated bivariate normal distribution.
     British Journal of Mathematical and Statistical Psychology, 1990.
@@ -100,16 +92,17 @@ def bvn(r: Tensor, xl: Tensor, yl: Tensor, xu: Tensor, yu: Tensor) -> Tensor:
 def bvnu(r: Tensor, h: Tensor, k: Tensor) -> Tensor:
     r"""Solves for `P(x > h, y > k)` where `x` and `y` are standard bivariate normal
     random variables with correlation coefficient `r`. In [Genz2004bvnt]_, this is (1)
-    ```
-    L(h, k, r) = P(x < -h, y < -k)
-              = 1/(a 2\pi) \int_{h}^{\infty} \int_{k}^{\infty} f(x, y, r) dy dx,
-    ```
+
+    `L(h, k, r) = P(x < -h, y < -k) \
+    = 1/(a 2\pi) \int_{h}^{\infty} \int_{k}^{\infty} f(x, y, r) dy dx,`
+
     where `f(x, y, r) = e^{-1/(2a^2) (x^2 - 2rxy + y^2)}` and `a = (1 - r^2)^{1/2}`.
 
-    [Genz2004bvnt]_ report the following integation scheme incurs a maximum of 5e-16
-    error when run in double precision. For strongly correlated variables |r| >= 0.925,
-    use a 20-point quadrature rule on a 5th order Taylor expansion. Elsewhere,
-    numerically integrate in polar coordinates using no more than 20 quadrature points.
+    [Genz2004bvnt]_ reports that the following integration scheme incurs a maximum
+    error of 5e-16 when run in double precision. For strongly correlated variables
+    with `|r| >= 0.925`, use a 20-point quadrature rule on a 5th order Taylor
+    expansion. Elsewhere, numerically integrate in polar coordinates using no more
+    than 20 quadrature points.
 
     Args:
         r: Tensor of correlation coefficients.
@@ -149,6 +142,7 @@ def _bvnu_polar(
     L(h, k, r) = \Phi(-h)\Phi(-k) + 1/(2\pi) \int_{0}^{sin^{-1}(r)} f(t) dt
     f(t) = e^{-0.5 cos(t)^{-2} (h^2 + k^2 - 2hk sin(t))}
     ```
+
     For details, see Section 2.2 of [Genz2004bvnt]_.
     """
     if num_points is None:
@@ -175,14 +169,19 @@ def _bvnu_polar(
 def _bvnu_taylor(r: Tensor, h: Tensor, k: Tensor, num_points: int = 20) -> Tensor:
     r"""Solves for `P(x > h, y > k)` via Taylor expansion.
 
-    Following [Drezner1990computation], the standard BVN problem may be rewritten as
+    Following [Drezner1990computation]_, the standard BVN problem may be rewritten as
     ```
     L(h, k, r) = L(h, k, s) - s/(2\pi) \int_{0}^{a} f(x) dx
-    f(x) = (1 - x^2){-1/2} e^{-0.5 ((h - sk)/ x)^2} e^{-shk/(1 + (1 - x^2)^{1/2})},
+    f(x) = (1 - x^2)^{-1/2} e^{-0.5 ((h - sk)/x)^2} e^{-shk/(1 + (1 - x^2)^{1/2})},
     ```
+
     where `s = sign(r)` and `a = sqrt(1 - r^{2})`. The term `L(h, k, s)` is analytic.
     The second integral is approximated via Taylor expansion. See Sections 2.3
     and 2.4 of [Genz2004bvnt]_.
+
+    .. [Drezner1990computation]
+        Z. Drezner and G. O. Wesolowsky. On the computation of the bivariate normal
+        integral. Journal of Statistical Computation and Simulation, 1990.
     """
     _0, _1, _ni2, _i2pi, _sq2pi = get_constants_like(
         values=(0, 1, -0.5, _inv_2pi, _sqrt_2pi), ref=r
@@ -255,13 +254,13 @@ def bvnmom(
     r"""Computes the expected values of truncated, bivariate normal random variables.
     Let `x` and `y` be a pair of standard bivariate normal random variables having
-    correlation `r`. This function computes `E([x,y] | [xl,yl] < [x,y] < [xu,yu])`.
+    correlation `r`. This function computes `E([x,y] \| [xl,yl] < [x,y] < [xu,yu])`.
 
     Following [Muthen1990moments]_ equations (4) and (5), we have
-    ```
-    E(x | [xl, yl] < [x, y] < [xu, yu])
-    = Z^{-1} \phi(xl) P(yl < y < yu | x=xl) - \phi(xu) P(yl < y < yu | x=xu)
-    ```
+
+    `E(x \| [xl, yl] < [x, y] < [xu, yu]) \
+    = Z^{-1} (\phi(xl) P(yl < y < yu \| x=xl) - \phi(xu) P(yl < y < yu \| x=xu)),`
+
     where `Z = P([xl, yl] < [x, y] < [xu, yu])` and `\phi` is the standard normal PDF.
 
     Args:
@@ -273,7 +272,8 @@ def bvnmom(
         p: Tensor of probabilities `P(xl < x < xu, yl < y < yu)`, same shape as `r`.
 
     Returns:
-        `E(x | [xl, yl] < [x, y] < [xu, yu])` and `E(y | [xl, yl] < [x, y] < [xu, yu])`.
+        `E(x \| [xl, yl] < [x, y] < [xu, yu])` and
+        `E(y \| [xl, yl] < [x, y] < [xu, yu])`.
     """
     if not (r.shape == xl.shape == xu.shape == yl.shape == yu.shape):
         raise UnsupportedError("Arguments to `bvn` must have the same shape.")
diff --git a/botorch/utils/probability/lin_ess.py b/botorch/utils/probability/lin_ess.py
index 4dac869b1a..7a664273d4 100644
--- a/botorch/utils/probability/lin_ess.py
+++ b/botorch/utils/probability/lin_ess.py
@@ -94,13 +94,13 @@ def __init__(
         try:
             covariance_root = torch.linalg.cholesky(covariance_matrix)
         except RuntimeError as e:
-            if "positive-definite" in str(e):
-                raise ValueError(
+            raise_e = e
+            if "positive-definite" in str(raise_e):
+                raise_e = ValueError(
                     "Covariance matrix is not positive definite. "
                     "Currently only non-degenerate distributions are supported."
                 )
-            else:
-                raise e
+            raise raise_e
         self._covariance_root = covariance_root
         self._x = self.x0.clone()  # state of the sampler ("current point")
         # We will need the following repeatedly, let's allocate them once
@@ -216,11 +216,12 @@ def _find_active_intersections(self, nu: Tensor) -> Tensor:
             nu=nu, theta=theta, delta_theta=_delta_theta
         )
         theta_active = theta[active_directions.nonzero()]
-
+        delta_theta = _delta_theta
         while theta_active.numel() % 2 == 1:
             # Almost tangential ellipses, reduce delta_theta
+            delta_theta /= 10
             active_directions = self._index_active(
-                theta=theta, nu=nu, delta_theta=0.1 * _delta_theta
+                theta=theta, nu=nu, delta_theta=delta_theta
             )
             theta_active = theta[active_directions.nonzero()]
 
@@ -236,6 +237,9 @@ def _find_intersection_angles(self, nu: Tensor) -> Tensor:
         """Compute all of the up to 2*n_ineq_con intersections of the ellipse
         and the linear constraints.
 
+        For background, see equation (2) in
+        http://proceedings.mlr.press/v108/gessner20a/gessner20a.pdf
+
         Args:
             nu: A `d x 1`-dim tensor (the "new" direction, drawn from N(0, I)).
 
@@ -264,7 +268,7 @@ def _find_intersection_angles(self, nu: Tensor) -> Tensor:
         return torch.sort(theta).values
 
     def _index_active(
-        self, nu: Tensor, theta: Tensor, delta_theta: float = 1e-4
+        self, nu: Tensor, theta: Tensor, delta_theta: float = _delta_theta
     ) -> Tensor:
         r"""Determine active indices.
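Background for the `_find_intersection_angles` docstring added above: equation (2) of Gessner et al. (2020) gives the angles at which the ellipse `x0 * cos(theta) + nu * sin(theta)` crosses a single hyperplane `a^T x = b`. Below is a minimal sketch of that computation for one constraint; the helper name and signature are illustrative, not the BoTorch API.

```python
import torch


def intersection_angles(a, b, x0, nu):
    """Angles where the ellipse x0*cos(t) + nu*sin(t) meets a^T x = b.

    Hypothetical helper sketching equation (2) of Gessner et al. (2020);
    not the BoTorch implementation.
    """
    u = (a * x0).sum()  # coefficient of cos(t)
    v = (a * nu).sum()  # coefficient of sin(t)
    # u*cos(t) + v*sin(t) = rho*cos(t - phi), with phi = atan2(v, u)
    rho = torch.sqrt(u**2 + v**2)
    if rho < b.abs():  # the ellipse never reaches this hyperplane
        return torch.empty(0)
    phi = torch.atan2(v, u)
    delta = torch.arccos(b / rho)
    # two crossings (coincident when tangential), wrapped to [0, 2*pi)
    return torch.stack([phi - delta, phi + delta]) % (2 * torch.pi)


# illustrative values: constraint -x_0 <= -1, feasible point, random direction
a = torch.tensor([-1.0, 0.0, 0.0])
b = torch.tensor(-1.0)
x0 = torch.full((3,), 2.0)
nu = torch.randn(3)
print(intersection_angles(a, b, x0, nu))
```

`_find_intersection_angles` collects these angles over all constraints and sorts them; `_index_active` then keeps only the crossings at which a constraint actually flips between satisfied and violated, which is what the shrinking `delta_theta` loop above probes.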
diff --git a/botorch/utils/probability/linalg.py b/botorch/utils/probability/linalg.py
index c292c3eaf8..7a33746a46 100644
--- a/botorch/utils/probability/linalg.py
+++ b/botorch/utils/probability/linalg.py
@@ -50,9 +50,9 @@ def augment_cholesky(
         raise ValueError("One and only one of `Kba` or `Lba` must be provided.")
 
     if jitter is not None:
-        diag = Kbb.diagonal(dim1=-2, dim2=-1)
+        diag_indices = range(Kbb.shape[-1])
         Kbb = Kbb.clone()
-        Kbb.fill_diagonal_(diag + jitter)
+        Kbb[..., diag_indices, diag_indices] += jitter
 
     if Lba is None:
         Lba = torch.linalg.solve_triangular(
@@ -62,7 +62,7 @@ def augment_cholesky(
     Lbb, info = torch.linalg.cholesky_ex(Kbb - Lba @ Lba.transpose(-2, -1))
     if info.any():
         raise NotPSDError(
-            "Schur complement of `K` with respect to `Kaa` not PSD for the given"
+            "Schur complement of `K` with respect to `Kaa` not PSD for the given "
            "Cholesky factor `Laa`"
            f"{'.' if jitter is None else f' and nugget jitter={jitter}.'}"
         )
@@ -85,19 +85,19 @@ def __post_init__(self, validate_init: bool = True):
 
         if self.tril.shape[-2] != self.tril.shape[-1]:
             raise ValueError(
-                f"Expected square matrices but `matrix` has shape {self.tril.shape}."
+                f"Expected square matrices but `matrix` has shape `{self.tril.shape}`."
             )
 
         if self.perm.shape != self.tril.shape[:-1]:
             raise ValueError(
                 f"`perm` of shape `{self.perm.shape}` incompatible with "
-                f"`matrix` of shape `{self.tril.shape}."
+                f"`matrix` of shape `{self.tril.shape}`."
             )
 
         if self.diag is not None and self.diag.shape != self.tril.shape[:-1]:
             raise ValueError(
                 f"`diag` of shape `{self.diag.shape}` incompatible with "
-                f"`matrix` of shape `{self.tril.shape}."
+                f"`matrix` of shape `{self.tril.shape}`."
             )
 
     def __getitem__(self, key: Any) -> PivotedCholesky:
@@ -135,9 +135,8 @@ def pivot_(self, pivot: LongTensor) -> None:
         # Perform basic swaps
         for key in ("perm", "diag"):
             tnsr = getattr(self, key, None)
-            if tnsr is None:
-                continue
-            swap_along_dim_(tnsr, i=self.step, j=pivot, dim=pivot.ndim)
+            if tnsr is not None:
+                swap_along_dim_(tnsr, i=self.step, j=pivot, dim=tnsr.ndim - 1)
 
-        # Perform matrix swaps; prealloacte buffers for row/column linear indices
+        # Perform matrix swaps; preallocate buffers for row/column linear indices
         size2 = size**2
diff --git a/botorch/utils/probability/truncated_multivariate_normal.py b/botorch/utils/probability/truncated_multivariate_normal.py
index c7ee81be5c..0bdb0ed2fd 100644
--- a/botorch/utils/probability/truncated_multivariate_normal.py
+++ b/botorch/utils/probability/truncated_multivariate_normal.py
@@ -145,4 +145,4 @@ def expand(
         return new
 
     def __repr__(self) -> str:
-        return super().__repr__()[:-1] + f"bounds: {self.bounds.shape})"
+        return super().__repr__()[:-1] + f", bounds: {self.bounds.shape})"
diff --git a/botorch/utils/probability/unified_skew_normal.py b/botorch/utils/probability/unified_skew_normal.py
index 854c791028..4e1a04ac4a 100644
--- a/botorch/utils/probability/unified_skew_normal.py
+++ b/botorch/utils/probability/unified_skew_normal.py
@@ -7,7 +7,7 @@
 from __future__ import annotations
 
 from inspect import getmembers
-from typing import Optional, Sequence
+from typing import Optional, Sequence, Union
 
 import torch
 from botorch.utils.probability.linalg import augment_cholesky, block_matrix_concat
@@ -15,6 +15,8 @@
 from botorch.utils.probability.truncated_multivariate_normal import (
     TruncatedMultivariateNormal,
 )
+from linear_operator.operators import LinearOperator
+from linear_operator.utils.errors import NotPSDError
 from torch import Tensor
 from torch.distributions.multivariate_normal import Distribution, MultivariateNormal
 from torch.distributions.utils import lazy_property
@@ -28,7 +30,7 @@ def __init__(
         self,
         trunc: TruncatedMultivariateNormal,
         gauss: MultivariateNormal,
-        cross_covariance_matrix: Tensor,
+        cross_covariance_matrix: Union[Tensor, LinearOperator],
         validate_args: Optional[bool] = None,
     ):
         r"""Unified Skew Normal distribution of `Y | a < X < b` for jointly Gaussian
@@ -52,7 +54,8 @@ def __init__(
-                f"{len(trunc.event_shape)}-dimensional `trunc` incompatible with"
+                f"{len(trunc.event_shape)}-dimensional `trunc` incompatible with "
                 f"{len(gauss.event_shape)}-dimensional `gauss`."
             )
-
+        if isinstance(cross_covariance_matrix, LinearOperator):
+            cross_covariance_matrix = cross_covariance_matrix.to_dense()
         try:
             batch_shape = torch.broadcast_shapes(trunc.batch_shape, gauss.batch_shape)
         except RuntimeError as e:
@@ -66,13 +69,21 @@ def __init__(
         self.trunc = trunc
         self.gauss = gauss
         self.cross_covariance_matrix = cross_covariance_matrix
-        if validate_args:
+        if self._validate_args:
             try:
+                # call _orthogonalized_gauss first; its scale_tril is computed
+                # here and is subsequently used by self.rsample
                 self._orthogonalized_gauss
                 self.scale_tril
-            except RuntimeError as e:
-                if "positive-definite" in str(e):
-                    raise ValueError(
+            except Exception as e:
+                # error could be thrown by linalg.augment_cholesky (NotPSDError)
+                # or torch.linalg.cholesky (with "positive-definite" in the message)
+                if (
+                    isinstance(e, NotPSDError)
+                    or "positive-definite" in str(e)
+                    or "PositiveDefinite" in str(e)
+                ):
+                    e = ValueError(
                         "UnifiedSkewNormal is only well-defined for positive definite"
                         " joint covariance matrices."
                     )
@@ -158,7 +169,10 @@ def expand(
             elif isinstance(obj, Distribution):
                 new_obj = obj.expand(batch_shape=batch_shape)
             else:
-                raise TypeError
+                raise TypeError(
+                    f"Type {type(obj)} of UnifiedSkewNormal's lazy property "
+                    f"{name} not supported."
+                )
             setattr(new, name, new_obj)
         return new
 
@@ -203,12 +217,6 @@ def _orthogonalized_gauss(self) -> MultivariateNormal:
             parameters["covariance_matrix"] = (
                 self.gauss.covariance_matrix - beta.transpose(-1, -2) @ beta
             )
-        return MultivariateNormal(
-            loc=torch.zeros_like(self.gauss.loc),
-            scale_tril=self.scale_tril[..., -n:, -n:],
-            validate_args=self._validate_args,
-        )
-
         return MultivariateNormal(**parameters, validate_args=self._validate_args)
 
     @lazy_property
diff --git a/sphinx/source/acquisition.rst b/sphinx/source/acquisition.rst
index d2d9fa66a2..b01eb219e5 100644
--- a/sphinx/source/acquisition.rst
+++ b/sphinx/source/acquisition.rst
@@ -141,32 +141,32 @@ Utilities
 -------------------------------------------
 
 Fixed Feature Acquisition Function
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.fixed_feature
    :members:
 
 Constructors for Acquisition Function Input Arguments
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.input_constructors
    :members:
 
 Penalized Acquisition Function Wrapper
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.penalized
    :members:
 
 Proximal Acquisition Function Wrapper
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.proximal
    :members:
 
 General Utilities for Acquisition Functions
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.utils
    :members:
 
 Multi-Objective Utilities for Acquisition Functions
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.multi_objective.utils
    :members:
diff --git a/sphinx/source/utils.rst b/sphinx/source/utils.rst
index 7df2175398..11cff4e181 100644
--- a/sphinx/source/utils.rst
+++ b/sphinx/source/utils.rst
@@ -72,6 +72,16 @@ Feasible Volume
 .. automodule:: botorch.utils.feasible_volume
    :members:
 
+Constants
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.constants
+   :members:
+
+Safe Math
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.safe_math
+   :members:
+
 Multi-Objective Utilities
 -------------------------------------------
 
@@ -114,3 +124,41 @@ Scalarization
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.utils.multi_objective.scalarization
    :members:
+
+Probability Utilities
+-------------------------------------------
+
+Multivariate Gaussian Probabilities via Bivariate Conditioning
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.probability.mvnxpb
+   :members:
+
+Truncated Multivariate Normal Distribution
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.probability.truncated_multivariate_normal
+   :members:
+
+Unified Skew Normal Distribution
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.probability.unified_skew_normal
+   :members:
+
+Bivariate Normal Probabilities and Statistics
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.probability.bvn
+   :members:
+
+Elliptical Slice Sampler with Linear Constraints
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.probability.lin_ess
+   :members:
+
+Linear Algebra Helpers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.probability.linalg
+   :members:
+
+Probability Helpers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.probability.utils
+   :members:
automodule:: botorch.utils.probability.utils + :members: diff --git a/test/utils/probability/test_lin_ess.py b/test/utils/probability/test_lin_ess.py index 3d725cd39b..76b60686f5 100644 --- a/test/utils/probability/test_lin_ess.py +++ b/test/utils/probability/test_lin_ess.py @@ -80,23 +80,19 @@ def test_univariate(self): # non-standard mean / variance mean = torch.tensor([[0.25]], **tkwargs) covariance_matrix = torch.tensor([[4.0]], **tkwargs) - with self.assertRaises(ValueError) as e: + error_msg = ".*either covariance_matrix or covariance_root, not both.*" + with self.assertRaisesRegex(ValueError, error_msg): LinearEllipticalSliceSampler( bounds=torch.tensor([[0.0], [float("inf")]], **tkwargs), covariance_matrix=covariance_matrix, covariance_root=covariance_matrix.sqrt(), ) - self.assertTrue( - "either covariance_matrix or covariance_root, not both" in str(e) + error_msg = ".*Covariance matrix is not positive definite.*" + with self.assertRaisesRegex(ValueError, error_msg): + LinearEllipticalSliceSampler( + bounds=torch.tensor([[0.0], [float("inf")]], **tkwargs), + covariance_matrix=-covariance_matrix, ) - with self.assertRaises(ValueError) as e: - LinearEllipticalSliceSampler( - bounds=torch.tensor([[0.0], [float("inf")]], **tkwargs), - covariance_matrix=-covariance_matrix, - ) - self.assertTrue( - "Covariance matrix is not positive definite" in str(e) - ) sampler = LinearEllipticalSliceSampler( bounds=torch.tensor([[0.0], [float("inf")]], **tkwargs), mean=mean, @@ -160,3 +156,62 @@ def test_bivariate(self): self.assertEqual(samples.shape, torch.Size([3, 2])) self.assertTrue(sampler._is_feasible(samples.t()).all()) self.assertFalse(torch.equal(sampler._x, sampler.x0)) + + def test_multivariate(self): + d = 3 + lower_bound = 1 + for dtype in (torch.float, torch.double): + tkwargs = {"device": self.device, "dtype": dtype} + # special case: N(0, I) truncated to greater than lower_bound + A = -torch.eye(d, **tkwargs) + b = -torch.full((d, 1), lower_bound, **tkwargs) + sampler = LinearEllipticalSliceSampler(inequality_constraints=(A, b)) + self.assertIsNone(sampler._mean) + self.assertIsNone(sampler._covariance_root) + self.assertTrue(torch.all(sampler._is_feasible(sampler.x0))) + samples = sampler.draw(n=3) + self.assertEqual(samples.shape, torch.Size([3, d])) + self.assertGreaterEqual(samples.min().item(), lower_bound) + self.assertFalse(torch.equal(sampler._x, sampler.x0)) + # same case as above, but instantiated with bounds + sampler = LinearEllipticalSliceSampler( + bounds=torch.tensor( + [[lower_bound for _ in range(d)], [float("inf") for _ in range(d)]], + **tkwargs, + ), + ) + self.assertIsNone(sampler._mean) + self.assertIsNone(sampler._covariance_root) + self.assertTrue(torch.all(sampler._is_feasible(sampler.x0))) + samples = sampler.draw(n=3) + self.assertEqual(samples.shape, torch.Size([3, d])) + self.assertGreaterEqual(samples.min().item(), lower_bound) + self.assertFalse(torch.equal(sampler._x, sampler.x0)) + + # two special cases of _find_intersection_angles below: + # testing _find_intersection_angles with a proposal "nu" + # that ensures that the full ellipse is feasible + # NOTE: this test passes even though the full ellipse might + # not be feasible, which should be investigated further. + # However, this case is unlikely to be of much practical + # importance, as sampling a bound that is *exactly* on the + # constraint boundary is highly unlikely. 
+ nu = torch.full((d, 1), lower_bound, **tkwargs) + sampler = LinearEllipticalSliceSampler( + interior_point=nu, inequality_constraints=(A, b) + ) + nu = torch.tensor([[-0.9199], [1.3555], [1.3738]], **tkwargs) + theta_active = sampler._find_active_intersections(nu) + self.assertTrue( + torch.equal(theta_active, sampler._full_angular_range.view(-1)) + ) + + # testing tangential intersection of ellipse with constraint + nu = torch.full((d, 1), lower_bound, **tkwargs) + sampler = LinearEllipticalSliceSampler( + interior_point=nu, inequality_constraints=(A, b) + ) + nu = torch.full((d, 1), lower_bound, **tkwargs) + nu[1] += 1 + theta_active = sampler._find_active_intersections(nu) + self.assertTrue(theta_active.numel() % 2 == 0) diff --git a/test/utils/probability/test_linalg.py b/test/utils/probability/test_linalg.py index ce4211f93c..8770431754 100644 --- a/test/utils/probability/test_linalg.py +++ b/test/utils/probability/test_linalg.py @@ -9,7 +9,7 @@ from copy import deepcopy import torch -from botorch.utils.probability.linalg import PivotedCholesky +from botorch.utils.probability.linalg import augment_cholesky, PivotedCholesky from botorch.utils.testing import BotorchTestCase @@ -70,6 +70,10 @@ def test_pivot_(self): self.assertEqual(piv_chol.perm.tolist(), [[0, 2, 1, 3, 4], [0, 3, 1, 2, 4]]) self.assertTrue(piv_chol.tril[0].equal(self.piv_chol.tril[0])) + error_msg = "Argument `pivot` does to match with batch shape`." + with self.assertRaisesRegex(ValueError, error_msg): + piv_chol.pivot_(torch.tensor([1, 2, 3])) + A = self.piv_chol.tril[1] B = piv_chol.tril[1] self.assertTrue(A[2:4, :2].equal(B[2:4, :2].roll(1, 0))) @@ -78,9 +82,20 @@ def test_pivot_(self): def test_concat(self): A = self.piv_chol.expand(2, 2) B = self.piv_chol.expand(1, 2) - B = B.concat(B, dim=0) + C = B.concat(B, dim=0) for key in ("tril", "perm", "diag"): - self.assertTrue(getattr(A, key).equal(getattr(B, key))) + self.assertTrue(getattr(A, key).equal(getattr(C, key))) + + B.step = A.step + 1 + error_msg = "Cannot conncatenate decompositions at different steps." + with self.assertRaisesRegex(ValueError, error_msg): + A.concat(B, dim=0) + + B.step = A.step + B.perm = None + error_msg = "Types of field perm do not match." + with self.assertRaisesRegex(NotImplementedError, error_msg): + A.concat(B, dim=0) def test_clone(self): self.piv_chol.diag.requires_grad_(True) @@ -120,3 +135,77 @@ def test_expand(self): b = getattr(other, key) self.assertEqual(b.shape[: -a.ndim], (3,)) self.assertTrue(b._base is a) + + def test_augment(self): + K = self.matrix + n = K.shape[-1] + m = n // 2 + Kaa = K[:, 0:m, 0:m] + Laa = torch.linalg.cholesky(Kaa) + Kbb = K[:, m:, m:] + + error_msg = "One and only one of `Kba` or `Lba` must be provided." 
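+        # augment_cholesky extends Laa = chol(Kaa) to a factor of the full K,
+        # via Lba = Kba @ Laa^{-T} and Lbb = chol(Kbb - Lba @ Lba^T); exactly
+        # one of `Kba` or `Lba` is required, so passing neither must raise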
+        with self.assertRaisesRegex(ValueError, error_msg):
+            augment_cholesky(Laa, Kbb)
+
+        Kba = K[:, m:, 0:m]
+        L_augmented = augment_cholesky(Laa, Kbb, Kba)
+        L = torch.linalg.cholesky(K)
+        self.assertTrue(torch.allclose(L_augmented, L))
+
+        # with jitter
+        jitter = 3e-2
+        Laa = torch.linalg.cholesky(Kaa + jitter * torch.eye(m).unsqueeze(0))
+        L_augmented = augment_cholesky(Laa, Kbb, Kba, jitter=jitter)
+        L = torch.linalg.cholesky(K + jitter * torch.eye(n).unsqueeze(0))
+        self.assertTrue(torch.allclose(L_augmented, L))
+
+    def test_errors(self):
+        matrix = self.matrix
+        # `tril` below is `matrix` rescaled to have unit diagonal
+        diag = matrix.diagonal(dim1=-2, dim2=-1).sqrt()
+        idiag = diag.reciprocal().unsqueeze(-1)
+        n = matrix.shape[-1]
+
+        # testing with erroneous inputs
+        wrong_matrix = matrix[..., 0]
+        error_msg = "Expected square matrices but `matrix` has shape.*"
+        with self.assertRaisesRegex(ValueError, error_msg):
+            PivotedCholesky(
+                step=0,
+                tril=wrong_matrix,
+                perm=torch.arange(n)[None].expand(len(matrix), n).contiguous(),
+                diag=diag.clone(),
+                validate_init=True,
+            )
+
+        wrong_perm = torch.arange(n)[None].expand(2 * len(matrix), n).contiguous()
+        error_msg = "`perm` of shape .* incompatible with `matrix` of shape .*"
+        with self.assertRaisesRegex(ValueError, error_msg):
+            PivotedCholesky(
+                step=0,
+                tril=(idiag * matrix * idiag.transpose(-2, -1)).tril(),
+                perm=wrong_perm,
+                diag=diag.clone(),
+            )
+
+        wrong_diag = torch.ones(2 * len(diag))
+        error_msg = "`diag` of shape .* incompatible with `matrix` of shape .*"
+        with self.assertRaisesRegex(ValueError, error_msg):
+            PivotedCholesky(
+                step=0,
+                tril=(idiag * matrix * idiag.transpose(-2, -1)).tril(),
+                perm=torch.arange(n)[None].expand(len(matrix), n).contiguous(),
+                diag=wrong_diag,
+            )
+
+        # testing without validation, should pass,
+        # even though input does not have correct shape
+        piv_chol = PivotedCholesky(
+            step=0,
+            tril=matrix[..., 0],
+            perm=torch.arange(n)[None].expand(len(matrix), n).contiguous(),
+            diag=diag.clone(),
+            validate_init=False,
+        )
+        self.assertTrue(isinstance(piv_chol, PivotedCholesky))
diff --git a/test/utils/probability/test_mvnxpb.py b/test/utils/probability/test_mvnxpb.py
index 00e29633d3..a1533e41da 100644
--- a/test/utils/probability/test_mvnxpb.py
+++ b/test/utils/probability/test_mvnxpb.py
@@ -6,6 +6,8 @@
 
 from __future__ import annotations
 
+from copy import deepcopy
+
 from functools import partial
 from itertools import count
 from typing import Any, Callable, Dict, Optional, Sequence, Tuple, Union
@@ -15,6 +17,7 @@
 from botorch.utils.probability.linalg import PivotedCholesky
 from botorch.utils.probability.mvnxpb import MVNXPB
 from botorch.utils.testing import BotorchTestCase
+from linear_operator.utils.errors import NotPSDError
 from torch import Tensor
 
 
@@ -270,6 +273,30 @@ def test_augment(self):
         ):
             self.assertEqualMXNBPB(full, augm)
 
+        # testing errors
+        fake_init = deepcopy(init)
+        fake_init.piv_chol.step = fake_init.perm.shape[-1] + 1
+        error_msg = "Augmentation of incomplete solutions not implemented yet."
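+        # augment() appends new variables to a finished solve; the fake step
+        # above makes the pivoted Cholesky look incomplete, which augment()
+        # does not support and so must raise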
+        with self.assertRaisesRegex(NotImplementedError, error_msg):
+            augm = fake_init.augment(
+                covariance_matrix=_cov[..., n:, n:],
+                cross_covariance_matrix=_cov[..., n:, :n],
+                bounds=_bounds[..., n:, :],
+            )
+
+        # Testing that solver will try to recover if it encounters
+        # a non-psd matrix, even if it ultimately fails in this case
+        error_msg = (
+            "Matrix not positive definite after repeatedly adding jitter up to.*"
+        )
+        with self.assertRaisesRegex(NotPSDError, error_msg):
+            fake_cov = torch.ones_like(_cov[..., n:, n:])
+            augm = init.augment(
+                covariance_matrix=fake_cov,
+                cross_covariance_matrix=_cov[..., n:, :n],
+                bounds=_bounds[..., n:, :],
+            )
+
     def test_getitem(self):
         with torch.random.fork_rng():
             torch.random.manual_seed(1)
@@ -290,6 +317,11 @@ def test_getitem(self):
             b = getattr(other.piv_chol, key)
             self.assertTrue(a.equal(b))
 
+        fake_solver = deepcopy(self.toy_solver)
+        fake_solver.log_prob_extra = torch.tensor([-1])
+        fake_solver_1 = fake_solver[:1]
+        self.assertEqual(fake_solver_1.log_prob_extra, fake_solver.log_prob_extra[:1])
+
     def test_concat(self):
         split = len(self.toy_solver.log_prob) // 2
         A = self.toy_solver[:split]
@@ -358,3 +390,20 @@ def test_asdict(self):
     def test_build(self):
         other = MVNXPB.build(**self.toy_solver.asdict())
         self.assertEqualMXNBPB(self.toy_solver, other)
+
+    def test_exceptions(self):
+        # in solve
+        fake_solver = deepcopy(self.toy_solver)
+        fake_solver.step = fake_solver.piv_chol.step + 1
+        error_msg = "Invalid state: solver ran ahead of matrix decomposition."
+        with self.assertRaisesRegex(ValueError, error_msg):
+            fake_solver.solve()
+
+        # in _pivot
+        with self.assertRaises(ValueError):
+            pivot = torch.LongTensor([-1])  # this will not be used before the raise
+            fake_solver.pivot_(pivot)
+
+        error_msg = f"Expected `other` to be {type(fake_solver)} typed but was.*"
+        with self.assertRaisesRegex(TypeError, error_msg):
+            fake_solver.concat(1, 1)
diff --git a/test/utils/probability/test_unified_skew_normal.py b/test/utils/probability/test_unified_skew_normal.py
index 96fa0458c1..00ccfd1aed 100644
--- a/test/utils/probability/test_unified_skew_normal.py
+++ b/test/utils/probability/test_unified_skew_normal.py
@@ -6,7 +6,10 @@
 
 from __future__ import annotations
 
+from copy import deepcopy
+
 from itertools import count
+
 from typing import Any, Dict, Optional, Sequence, Tuple
 
 import torch
@@ -16,6 +19,7 @@
 )
 from botorch.utils.probability.unified_skew_normal import UnifiedSkewNormal
 from botorch.utils.testing import BotorchTestCase
+from linear_operator.operators import DenseLinearOperator
 from torch import Tensor
 from torch.distributions import MultivariateNormal
 from torch.special import ndtri
@@ -124,6 +128,12 @@ def test_log_prob(self):
         # Compare with log probabilities returned by class
         self.assertTrue(log_probs.allclose(usn.log_prob(vals)))
 
+        # checking error handling when incorrectly shaped value is passed
+        wrong_vals = torch.cat((vals, vals), dim=-1)
+        error_msg = ".*with shape.*does not comply with the instance.*"
+        with self.assertRaisesRegex(ValueError, error_msg):
+            usn.log_prob(wrong_vals)
+
     def test_rsample(self):
         # TODO: Replace with e.g. two-sample test.
         with torch.random.fork_rng():
@@ -163,6 +173,11 @@ def test_rsample(self):
 
     def test_expand(self):
         usn = next(iter(self.distributions))
+        # calling these lazy properties to cache them and
+        # hit associated branches in expand
+        usn._orthogonalized_gauss
+        usn.covariance_matrix
+
         other = usn.expand(torch.Size([2]))
         for key in ("loc", "covariance_matrix"):
             a = getattr(usn.gauss, key)
@@ -174,3 +189,98 @@ def test_expand(self):
 
         for b in other.cross_covariance_matrix.unbind():
             self.assertTrue(usn.cross_covariance_matrix.equal(b))
+
+        fake_usn = deepcopy(usn)
+        fake_usn.covariance_matrix = -1
+        error_msg = (
+            f"Type {type(-1)} of UnifiedSkewNormal's lazy property "
+            "covariance_matrix not supported.*"
+        )
+        with self.assertRaisesRegex(TypeError, error_msg):
+            other = fake_usn.expand(torch.Size([2]))
+
+    def test_validate_args(self):
+        for d in self.distributions:
+            error_msg = ".*is only well-defined for positive definite.*"
+            with self.assertRaisesRegex(ValueError, error_msg):
+                gauss = deepcopy(d.gauss)
+                gauss.covariance_matrix *= -1
+                UnifiedSkewNormal(d.trunc, gauss, d.cross_covariance_matrix)
+
+            error_msg = ".*-dimensional `trunc` incompatible with.*-dimensional `gauss"
+            with self.assertRaisesRegex(ValueError, error_msg):
+                gauss = deepcopy(d.gauss)
+                gauss._event_shape = (*gauss._event_shape, 1)
+                UnifiedSkewNormal(d.trunc, gauss, d.cross_covariance_matrix)
+
+            error_msg = "Incompatible batch shapes"
+            with self.assertRaisesRegex(ValueError, error_msg):
+                gauss = deepcopy(d.gauss)
+                trunc = deepcopy(d.trunc)
+                gauss._batch_shape = (*gauss._batch_shape, 2)
+                trunc._batch_shape = (*trunc._batch_shape, 3)
+                UnifiedSkewNormal(trunc, gauss, d.cross_covariance_matrix)
+
+    def test_properties(self):
+        orth = "_orthogonalized_gauss"
+        scal = "scale_tril"
+        for d in self.distributions:
+            # testing calling orthogonalized_gauss and scale_tril
+            usn = UnifiedSkewNormal(
+                d.trunc, d.gauss, d.cross_covariance_matrix, validate_args=False
+            )
+            self.assertTrue(orth not in usn.__dict__)
+            self.assertTrue(scal not in usn.__dict__)
+            usn._orthogonalized_gauss
+            self.assertTrue(orth in usn.__dict__)
+            self.assertTrue(scal not in usn.__dict__)
+            usn.scale_tril
+            self.assertTrue(orth in usn.__dict__)
+            self.assertTrue(scal in usn.__dict__)
+
+            # testing calling orthogonalized_gauss and scale_tril in reverse order
+            usn = UnifiedSkewNormal(
+                d.trunc, d.gauss, d.cross_covariance_matrix, validate_args=False
+            )
+            usn.scale_tril
+            self.assertTrue(orth not in usn.__dict__)
+            self.assertTrue(scal in usn.__dict__)
+            usn._orthogonalized_gauss
+            self.assertTrue(orth in usn.__dict__)
+            self.assertTrue(scal in usn.__dict__)
+
+    def test_covariance_matrix(self):
+        for d in self.distributions:
+            cov = d.covariance_matrix
+            self.assertTrue(isinstance(cov, Tensor))
+
+            # testing for symmetry
+            self.assertTrue(torch.allclose(cov, cov.mT))
+
+            # testing for positive-definiteness
+            ispd = False
+            try:
+                torch.linalg.cholesky(cov)
+                ispd = True
+            except RuntimeError:
+                pass
+            self.assertTrue(ispd)
+
+            # checking that linear operator to tensor conversion
+            # leads to same covariance matrix
+            xcov_linop = DenseLinearOperator(d.cross_covariance_matrix)
+            usn_linop = UnifiedSkewNormal(
+                trunc=d.trunc, gauss=d.gauss, cross_covariance_matrix=xcov_linop
+            )
+            cov_linop = usn_linop.covariance_matrix
+            self.assertTrue(isinstance(cov_linop, Tensor))
+            self.assertTrue(torch.allclose(cov, cov_linop))
+
+    def test_repr(self):
+        for d in self.distributions:
+            r = repr(d)
+            self.assertTrue(f"trunc: {d.trunc}" in r)
+
self.assertTrue(f"gauss: {d.gauss}" in r) + self.assertTrue( + f"cross_covariance_matrix: {d.cross_covariance_matrix.shape}" in r + ) diff --git a/test/utils/probability/test_utils.py b/test/utils/probability/test_utils.py index d1cb574e16..d02b31ae34 100644 --- a/test/utils/probability/test_utils.py +++ b/test/utils/probability/test_utils.py @@ -50,6 +50,29 @@ def test_case_dispatcher(self): active[mask] = False self.assertTrue(~active.any() or output[active].eq(len(levels)).all()) + # testing mask.all() branch + edge_cases = [ + (lambda: torch.full(values.shape, True), lambda mask: float("nan")) + ] + output = utils.case_dispatcher( + out=torch.full_like(values, float("nan")), + cases=edge_cases, + default=lambda mask: len(levels), + ) + + # testing if not active.any() branch + pred = torch.full(values.shape, True) + pred[0] = False + edge_cases = [ + (lambda: pred, lambda mask: False), + (lambda: torch.full(values.shape, True), lambda mask: False), + ] + output = utils.case_dispatcher( + out=torch.full_like(values, float("nan")), + cases=edge_cases, + default=lambda mask: len(levels), + ) + def test_build_positional_indices(self): with torch.random.fork_rng(): torch.random.manual_seed(0)