diff --git a/botorch/utils/probability/bvn.py b/botorch/utils/probability/bvn.py
index 1188e78f40..e46fdeae15 100644
--- a/botorch/utils/probability/bvn.py
+++ b/botorch/utils/probability/bvn.py
@@ -7,18 +7,10 @@
 r"""
 Methods for computing bivariate normal probabilities and statistics.
 
-.. [Drezner1990computation]
-    Z. Drezner and G. O. Wesolowsky. On the computation of the bivariate normal
-    integral. Journal of Statistical Computation and Simulation, 1990.
-
 .. [Genz2004bvnt]
     A. Genz. Numerical computation of rectangular bivariate and trivariate normal
     and t probabilities. Statistics and Computing, 2004.
 
-.. [Rosenbaum1961moments]
-    S. Rosenbaum. Moments of a Truncated Bivariate Normal Distribution. Journal of the
-    Royal Statistical Society (Series B), 1961.
-
 .. [Muthen1990moments]
     B. Muthen. Moments of the censored and truncated bivariate normal distribution.
     British Journal of Mathematical and Statistical Psychology, 1990.
@@ -100,16 +92,17 @@ def bvn(r: Tensor, xl: Tensor, yl: Tensor, xu: Tensor, yu: Tensor) -> Tensor:
 def bvnu(r: Tensor, h: Tensor, k: Tensor) -> Tensor:
     r"""Solves for `P(x > h, y > k)` where `x` and `y` are standard bivariate normal
     random variables with correlation coefficient `r`. In [Genz2004bvnt]_, this is (1)
-    ```
-    L(h, k, r) = P(x < -h, y < -k)
-              = 1/(a 2\pi) \int_{h}^{\infty} \int_{k}^{\infty} f(x, y, r) dy dx,
-    ```
+
+    `L(h, k, r) = P(x < -h, y < -k) \
+    = 1/(a 2\pi) \int_{h}^{\infty} \int_{k}^{\infty} f(x, y, r) dy dx,`
+
     where `f(x, y, r) = e^{-1/(2a^2) (x^2 - 2rxy + y^2)}` and `a = (1 - r^2)^{1/2}`.
 
-    [Genz2004bvnt]_ report the following integation scheme incurs a maximum of 5e-16
-    error when run in double precision. For strongly correlated variables |r| >= 0.925,
-    use a 20-point quadrature rule on a 5th order Taylor expansion. Elsewhere,
-    numerically integrate in polar coordinates using no more than 20 quadrature points.
+    [Genz2004bvnt]_ reports that the following integration scheme incurs a maximum
+    error of 5e-16 when run in double precision. For strongly correlated variables
+    with `|r| >= 0.925`, use a 20-point quadrature rule on a 5th order Taylor
+    expansion. Elsewhere, numerically integrate in polar coordinates using no more
+    than 20 quadrature points.
 
     Args:
         r: Tensor of correlation coefficients.
@@ -149,6 +142,7 @@ def _bvnu_polar(
     L(h, k, r) = \Phi(-h)\Phi(-k) + 1/(2\pi) \int_{0}^{sin^{-1}(r)} f(t) dt
     f(t) = e^{-0.5 cos(t)^{-2} (h^2 + k^2 - 2hk sin(t))}
     ```
+
     For details, see Section 2.2 of [Genz2004bvnt]_.
     """
     if num_points is None:
@@ -175,14 +169,19 @@ def _bvnu_polar(
 def _bvnu_taylor(r: Tensor, h: Tensor, k: Tensor, num_points: int = 20) -> Tensor:
     r"""Solves for `P(x > h, y > k)` via Taylor expansion.
 
-    Following [Drezner1990computation], the standard BVN problem may be rewritten as
+    Following [Drezner1990computation]_, the standard BVN problem may be rewritten as
     ```
     L(h, k, r) = L(h, k, s) - s/(2\pi) \int_{0}^{a} f(x) dx
-    f(x) = (1 - x^2){-1/2} e^{-0.5 ((h - sk)/ x)^2} e^{-shk/(1 + (1 - x^2)^{1/2})},
+    f(x) = (1 - x^2)^{-1/2} e^{-0.5 ((h - sk)/x)^2} e^{-shk/(1 + (1 - x^2)^{1/2})},
     ```
+
     where `s = sign(r)` and `a = sqrt(1 - r^{2})`. The term `L(h, k, s)` is analytic.
     The second integral is approximated via Taylor expansion. See Sections 2.3
     and 2.4 of [Genz2004bvnt]_.
+
+    .. [Drezner1990computation]
+        Z. Drezner and G. O. Wesolowsky. On the computation of the bivariate normal
+        integral. Journal of Statistical Computation and Simulation, 1990.
     """
     _0, _1, _ni2, _i2pi, _sq2pi = get_constants_like(
         values=(0, 1, -0.5, _inv_2pi, _sqrt_2pi), ref=r
@@ -255,13 +254,13 @@ def bvnmom(
     r"""Computes the expected values of truncated, bivariate normal random variables.
     Let `x` and `y` be a pair of standard bivariate normal random variables having
-    correlation `r`. This function computes `E([x,y] | [xl,yl] < [x,y] < [xu,yu])`.
+    correlation `r`. This function computes `E([x,y] \| [xl,yl] < [x,y] < [xu,yu])`.
 
     Following [Muthen1990moments]_ equations (4) and (5), we have
-    ```
-    E(x | [xl, yl] < [x, y] < [xu, yu])
-    = Z^{-1} \phi(xl) P(yl < y < yu | x=xl) - \phi(xu) P(yl < y < yu | x=xu)
-    ```
+
+    `E(x \| [xl, yl] < [x, y] < [xu, yu]) \
+    = Z^{-1} (\phi(xl) P(yl < y < yu \| x=xl) - \phi(xu) P(yl < y < yu \| x=xu)),`
+
     where `Z = P([xl, yl] < [x, y] < [xu, yu])` and `\phi` is the standard normal PDF.
 
     Args:
@@ -273,7 +272,8 @@ def bvnmom(
         p: Tensor of probabilities `P(xl < x < xu, yl < y < yu)`, same shape as `r`.
 
     Returns:
-        `E(x | [xl, yl] < [x, y] < [xu, yu])` and `E(y | [xl, yl] < [x, y] < [xu, yu])`.
+        `E(x \| [xl, yl] < [x, y] < [xu, yu])` and
+        `E(y \| [xl, yl] < [x, y] < [xu, yu])`.
     """
     if not (r.shape == xl.shape == xu.shape == yl.shape == yu.shape):
         raise UnsupportedError("Arguments to `bvn` must have the same shape.")
diff --git a/botorch/utils/probability/lin_ess.py b/botorch/utils/probability/lin_ess.py
index 4dac869b1a..7a664273d4 100644
--- a/botorch/utils/probability/lin_ess.py
+++ b/botorch/utils/probability/lin_ess.py
@@ -94,13 +94,13 @@ def __init__(
         try:
             covariance_root = torch.linalg.cholesky(covariance_matrix)
         except RuntimeError as e:
-            if "positive-definite" in str(e):
-                raise ValueError(
+            raise_e = e
+            if "positive-definite" in str(raise_e):
+                raise_e = ValueError(
                     "Covariance matrix is not positive definite. "
                     "Currently only non-degenerate distributions are supported."
                 )
-            else:
-                raise e
+            raise raise_e
         self._covariance_root = covariance_root
         self._x = self.x0.clone()  # state of the sampler ("current point")
         # We will need the following repeatedly, let's allocate them once
@@ -216,11 +216,12 @@ def _find_active_intersections(self, nu: Tensor) -> Tensor:
             nu=nu, theta=theta, delta_theta=_delta_theta
         )
         theta_active = theta[active_directions.nonzero()]
-
+        delta_theta = _delta_theta
         while theta_active.numel() % 2 == 1:
             # Almost tangential ellipses, reduce delta_theta
+            delta_theta /= 10
             active_directions = self._index_active(
-                theta=theta, nu=nu, delta_theta=0.1 * _delta_theta
+                theta=theta, nu=nu, delta_theta=delta_theta
             )
             theta_active = theta[active_directions.nonzero()]
 
@@ -236,6 +237,9 @@ def _find_intersection_angles(self, nu: Tensor) -> Tensor:
         """Compute all of the up to 2*n_ineq_con intersections of the ellipse
         and the linear constraints.
 
+        For background, see equation (2) in
+        http://proceedings.mlr.press/v108/gessner20a/gessner20a.pdf
+
         Args:
             nu: A `d x 1`-dim tensor (the "new" direction, drawn from N(0, I)).
 
@@ -264,7 +268,7 @@ def _find_intersection_angles(self, nu: Tensor) -> Tensor:
         return torch.sort(theta).values
 
     def _index_active(
-        self, nu: Tensor, theta: Tensor, delta_theta: float = 1e-4
+        self, nu: Tensor, theta: Tensor, delta_theta: float = _delta_theta
     ) -> Tensor:
         r"""Determine active indices.
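Background for the `_find_intersection_angles` docstring added above: equation (2) of Gessner et al. (2020) gives the angles at which the ellipse `x0 * cos(theta) + nu * sin(theta)` crosses a single hyperplane `a^T x = b`. Below is a minimal sketch of that computation for one constraint; the helper name and signature are illustrative, not the BoTorch API.

```python
import torch


def intersection_angles(a, b, x0, nu):
    """Angles where the ellipse x0*cos(t) + nu*sin(t) meets a^T x = b.

    Hypothetical helper sketching equation (2) of Gessner et al. (2020);
    not the BoTorch implementation.
    """
    u = (a * x0).sum()  # coefficient of cos(t)
    v = (a * nu).sum()  # coefficient of sin(t)
    # u*cos(t) + v*sin(t) = rho*cos(t - phi), with phi = atan2(v, u)
    rho = torch.sqrt(u**2 + v**2)
    if rho < b.abs():  # the ellipse never reaches this hyperplane
        return torch.empty(0)
    phi = torch.atan2(v, u)
    delta = torch.arccos(b / rho)
    # two crossings (coincident when tangential), wrapped to [0, 2*pi)
    return torch.stack([phi - delta, phi + delta]) % (2 * torch.pi)


# illustrative values: constraint -x_0 <= -1, feasible point, random direction
a = torch.tensor([-1.0, 0.0, 0.0])
b = torch.tensor(-1.0)
x0 = torch.full((3,), 2.0)
nu = torch.randn(3)
print(intersection_angles(a, b, x0, nu))
```

`_find_intersection_angles` collects these angles over all constraints and sorts them; `_index_active` then keeps only the crossings at which a constraint actually flips between satisfied and violated, which is what the shrinking `delta_theta` loop above probes.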
diff --git a/botorch/utils/probability/linalg.py b/botorch/utils/probability/linalg.py
index c292c3eaf8..7a33746a46 100644
--- a/botorch/utils/probability/linalg.py
+++ b/botorch/utils/probability/linalg.py
@@ -50,9 +50,9 @@ def augment_cholesky(
         raise ValueError("One and only one of `Kba` or `Lba` must be provided.")
 
     if jitter is not None:
-        diag = Kbb.diagonal(dim1=-2, dim2=-1)
+        diag_indices = range(Kbb.shape[-1])
         Kbb = Kbb.clone()
-        Kbb.fill_diagonal_(diag + jitter)
+        Kbb[..., diag_indices, diag_indices] += jitter
 
     if Lba is None:
         Lba = torch.linalg.solve_triangular(
@@ -62,7 +62,7 @@ def augment_cholesky(
     Lbb, info = torch.linalg.cholesky_ex(Kbb - Lba @ Lba.transpose(-2, -1))
     if info.any():
         raise NotPSDError(
-            "Schur complement of `K` with respect to `Kaa` not PSD for the given"
+            "Schur complement of `K` with respect to `Kaa` not PSD for the given "
            "Cholesky factor `Laa`"
            f"{'.' if jitter is None else f' and nugget jitter={jitter}.'}"
         )
@@ -85,19 +85,19 @@ def __post_init__(self, validate_init: bool = True):
 
         if self.tril.shape[-2] != self.tril.shape[-1]:
             raise ValueError(
-                f"Expected square matrices but `matrix` has shape {self.tril.shape}."
+                f"Expected square matrices but `matrix` has shape `{self.tril.shape}`."
             )
 
         if self.perm.shape != self.tril.shape[:-1]:
             raise ValueError(
                 f"`perm` of shape `{self.perm.shape}` incompatible with "
-                f"`matrix` of shape `{self.tril.shape}."
+                f"`matrix` of shape `{self.tril.shape}`."
             )
 
         if self.diag is not None and self.diag.shape != self.tril.shape[:-1]:
             raise ValueError(
                 f"`diag` of shape `{self.diag.shape}` incompatible with "
-                f"`matrix` of shape `{self.tril.shape}."
+                f"`matrix` of shape `{self.tril.shape}`."
             )
 
     def __getitem__(self, key: Any) -> PivotedCholesky:
@@ -135,9 +135,8 @@ def pivot_(self, pivot: LongTensor) -> None:
         # Perform basic swaps
         for key in ("perm", "diag"):
             tnsr = getattr(self, key, None)
-            if tnsr is None:
-                continue
-            swap_along_dim_(tnsr, i=self.step, j=pivot, dim=pivot.ndim)
+            if tnsr is not None:
+                swap_along_dim_(tnsr, i=self.step, j=pivot, dim=tnsr.ndim - 1)
 
-        # Perform matrix swaps; prealloacte buffers for row/column linear indices
+        # Perform matrix swaps; preallocate buffers for row/column linear indices
         size2 = size**2
diff --git a/botorch/utils/probability/truncated_multivariate_normal.py b/botorch/utils/probability/truncated_multivariate_normal.py
index c7ee81be5c..0bdb0ed2fd 100644
--- a/botorch/utils/probability/truncated_multivariate_normal.py
+++ b/botorch/utils/probability/truncated_multivariate_normal.py
@@ -145,4 +145,4 @@ def expand(
         return new
 
     def __repr__(self) -> str:
-        return super().__repr__()[:-1] + f"bounds: {self.bounds.shape})"
+        return super().__repr__()[:-1] + f", bounds: {self.bounds.shape})"
diff --git a/botorch/utils/probability/unified_skew_normal.py b/botorch/utils/probability/unified_skew_normal.py
index 854c791028..4e1a04ac4a 100644
--- a/botorch/utils/probability/unified_skew_normal.py
+++ b/botorch/utils/probability/unified_skew_normal.py
@@ -7,7 +7,7 @@
 from __future__ import annotations
 
 from inspect import getmembers
-from typing import Optional, Sequence
+from typing import Optional, Sequence, Union
 
 import torch
 from botorch.utils.probability.linalg import augment_cholesky, block_matrix_concat
@@ -15,6 +15,8 @@
 from botorch.utils.probability.truncated_multivariate_normal import (
     TruncatedMultivariateNormal,
 )
+from linear_operator.operators import LinearOperator
+from linear_operator.utils.errors import NotPSDError
 from torch import Tensor
 from torch.distributions.multivariate_normal import Distribution, MultivariateNormal
 from torch.distributions.utils import lazy_property
@@ -28,7 +30,7 @@ def __init__(
         self,
         trunc: TruncatedMultivariateNormal,
         gauss: MultivariateNormal,
-        cross_covariance_matrix: Tensor,
+        cross_covariance_matrix: Union[Tensor, LinearOperator],
         validate_args: Optional[bool] = None,
     ):
         r"""Unified Skew Normal distribution of `Y | a < X < b` for jointly Gaussian
@@ -52,7 +54,8 @@ def __init__(
-                f"{len(trunc.event_shape)}-dimensional `trunc` incompatible with"
+                f"{len(trunc.event_shape)}-dimensional `trunc` incompatible with "
                 f"{len(gauss.event_shape)}-dimensional `gauss`."
             )
-
+        if isinstance(cross_covariance_matrix, LinearOperator):
+            cross_covariance_matrix = cross_covariance_matrix.to_dense()
         try:
             batch_shape = torch.broadcast_shapes(trunc.batch_shape, gauss.batch_shape)
         except RuntimeError as e:
@@ -66,13 +69,21 @@ def __init__(
         self.trunc = trunc
         self.gauss = gauss
         self.cross_covariance_matrix = cross_covariance_matrix
-        if validate_args:
+        if self._validate_args:
             try:
+                # call _orthogonalized_gauss first; its scale_tril is computed
+                # here and is subsequently used by self.rsample
                 self._orthogonalized_gauss
                 self.scale_tril
-            except RuntimeError as e:
-                if "positive-definite" in str(e):
-                    raise ValueError(
+            except Exception as e:
+                # error could be thrown by linalg.augment_cholesky (NotPSDError)
+                # or torch.linalg.cholesky (with "positive-definite" in the message)
+                if (
+                    isinstance(e, NotPSDError)
+                    or "positive-definite" in str(e)
+                    or "PositiveDefinite" in str(e)
+                ):
+                    e = ValueError(
                         "UnifiedSkewNormal is only well-defined for positive definite"
                         " joint covariance matrices."
                     )
@@ -158,7 +169,10 @@ def expand(
             elif isinstance(obj, Distribution):
                 new_obj = obj.expand(batch_shape=batch_shape)
             else:
-                raise TypeError
+                raise TypeError(
+                    f"Type {type(obj)} of UnifiedSkewNormal's lazy property "
+                    f"{name} not supported."
+                )
             setattr(new, name, new_obj)
         return new
 
@@ -203,12 +217,6 @@ def _orthogonalized_gauss(self) -> MultivariateNormal:
             parameters["covariance_matrix"] = (
                 self.gauss.covariance_matrix - beta.transpose(-1, -2) @ beta
             )
-        return MultivariateNormal(
-            loc=torch.zeros_like(self.gauss.loc),
-            scale_tril=self.scale_tril[..., -n:, -n:],
-            validate_args=self._validate_args,
-        )
-
         return MultivariateNormal(**parameters, validate_args=self._validate_args)
 
     @lazy_property
diff --git a/sphinx/source/acquisition.rst b/sphinx/source/acquisition.rst
index d2d9fa66a2..b01eb219e5 100644
--- a/sphinx/source/acquisition.rst
+++ b/sphinx/source/acquisition.rst
@@ -141,32 +141,32 @@ Utilities
 -------------------------------------------
 
 Fixed Feature Acquisition Function
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.fixed_feature
    :members:
 
 Constructors for Acquisition Function Input Arguments
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.input_constructors
    :members:
 
 Penalized Acquisition Function Wrapper
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.penalized
    :members:
 
 Proximal Acquisition Function Wrapper
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.proximal
    :members:
 
 General Utilities for Acquisition Functions
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.utils
    :members:
 
 Multi-Objective Utilities for Acquisition Functions
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.acquisition.multi_objective.utils
    :members:
diff --git a/sphinx/source/utils.rst b/sphinx/source/utils.rst
index 7df2175398..11cff4e181 100644
--- a/sphinx/source/utils.rst
+++ b/sphinx/source/utils.rst
@@ -72,6 +72,16 @@ Feasible Volume
 .. automodule:: botorch.utils.feasible_volume
    :members:
 
+Constants
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.constants
+   :members:
+
+Safe Math
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.safe_math
+   :members:
+
 Multi-Objective Utilities
 -------------------------------------------
 
@@ -114,3 +124,41 @@ Scalarization
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 .. automodule:: botorch.utils.multi_objective.scalarization
    :members:
+
+Probability Utilities
+-------------------------------------------
+
+Multivariate Gaussian Probabilities via Bivariate Conditioning
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.probability.mvnxpb
+   :members:
+
+Truncated Multivariate Normal Distribution
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.probability.truncated_multivariate_normal
+   :members:
+
+Unified Skew Normal Distribution
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.probability.unified_skew_normal
+   :members:
+
+Bivariate Normal Probabilities and Statistics
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.probability.bvn
+   :members:
+
+Elliptical Slice Sampler with Linear Constraints
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.probability.lin_ess
+   :members:
+
+Linear Algebra Helpers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.probability.linalg
+   :members:
+
+Probability Helpers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. automodule:: botorch.utils.probability.utils
+   :members:
automodule:: botorch.utils.probability.utils + :members: diff --git a/test/utils/probability/test_lin_ess.py b/test/utils/probability/test_lin_ess.py index 3d725cd39b..76b60686f5 100644 --- a/test/utils/probability/test_lin_ess.py +++ b/test/utils/probability/test_lin_ess.py @@ -80,23 +80,19 @@ def test_univariate(self): # non-standard mean / variance mean = torch.tensor([[0.25]], **tkwargs) covariance_matrix = torch.tensor([[4.0]], **tkwargs) - with self.assertRaises(ValueError) as e: + error_msg = ".*either covariance_matrix or covariance_root, not both.*" + with self.assertRaisesRegex(ValueError, error_msg): LinearEllipticalSliceSampler( bounds=torch.tensor([[0.0], [float("inf")]], **tkwargs), covariance_matrix=covariance_matrix, covariance_root=covariance_matrix.sqrt(), ) - self.assertTrue( - "either covariance_matrix or covariance_root, not both" in str(e) + error_msg = ".*Covariance matrix is not positive definite.*" + with self.assertRaisesRegex(ValueError, error_msg): + LinearEllipticalSliceSampler( + bounds=torch.tensor([[0.0], [float("inf")]], **tkwargs), + covariance_matrix=-covariance_matrix, ) - with self.assertRaises(ValueError) as e: - LinearEllipticalSliceSampler( - bounds=torch.tensor([[0.0], [float("inf")]], **tkwargs), - covariance_matrix=-covariance_matrix, - ) - self.assertTrue( - "Covariance matrix is not positive definite" in str(e) - ) sampler = LinearEllipticalSliceSampler( bounds=torch.tensor([[0.0], [float("inf")]], **tkwargs), mean=mean, @@ -160,3 +156,62 @@ def test_bivariate(self): self.assertEqual(samples.shape, torch.Size([3, 2])) self.assertTrue(sampler._is_feasible(samples.t()).all()) self.assertFalse(torch.equal(sampler._x, sampler.x0)) + + def test_multivariate(self): + d = 3 + lower_bound = 1 + for dtype in (torch.float, torch.double): + tkwargs = {"device": self.device, "dtype": dtype} + # special case: N(0, I) truncated to greater than lower_bound + A = -torch.eye(d, **tkwargs) + b = -torch.full((d, 1), lower_bound, **tkwargs) + sampler = LinearEllipticalSliceSampler(inequality_constraints=(A, b)) + self.assertIsNone(sampler._mean) + self.assertIsNone(sampler._covariance_root) + self.assertTrue(torch.all(sampler._is_feasible(sampler.x0))) + samples = sampler.draw(n=3) + self.assertEqual(samples.shape, torch.Size([3, d])) + self.assertGreaterEqual(samples.min().item(), lower_bound) + self.assertFalse(torch.equal(sampler._x, sampler.x0)) + # same case as above, but instantiated with bounds + sampler = LinearEllipticalSliceSampler( + bounds=torch.tensor( + [[lower_bound for _ in range(d)], [float("inf") for _ in range(d)]], + **tkwargs, + ), + ) + self.assertIsNone(sampler._mean) + self.assertIsNone(sampler._covariance_root) + self.assertTrue(torch.all(sampler._is_feasible(sampler.x0))) + samples = sampler.draw(n=3) + self.assertEqual(samples.shape, torch.Size([3, d])) + self.assertGreaterEqual(samples.min().item(), lower_bound) + self.assertFalse(torch.equal(sampler._x, sampler.x0)) + + # two special cases of _find_intersection_angles below: + # testing _find_intersection_angles with a proposal "nu" + # that ensures that the full ellipse is feasible + # NOTE: this test passes even though the full ellipse might + # not be feasible, which should be investigated further. + # However, this case is unlikely to be of much practical + # importance, as sampling a bound that is *exactly* on the + # constraint boundary is highly unlikely. 
+ nu = torch.full((d, 1), lower_bound, **tkwargs) + sampler = LinearEllipticalSliceSampler( + interior_point=nu, inequality_constraints=(A, b) + ) + nu = torch.tensor([[-0.9199], [1.3555], [1.3738]], **tkwargs) + theta_active = sampler._find_active_intersections(nu) + self.assertTrue( + torch.equal(theta_active, sampler._full_angular_range.view(-1)) + ) + + # testing tangential intersection of ellipse with constraint + nu = torch.full((d, 1), lower_bound, **tkwargs) + sampler = LinearEllipticalSliceSampler( + interior_point=nu, inequality_constraints=(A, b) + ) + nu = torch.full((d, 1), lower_bound, **tkwargs) + nu[1] += 1 + theta_active = sampler._find_active_intersections(nu) + self.assertTrue(theta_active.numel() % 2 == 0) diff --git a/test/utils/probability/test_linalg.py b/test/utils/probability/test_linalg.py index ce4211f93c..8770431754 100644 --- a/test/utils/probability/test_linalg.py +++ b/test/utils/probability/test_linalg.py @@ -9,7 +9,7 @@ from copy import deepcopy import torch -from botorch.utils.probability.linalg import PivotedCholesky +from botorch.utils.probability.linalg import augment_cholesky, PivotedCholesky from botorch.utils.testing import BotorchTestCase @@ -70,6 +70,10 @@ def test_pivot_(self): self.assertEqual(piv_chol.perm.tolist(), [[0, 2, 1, 3, 4], [0, 3, 1, 2, 4]]) self.assertTrue(piv_chol.tril[0].equal(self.piv_chol.tril[0])) + error_msg = "Argument `pivot` does to match with batch shape`." + with self.assertRaisesRegex(ValueError, error_msg): + piv_chol.pivot_(torch.tensor([1, 2, 3])) + A = self.piv_chol.tril[1] B = piv_chol.tril[1] self.assertTrue(A[2:4, :2].equal(B[2:4, :2].roll(1, 0))) @@ -78,9 +82,20 @@ def test_pivot_(self): def test_concat(self): A = self.piv_chol.expand(2, 2) B = self.piv_chol.expand(1, 2) - B = B.concat(B, dim=0) + C = B.concat(B, dim=0) for key in ("tril", "perm", "diag"): - self.assertTrue(getattr(A, key).equal(getattr(B, key))) + self.assertTrue(getattr(A, key).equal(getattr(C, key))) + + B.step = A.step + 1 + error_msg = "Cannot conncatenate decompositions at different steps." + with self.assertRaisesRegex(ValueError, error_msg): + A.concat(B, dim=0) + + B.step = A.step + B.perm = None + error_msg = "Types of field perm do not match." + with self.assertRaisesRegex(NotImplementedError, error_msg): + A.concat(B, dim=0) def test_clone(self): self.piv_chol.diag.requires_grad_(True) @@ -120,3 +135,77 @@ def test_expand(self): b = getattr(other, key) self.assertEqual(b.shape[: -a.ndim], (3,)) self.assertTrue(b._base is a) + + def test_augment(self): + K = self.matrix + n = K.shape[-1] + m = n // 2 + Kaa = K[:, 0:m, 0:m] + Laa = torch.linalg.cholesky(Kaa) + Kbb = K[:, m:, m:] + + error_msg = "One and only one of `Kba` or `Lba` must be provided." 
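+        # augment_cholesky extends Laa = chol(Kaa) to a factor of the full K,
+        # via Lba = Kba @ Laa^{-T} and Lbb = chol(Kbb - Lba @ Lba^T); exactly
+        # one of `Kba` or `Lba` is required, so passing neither must raise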
+        with self.assertRaisesRegex(ValueError, error_msg):
+            augment_cholesky(Laa, Kbb)
+
+        Kba = K[:, m:, 0:m]
+        L_augmented = augment_cholesky(Laa, Kbb, Kba)
+        L = torch.linalg.cholesky(K)
+        self.assertTrue(torch.allclose(L_augmented, L))
+
+        # with jitter
+        jitter = 3e-2
+        Laa = torch.linalg.cholesky(Kaa + jitter * torch.eye(m).unsqueeze(0))
+        L_augmented = augment_cholesky(Laa, Kbb, Kba, jitter=jitter)
+        L = torch.linalg.cholesky(K + jitter * torch.eye(n).unsqueeze(0))
+        self.assertTrue(torch.allclose(L_augmented, L))
+
+    def test_errors(self):
+        matrix = self.matrix
+        # `tril` below is `matrix` rescaled to have unit diagonal
+        diag = matrix.diagonal(dim1=-2, dim2=-1).sqrt()
+        idiag = diag.reciprocal().unsqueeze(-1)
+        n = matrix.shape[-1]
+
+        # testing with erroneous inputs
+        wrong_matrix = matrix[..., 0]
+        error_msg = "Expected square matrices but `matrix` has shape.*"
+        with self.assertRaisesRegex(ValueError, error_msg):
+            PivotedCholesky(
+                step=0,
+                tril=wrong_matrix,
+                perm=torch.arange(n)[None].expand(len(matrix), n).contiguous(),
+                diag=diag.clone(),
+                validate_init=True,
+            )
+
+        wrong_perm = torch.arange(n)[None].expand(2 * len(matrix), n).contiguous()
+        error_msg = "`perm` of shape .* incompatible with `matrix` of shape .*"
+        with self.assertRaisesRegex(ValueError, error_msg):
+            PivotedCholesky(
+                step=0,
+                tril=(idiag * matrix * idiag.transpose(-2, -1)).tril(),
+                perm=wrong_perm,
+                diag=diag.clone(),
+            )
+
+        wrong_diag = torch.ones(2 * len(diag))
+        error_msg = "`diag` of shape .* incompatible with `matrix` of shape .*"
+        with self.assertRaisesRegex(ValueError, error_msg):
+            PivotedCholesky(
+                step=0,
+                tril=(idiag * matrix * idiag.transpose(-2, -1)).tril(),
+                perm=torch.arange(n)[None].expand(len(matrix), n).contiguous(),
+                diag=wrong_diag,
+            )
+
+        # testing without validation, should pass,
+        # even though input does not have correct shape
+        piv_chol = PivotedCholesky(
+            step=0,
+            tril=matrix[..., 0],
+            perm=torch.arange(n)[None].expand(len(matrix), n).contiguous(),
+            diag=diag.clone(),
+            validate_init=False,
+        )
+        self.assertTrue(isinstance(piv_chol, PivotedCholesky))
diff --git a/test/utils/probability/test_mvnxpb.py b/test/utils/probability/test_mvnxpb.py
index 00e29633d3..a1533e41da 100644
--- a/test/utils/probability/test_mvnxpb.py
+++ b/test/utils/probability/test_mvnxpb.py
@@ -6,6 +6,8 @@
 
 from __future__ import annotations
 
+from copy import deepcopy
+
 from functools import partial
 from itertools import count
 from typing import Any, Callable, Dict, Optional, Sequence, Tuple, Union
@@ -15,6 +17,7 @@
 from botorch.utils.probability.linalg import PivotedCholesky
 from botorch.utils.probability.mvnxpb import MVNXPB
 from botorch.utils.testing import BotorchTestCase
+from linear_operator.utils.errors import NotPSDError
 from torch import Tensor
 
 
@@ -270,6 +273,30 @@ def test_augment(self):
         ):
             self.assertEqualMXNBPB(full, augm)
 
+        # testing errors
+        fake_init = deepcopy(init)
+        fake_init.piv_chol.step = fake_init.perm.shape[-1] + 1
+        error_msg = "Augmentation of incomplete solutions not implemented yet."
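+        # augment() appends new variables to a finished solve; the fake step
+        # above makes the pivoted Cholesky look incomplete, which augment()
+        # does not support and so must raise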
+        with self.assertRaisesRegex(NotImplementedError, error_msg):
+            augm = fake_init.augment(
+                covariance_matrix=_cov[..., n:, n:],
+                cross_covariance_matrix=_cov[..., n:, :n],
+                bounds=_bounds[..., n:, :],
+            )
+
+        # Testing that solver will try to recover if it encounters
+        # a non-psd matrix, even if it ultimately fails in this case
+        error_msg = (
+            "Matrix not positive definite after repeatedly adding jitter up to.*"
+        )
+        with self.assertRaisesRegex(NotPSDError, error_msg):
+            fake_cov = torch.ones_like(_cov[..., n:, n:])
+            augm = init.augment(
+                covariance_matrix=fake_cov,
+                cross_covariance_matrix=_cov[..., n:, :n],
+                bounds=_bounds[..., n:, :],
+            )
+
     def test_getitem(self):
         with torch.random.fork_rng():
             torch.random.manual_seed(1)
@@ -290,6 +317,11 @@ def test_getitem(self):
             b = getattr(other.piv_chol, key)
             self.assertTrue(a.equal(b))
 
+        fake_solver = deepcopy(self.toy_solver)
+        fake_solver.log_prob_extra = torch.tensor([-1])
+        fake_solver_1 = fake_solver[:1]
+        self.assertEqual(fake_solver_1.log_prob_extra, fake_solver.log_prob_extra[:1])
+
     def test_concat(self):
         split = len(self.toy_solver.log_prob) // 2
         A = self.toy_solver[:split]
@@ -358,3 +390,20 @@ def test_asdict(self):
     def test_build(self):
         other = MVNXPB.build(**self.toy_solver.asdict())
         self.assertEqualMXNBPB(self.toy_solver, other)
+
+    def test_exceptions(self):
+        # in solve
+        fake_solver = deepcopy(self.toy_solver)
+        fake_solver.step = fake_solver.piv_chol.step + 1
+        error_msg = "Invalid state: solver ran ahead of matrix decomposition."
+        with self.assertRaisesRegex(ValueError, error_msg):
+            fake_solver.solve()
+
+        # in _pivot
+        with self.assertRaises(ValueError):
+            pivot = torch.LongTensor([-1])  # this will not be used before the raise
+            fake_solver.pivot_(pivot)
+
+        error_msg = f"Expected `other` to be {type(fake_solver)} typed but was.*"
+        with self.assertRaisesRegex(TypeError, error_msg):
+            fake_solver.concat(1, 1)
diff --git a/test/utils/probability/test_unified_skew_normal.py b/test/utils/probability/test_unified_skew_normal.py
index 96fa0458c1..00ccfd1aed 100644
--- a/test/utils/probability/test_unified_skew_normal.py
+++ b/test/utils/probability/test_unified_skew_normal.py
@@ -6,7 +6,10 @@
 
 from __future__ import annotations
 
+from copy import deepcopy
+
 from itertools import count
+
 from typing import Any, Dict, Optional, Sequence, Tuple
 
 import torch
@@ -16,6 +19,7 @@
 )
 from botorch.utils.probability.unified_skew_normal import UnifiedSkewNormal
 from botorch.utils.testing import BotorchTestCase
+from linear_operator.operators import DenseLinearOperator
 from torch import Tensor
 from torch.distributions import MultivariateNormal
 from torch.special import ndtri
@@ -124,6 +128,12 @@ def test_log_prob(self):
         # Compare with log probabilities returned by class
         self.assertTrue(log_probs.allclose(usn.log_prob(vals)))
 
+        # checking error handling when incorrectly shaped value is passed
+        wrong_vals = torch.cat((vals, vals), dim=-1)
+        error_msg = ".*with shape.*does not comply with the instance.*"
+        with self.assertRaisesRegex(ValueError, error_msg):
+            usn.log_prob(wrong_vals)
+
     def test_rsample(self):
         # TODO: Replace with e.g. two-sample test.
         with torch.random.fork_rng():
@@ -163,6 +173,11 @@ def test_rsample(self):
 
     def test_expand(self):
         usn = next(iter(self.distributions))
+        # calling these lazy properties to cache them and
+        # hit associated branches in expand
+        usn._orthogonalized_gauss
+        usn.covariance_matrix
+
         other = usn.expand(torch.Size([2]))
         for key in ("loc", "covariance_matrix"):
             a = getattr(usn.gauss, key)
@@ -174,3 +189,98 @@ def test_expand(self):
 
         for b in other.cross_covariance_matrix.unbind():
             self.assertTrue(usn.cross_covariance_matrix.equal(b))
+
+        fake_usn = deepcopy(usn)
+        fake_usn.covariance_matrix = -1
+        error_msg = (
+            f"Type {type(-1)} of UnifiedSkewNormal's lazy property "
+            "covariance_matrix not supported.*"
+        )
+        with self.assertRaisesRegex(TypeError, error_msg):
+            other = fake_usn.expand(torch.Size([2]))
+
+    def test_validate_args(self):
+        for d in self.distributions:
+            error_msg = ".*is only well-defined for positive definite.*"
+            with self.assertRaisesRegex(ValueError, error_msg):
+                gauss = deepcopy(d.gauss)
+                gauss.covariance_matrix *= -1
+                UnifiedSkewNormal(d.trunc, gauss, d.cross_covariance_matrix)
+
+            error_msg = ".*-dimensional `trunc` incompatible with.*-dimensional `gauss"
+            with self.assertRaisesRegex(ValueError, error_msg):
+                gauss = deepcopy(d.gauss)
+                gauss._event_shape = (*gauss._event_shape, 1)
+                UnifiedSkewNormal(d.trunc, gauss, d.cross_covariance_matrix)
+
+            error_msg = "Incompatible batch shapes"
+            with self.assertRaisesRegex(ValueError, error_msg):
+                gauss = deepcopy(d.gauss)
+                trunc = deepcopy(d.trunc)
+                gauss._batch_shape = (*gauss._batch_shape, 2)
+                trunc._batch_shape = (*trunc._batch_shape, 3)
+                UnifiedSkewNormal(trunc, gauss, d.cross_covariance_matrix)
+
+    def test_properties(self):
+        orth = "_orthogonalized_gauss"
+        scal = "scale_tril"
+        for d in self.distributions:
+            # testing calling orthogonalized_gauss and scale_tril
+            usn = UnifiedSkewNormal(
+                d.trunc, d.gauss, d.cross_covariance_matrix, validate_args=False
+            )
+            self.assertTrue(orth not in usn.__dict__)
+            self.assertTrue(scal not in usn.__dict__)
+            usn._orthogonalized_gauss
+            self.assertTrue(orth in usn.__dict__)
+            self.assertTrue(scal not in usn.__dict__)
+            usn.scale_tril
+            self.assertTrue(orth in usn.__dict__)
+            self.assertTrue(scal in usn.__dict__)
+
+            # testing calling orthogonalized_gauss and scale_tril in reverse order
+            usn = UnifiedSkewNormal(
+                d.trunc, d.gauss, d.cross_covariance_matrix, validate_args=False
+            )
+            usn.scale_tril
+            self.assertTrue(orth not in usn.__dict__)
+            self.assertTrue(scal in usn.__dict__)
+            usn._orthogonalized_gauss
+            self.assertTrue(orth in usn.__dict__)
+            self.assertTrue(scal in usn.__dict__)
+
+    def test_covariance_matrix(self):
+        for d in self.distributions:
+            cov = d.covariance_matrix
+            self.assertTrue(isinstance(cov, Tensor))
+
+            # testing for symmetry
+            self.assertTrue(torch.allclose(cov, cov.mT))
+
+            # testing for positive-definiteness
+            ispd = False
+            try:
+                torch.linalg.cholesky(cov)
+                ispd = True
+            except RuntimeError:
+                pass
+            self.assertTrue(ispd)
+
+            # checking that linear operator to tensor conversion
+            # leads to same covariance matrix
+            xcov_linop = DenseLinearOperator(d.cross_covariance_matrix)
+            usn_linop = UnifiedSkewNormal(
+                trunc=d.trunc, gauss=d.gauss, cross_covariance_matrix=xcov_linop
+            )
+            cov_linop = usn_linop.covariance_matrix
+            self.assertTrue(isinstance(cov_linop, Tensor))
+            self.assertTrue(torch.allclose(cov, cov_linop))
+
+    def test_repr(self):
+        for d in self.distributions:
+            r = repr(d)
+            self.assertTrue(f"trunc: {d.trunc}" in r)
+
self.assertTrue(f"gauss: {d.gauss}" in r) + self.assertTrue( + f"cross_covariance_matrix: {d.cross_covariance_matrix.shape}" in r + ) diff --git a/test/utils/probability/test_utils.py b/test/utils/probability/test_utils.py index d1cb574e16..d02b31ae34 100644 --- a/test/utils/probability/test_utils.py +++ b/test/utils/probability/test_utils.py @@ -50,6 +50,29 @@ def test_case_dispatcher(self): active[mask] = False self.assertTrue(~active.any() or output[active].eq(len(levels)).all()) + # testing mask.all() branch + edge_cases = [ + (lambda: torch.full(values.shape, True), lambda mask: float("nan")) + ] + output = utils.case_dispatcher( + out=torch.full_like(values, float("nan")), + cases=edge_cases, + default=lambda mask: len(levels), + ) + + # testing if not active.any() branch + pred = torch.full(values.shape, True) + pred[0] = False + edge_cases = [ + (lambda: pred, lambda mask: False), + (lambda: torch.full(values.shape, True), lambda mask: False), + ] + output = utils.case_dispatcher( + out=torch.full_like(values, float("nan")), + cases=edge_cases, + default=lambda mask: len(levels), + ) + def test_build_positional_indices(self): with torch.random.fork_rng(): torch.random.manual_seed(0)