From b322dbf1c83d0a4f68fa234960e2d19c42432c00 Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Fri, 24 Jun 2022 15:43:53 +0100 Subject: [PATCH 01/50] first commit keops --- alibi_detect/cd/base.py | 2 + alibi_detect/cd/keops/__init__.py | 0 alibi_detect/cd/keops/learned_kernel.py | 0 alibi_detect/cd/keops/mmd.py | 192 ++++++++++++++++++++++++ alibi_detect/cd/mmd.py | 19 ++- alibi_detect/utils/frameworks.py | 6 + alibi_detect/utils/keops/__init__.py | 0 alibi_detect/utils/keops/kernels.py | 143 ++++++++++++++++++ 8 files changed, 357 insertions(+), 5 deletions(-) create mode 100644 alibi_detect/cd/keops/__init__.py create mode 100644 alibi_detect/cd/keops/learned_kernel.py create mode 100644 alibi_detect/cd/keops/mmd.py create mode 100644 alibi_detect/utils/keops/__init__.py create mode 100644 alibi_detect/utils/keops/kernels.py diff --git a/alibi_detect/cd/base.py b/alibi_detect/cd/base.py index 690bc39f9..b5ff7ed88 100644 --- a/alibi_detect/cd/base.py +++ b/alibi_detect/cd/base.py @@ -505,6 +505,7 @@ def __init__( self.infer_sigma = configure_kernel_from_x_ref if configure_kernel_from_x_ref and isinstance(sigma, np.ndarray): self.infer_sigma = False + # TODO: this might print a message for keops despite not existing configure_kernel_from_x_ref logger.warning('`sigma` is specified for the kernel and `configure_kernel_from_x_ref` ' 'is set to True. `sigma` argument takes priority over ' '`configure_kernel_from_x_ref` (set to False).') @@ -547,6 +548,7 @@ def preprocess(self, x: Union[np.ndarray, list]) -> Tuple[np.ndarray, np.ndarray else: return self.x_ref, x # type: ignore[return-value] + # TODO: not absolutely required for keops...?! 
@abstractmethod def kernel_matrix(self, x: Union['torch.Tensor', 'tf.Tensor'], y: Union['torch.Tensor', 'tf.Tensor']) \ -> Union['torch.Tensor', 'tf.Tensor']: diff --git a/alibi_detect/cd/keops/__init__.py b/alibi_detect/cd/keops/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/alibi_detect/cd/keops/learned_kernel.py b/alibi_detect/cd/keops/learned_kernel.py new file mode 100644 index 000000000..e69de29bb diff --git a/alibi_detect/cd/keops/mmd.py b/alibi_detect/cd/keops/mmd.py new file mode 100644 index 000000000..3bceb97a9 --- /dev/null +++ b/alibi_detect/cd/keops/mmd.py @@ -0,0 +1,192 @@ +import logging +import numpy as np +import torch +from typing import Callable, Dict, Optional, Tuple, Union +from alibi_detect.cd.base import BaseMMDDrift +from alibi_detect.utils.keops.kernels import GaussianRBF +from alibi_detect.utils.pytorch import get_device + +logger = logging.getLogger(__name__) + + +class MMDDriftKeops(BaseMMDDrift): + def __init__( + self, + x_ref: Union[np.ndarray, list], + p_val: float = .05, + preprocess_x_ref: bool = True, + update_x_ref: Optional[Dict[str, int]] = None, + preprocess_fn: Optional[Callable] = None, + kernel: Callable = GaussianRBF, + sigma: Optional[np.ndarray] = None, + n_permutations: int = 100, + batch_size_permutations: int = 1000000, + device: Optional[str] = None, + input_shape: Optional[tuple] = None, + data_type: Optional[str] = None + ) -> None: + """ + Maximum Mean Discrepancy (MMD) data drift detector using a permutation test. + + Parameters + ---------- + x_ref + Data used as reference distribution. + p_val + p-value used for the significance of the permutation test. + preprocess_x_ref + Whether to already preprocess and store the reference data. + update_x_ref + Reference data can optionally be updated to the last n instances seen by the detector + or via reservoir sampling with size n. 
For the former, the parameter equals {'last': n} while + for reservoir sampling {'reservoir_sampling': n} is passed. + preprocess_fn + Function to preprocess the data before computing the data drift metrics. + kernel + Kernel used for the MMD computation, defaults to Gaussian RBF kernel. + sigma + Optionally set the GaussianRBF kernel bandwidth. Can also pass multiple bandwidth values as an array. + The kernel evaluation is then averaged over those bandwidths. + n_permutations + Number of permutations used in the permutation test. + batch_size_permutations + KeOps computes the n_permutations of the MMD^2 statistics in chunks of batch_size_permutations. + device + Device type used. The default None tries to use the GPU and falls back on CPU if needed. + Can be specified by passing either 'cuda', 'gpu' or 'cpu'. + input_shape + Shape of input data. + data_type + Optionally specify the data type (tabular, image or time-series). Added to metadata. + """ + super().__init__( + x_ref=x_ref, + p_val=p_val, + preprocess_x_ref=preprocess_x_ref, + update_x_ref=update_x_ref, + preprocess_fn=preprocess_fn, + sigma=sigma, + n_permutations=n_permutations, + input_shape=input_shape, + data_type=data_type + ) + self.meta.update({'backend': 'keops'}) + + # set device + self.device = get_device(device) + + # initialize kernel + sigma = torch.from_numpy(sigma).to(self.device) if isinstance(sigma, # type: ignore[assignment] + np.ndarray) else None + self.kernel = kernel(sigma) if kernel == GaussianRBF else kernel + + # set the correct MMD^2 function based on the batch size for the permutations + self.batch_size = batch_size_permutations + self.n_batches = 1 + (n_permutations - 1) // batch_size_permutations + self.mmd2 = self._mmd2 if self.n_batches == 1 else self._batched_mmd2 + + def _mmd2(self, x_all: torch.Tensor, perms: List[torch.Tensor], m: int, n: int) \ + -> Tuple[torch.Tensor, torch.Tensor]: + """ + Compute MMD^2 for the original test statistic and all permutations at once. 
+ + Parameters + ---------- + x_all + Concatenated reference and test instances. + perms + List with permutation vectors. + m + Number of reference instances. + n + Number of test instances. + + Returns + ------- + MMD^2 statistic for the original and permuted reference and test sets. + """ + x_all = x_all.to(self.device) + + # construct stacked tensors with all permutations for the reference set x and test set y + x = torch.cat([x_all[None, :m, :], torch.cat([x_all[perm[:m]][None, :, :] for perm in perms], 0)], 0) + y = torch.cat([x_all[None, m:, :], torch.cat([x_all[perm[m:]][None, :, :] for perm in perms], 0)], 0) + + # compute summed kernel matrices + c_xx, c_yy, c_xy = 1 / (m * (m - 1)), 1 / (n * (n - 1)), 2. / (m * n) + # TODO: check where permutations=True and reduce_sum=True belong + k_xx = self.kernel(x, x, permutations=True, reduce_sum=True) + k_yy = self.kernel(y, y, permutations=True, reduce_sum=True) + k_xy = self.kernel(x, y, permutations=True, reduce_sum=True) + stats = c_xx * (k_xx - m) + c_yy * (k_yy - n) - c_xy * k_xy # TODO: check diagonal adjustment + return stats[0], stats[1:] + + # TODO: just use _batched_mmd2?! Need to check time diff with original approach which should be minimal + def _batched_mmd2(self, x_all: torch.Tensor, perms: List[torch.Tensor], m: int, n: int) \ + -> Tuple[torch.Tensor, torch.Tensor]: + """ + Batched (across the permutations) MMD^2 computation for the original test statistic and the permutations. + + Parameters + ---------- + x_all + Concatenated reference and test instances. + perms + List with permutation vectors. + m + Number of reference instances. + n + Number of test instances. + + Returns + ------- + MMD^2 statistic for the original and permuted reference and test sets. 
+ """ + k_xx, k_yy, k_xy = [], [], [] + for batch in range(self.n_batches): + i, j = batch * self.batch_size, (batch + 1) * self.batch_size + # construct stacked tensors with a batch of permutations for the reference set x and test set y + x = torch.cat([x_all[perm[:m]][None, :, :] for perm in perms[i:j]], 0) + y = torch.cat([x_all[perm[m:]][None, :, :] for perm in perms[i:j]], 0) + if batch == 0: + x = torch.cat([x_all[None, :m, :], x], 0) + y = torch.cat([x_all[None, m:, :], y], 0) + x, y = x.to(self.device), y.to(self.device) + + # batch-wise kernel matrix computation over the permutations + k_xx.append(self.kernel(x, x, permutations=True, reduce_sum=True)) + k_yy.append(self.kernel(y, y, permutations=True, reduce_sum=True)) + k_xy.append(self.kernel(x, y, permutations=True, reduce_sum=True)) + c_xx, c_yy, c_xy = 1 / (m * (m - 1)), 1 / (n * (n - 1)), 2. / (m * n) + stats = c_xx * (torch.cat(k_xx) - m) + c_yy * (torch.cat(k_yy) - n) - c_xy * torch.cat(k_xy) + return stats[0], stats[1:] + + def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, float]: + """ + Compute the p-value resulting from a permutation test using the maximum mean discrepancy + as a distance measure between the reference data and the data to be tested. + + Parameters + ---------- + x + Batch of instances. + + Returns + ------- + p-value obtained from the permutation test, the MMD^2 between the reference and test set, + and the MMD^2 threshold above which drift is flagged. 
+ """ + x_ref, x = self.preprocess(x) + x_ref = torch.from_numpy(x_ref).float() # type: ignore[assignment] + x = torch.from_numpy(x).float() # type: ignore[assignment] + # compute kernel matrix, MMD^2 and apply permutation test + m, n = x_ref.shape[0], x.shape[0] + perms = [torch.randperm(m + n) for _ in range(self.n_permutations)] + x_all = torch.cat([x_ref, x], 0) + mmd2, mmd2_permuted = self.mmd2(x_all, perms, m, n) + if self.device.type == 'cuda': + mmd2, mmd2_permuted = mmd2.cpu(), mmd2_permuted.cpu() + p_val = (mmd2 <= mmd2_permuted).float().mean() + # compute distance threshold + idx_threshold = int(self.p_val * len(mmd2_permuted)) + distance_threshold = torch.sort(mmd2_permuted, descending=True).values[idx_threshold] + return p_val.numpy().item(), mmd2.numpy().item(), distance_threshold.numpy() diff --git a/alibi_detect/cd/mmd.py b/alibi_detect/cd/mmd.py index 0da0dec5b..0e00ad32c 100644 --- a/alibi_detect/cd/mmd.py +++ b/alibi_detect/cd/mmd.py @@ -1,7 +1,10 @@ import logging import numpy as np from typing import Callable, Dict, Optional, Union, Tuple -from alibi_detect.utils.frameworks import has_pytorch, has_tensorflow +from alibi_detect.utils.frameworks import has_keops, has_pytorch, has_tensorflow + +if has_keops: + from alibi_detect.cd.keops import MMDDriftKeops if has_pytorch: from alibi_detect.cd.pytorch.mmd import MMDDriftTorch @@ -68,10 +71,11 @@ def __init__( super().__init__() backend = backend.lower() - if backend == 'tensorflow' and not has_tensorflow or backend == 'pytorch' and not has_pytorch: + if backend == 'tensorflow' and not has_tensorflow or backend == 'pytorch' and not has_pytorch \ + or backend == 'keops' and not has_keops: raise ImportError(f'{backend} not installed. Cannot initialize and run the ' f'MMDDrift detector with {backend} backend.') - elif backend not in ['tensorflow', 'pytorch']: + elif backend not in ['tensorflow', 'pytorch', 'keops']: raise NotImplementedError(f'{backend} not implemented. 
Use tensorflow or pytorch instead.') kwargs = locals() @@ -82,15 +86,20 @@ def __init__( if kernel is None: if backend == 'tensorflow': from alibi_detect.utils.tensorflow.kernels import GaussianRBF - else: + elif backend == 'pytorch': from alibi_detect.utils.pytorch.kernels import GaussianRBF # type: ignore + else: + from alibi_detect.utils.keops.kernels import GaussianRBF # type: ignore kwargs.update({'kernel': GaussianRBF}) if backend == 'tensorflow' and has_tensorflow: kwargs.pop('device', None) self._detector = MMDDriftTF(*args, **kwargs) # type: ignore - else: + elif backend == 'pytorch' and has_pytorch: self._detector = MMDDriftTorch(*args, **kwargs) # type: ignore + else: + kwargs.pop('configure_kernel_from_x_ref', None) + self._detector = MMDDriftKeops(*args, **kwargs) # type: ignore self.meta = self._detector.meta def predict(self, x: Union[np.ndarray, list], return_p_val: bool = True, return_distance: bool = True) \ diff --git a/alibi_detect/utils/frameworks.py b/alibi_detect/utils/frameworks.py index 5613e21cf..f742618b1 100644 --- a/alibi_detect/utils/frameworks.py +++ b/alibi_detect/utils/frameworks.py @@ -15,3 +15,9 @@ has_sklearn = True except ImportError: has_sklearn = False + +try: + import keops # noqa + has_keops = True +except ImportError: + has_keops = False diff --git a/alibi_detect/utils/keops/__init__.py b/alibi_detect/utils/keops/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/alibi_detect/utils/keops/kernels.py b/alibi_detect/utils/keops/kernels.py new file mode 100644 index 000000000..0bba30c8c --- /dev/null +++ b/alibi_detect/utils/keops/kernels.py @@ -0,0 +1,143 @@ +from alibi_detect.utils.pytorch.kernels import sigma_median # TODO: keops sigma_median? 
+import numpy as np +from pykeops.torch import LazyTensor +import torch +import torch.nn as nn +from typing import Callable, List, Tuple, Union + + +class GaussianRBF(nn.Module): + def __init__( + self, + sigma: torch.Tensor = None, + init_sigma_fn: Callable = sigma_median, + trainable: bool = False + ) -> None: + """ + Gaussian RBF kernel: k(x,y) = exp(-(1/(2*sigma^2)||x-y||^2). A forward pass takes + a batch of instances x [Nx, features] and y [Ny, features] and returns the kernel + matrix [Nx, Ny]. + + Parameters + ---------- + sigma + Bandwidth used for the kernel. Needn't be specified if being inferred or trained. + Can pass multiple values to eval kernel with and then average. + init_sigma_fn + Function used to compute the bandwidth `sigma`. Used when `sigma` is to be inferred. + The function's signature should match :py:func:`~alibi_detect.utils.pytorch.kernels.sigma_median`, + meaning that it should take in the tensors `x`, `y` and `dist` and return `sigma`. + trainable + Whether or not to track gradients w.r.t. `sigma` to allow it to be trained. 
+ """ + super().__init__() + if sigma is None: + self.log_sigma = nn.Parameter(torch.empty(1), requires_grad=trainable) + self.init_required = True + else: + #sigma = torch.Tensor([sigma]) # TODO: ensure it's done somewhere else + sigma = sigma.reshape(-1) # [1] + self.log_sigma = nn.Parameter(sigma.log(), requires_grad=trainable) + self.init_required = False + self.init_sigma_fn = init_sigma_fn + self.trainable = trainable + + super().__init__() + if sigma is None: + self.log_sigma = nn.Parameter(torch.empty(1), requires_grad=trainable) + self.init_required = True + else: + sigma = sigma.reshape(-1) # [Ns,] TODO: ensure this works with keops + self.log_sigma = nn.Parameter(sigma.log(), requires_grad=trainable) + self.init_required = False + self.init_sigma_fn = init_sigma_fn + self.trainable = trainable + + @property + def sigma(self) -> torch.Tensor: + return self.log_sigma.exp() + + # TODO: could use original kernel with some tweaks? + # - LazyTensor input + # - permutations input + # - reduce_sum done in main detector + def forward(self, x: Union[np.ndarray, torch.Tensor], y: Union[np.ndarray, torch.Tensor], + infer_sigma: bool = False, permutations: bool = False, reduce_sum: bool = False) -> LazyTensor: + + x, y = torch.as_tensor(x), torch.as_tensor(y) + + #if isinstance(x, np.ndarray): + # x = torch.as_tensor(x) + #if isinstance(y, np.ndarray): + # y = torch.as_tensor(y) + + if not permutations: + x_i = LazyTensor(x[:, None, :]) # [n, 1, d] + y_j = LazyTensor(y[None, :, :]) # [1, m, d] + else: + x_i = LazyTensor(x[:, :, None, :]) # [perms+1, n, 1, d] + y_j = LazyTensor(y[:, None, :, :]) # [perms+1, 1, m, d] + d_ij = ((x_i - y_j) ** 2).sum(-1) # [n, m] + + if infer_sigma or self.init_required: + if self.trainable and infer_sigma: + raise ValueError("Gradients cannot be computed w.r.t. 
an inferred sigma value") + sigma = self.init_sigma_fn(x, y, d_ij) # TODO: would not work with default init fn + with torch.no_grad(): + self.log_sigma.copy_(sigma.log().clone()) + self.init_required = False + + gamma = 1. / (2. * self.sigma ** 2) # [1] TODO: [Ns,]? + if not permutations: + gamma = LazyTensor(gamma[None, None, :]) # [1, 1, 1] + else: + gamma = LazyTensor(gamma[None, None, None, :]) # [1, 1, 1, 1] + k_ij = (- gamma * d_ij).exp() # [n, m] or [perms+1, n, m] + if reduce_sum: + k_ij = k_ij.sum(1).sum(1).squeeze(-1) # [1] or [perms+1] + return k_ij + + +class DeepKernelKeops(nn.Module): + def __init__( + self, + proj: nn.Module, + kernel_a: nn.Module = GaussianRBFKeops(trainable=True), + kernel_b: nn.Module = GaussianRBFKeops(trainable=True), + eps: Union[float, str] = 'trainable' + ) -> None: + super().__init__() + + self.kernel_a = kernel_a + self.kernel_b = kernel_b + self.proj = proj + if kernel_b is not None: + self._init_eps(eps) + + def _init_eps(self, eps: Union[float, str]) -> None: + if isinstance(eps, float): + if not 0 < eps < 1: + raise ValueError("eps should be in (0,1)") + self.logit_eps = nn.Parameter(torch.tensor(eps).logit(), requires_grad=False) + elif eps == 'trainable': + self.logit_eps = nn.Parameter(torch.tensor(0.)) + else: + raise NotImplementedError("eps should be 'trainable' or a float in (0,1)") + + @property + def eps(self) -> torch.Tensor: + return self.logit_eps.sigmoid() if self.kernel_b is not None else torch.tensor(0.) + + def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: + similarity = self.kernel_a(self.proj(x), self.proj(y)) + if self.kernel_b is not None: + similarity = (1-self.eps)*similarity + self.eps*self.kernel_b(x, y) + return similarity + + # TODO: where does this belong? 
+ def forward_perms(self, x_proj: torch.Tensor, y_proj: torch.Tensor, + x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: + similarity = self.kernel_a(x_proj, y_proj, permutations=True) # [perms+1, n, m] + if self.kernel_b is not None: + similarity = (1-self.eps)*similarity + self.eps*self.kernel_b(x, y, permutations=True) + return similarity.sum(1).sum(1).squeeze(-1) # [perms+1] From 29ad4d9a2900d921b433922e937161e78123750f Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Fri, 24 Jun 2022 17:00:47 +0100 Subject: [PATCH 02/50] update kernel and mmd keops --- alibi_detect/cd/keops/mmd.py | 22 ++++++++++++----- alibi_detect/utils/keops/kernels.py | 38 ++++++++++++++--------------- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/alibi_detect/cd/keops/mmd.py b/alibi_detect/cd/keops/mmd.py index 3bceb97a9..67b07e684 100644 --- a/alibi_detect/cd/keops/mmd.py +++ b/alibi_detect/cd/keops/mmd.py @@ -1,5 +1,6 @@ import logging import numpy as np +from pykeops.torch import LazyTensor import torch from typing import Callable, Dict, Optional, Tuple, Union from alibi_detect.cd.base import BaseMMDDrift @@ -114,9 +115,12 @@ def _mmd2(self, x_all: torch.Tensor, perms: List[torch.Tensor], m: int, n: int) # compute summed kernel matrices c_xx, c_yy, c_xy = 1 / (m * (m - 1)), 1 / (n * (n - 1)), 2. 
/ (m * n) # TODO: check where permutations=True and reduce_sum=True belong - k_xx = self.kernel(x, x, permutations=True, reduce_sum=True) - k_yy = self.kernel(y, y, permutations=True, reduce_sum=True) - k_xy = self.kernel(x, y, permutations=True, reduce_sum=True) + #k_xx = self.kernel(x, x, permutations=True, reduce_sum=True) + #k_yy = self.kernel(y, y, permutations=True, reduce_sum=True) + #k_xy = self.kernel(x, y, permutations=True, reduce_sum=True) + k_xx = self.kernel(LazyTensor(x[:, :, None, :]), LazyTensor(x[:, None, :, :])).sum(1).sum(1).squeeze(-1) + k_yy = self.kernel(LazyTensor(y[:, :, None, :]), LazyTensor(y[:, None, :, :])).sum(1).sum(1).squeeze(-1) + k_xy = self.kernel(LazyTensor(x[:, :, None, :]), LazyTensor(y[:, None, :, :])).sum(1).sum(1).squeeze(-1) stats = c_xx * (k_xx - m) + c_yy * (k_yy - n) - c_xy * k_xy # TODO: check diagonal adjustment return stats[0], stats[1:] @@ -153,9 +157,15 @@ def _batched_mmd2(self, x_all: torch.Tensor, perms: List[torch.Tensor], m: int, x, y = x.to(self.device), y.to(self.device) # batch-wise kernel matrix computation over the permutations - k_xx.append(self.kernel(x, x, permutations=True, reduce_sum=True)) - k_yy.append(self.kernel(y, y, permutations=True, reduce_sum=True)) - k_xy.append(self.kernel(x, y, permutations=True, reduce_sum=True)) + #k_xx.append(self.kernel(x, x, permutations=True, reduce_sum=True)) + #k_yy.append(self.kernel(y, y, permutations=True, reduce_sum=True)) + #k_xy.append(self.kernel(x, y, permutations=True, reduce_sum=True)) + k_xx.append(self.kernel( + LazyTensor(x[:, :, None, :]), LazyTensor(x[:, None, :, :])).sum(1).sum(1).squeeze(-1)) + k_yy.append(self.kernel( + LazyTensor(y[:, :, None, :]), LazyTensor(y[:, None, :, :])).sum(1).sum(1).squeeze(-1)) + k_xy.append(self.kernel( + LazyTensor(x[:, :, None, :]), LazyTensor(y[:, None, :, :])).sum(1).sum(1).squeeze(-1)) c_xx, c_yy, c_xy = 1 / (m * (m - 1)), 1 / (n * (n - 1)), 2. 
/ (m * n) stats = c_xx * (torch.cat(k_xx) - m) + c_yy * (torch.cat(k_yy) - n) - c_xy * torch.cat(k_xy) return stats[0], stats[1:] diff --git a/alibi_detect/utils/keops/kernels.py b/alibi_detect/utils/keops/kernels.py index 0bba30c8c..08667bb8f 100644 --- a/alibi_detect/utils/keops/kernels.py +++ b/alibi_detect/utils/keops/kernels.py @@ -30,18 +30,6 @@ def __init__( trainable Whether or not to track gradients w.r.t. `sigma` to allow it to be trained. """ - super().__init__() - if sigma is None: - self.log_sigma = nn.Parameter(torch.empty(1), requires_grad=trainable) - self.init_required = True - else: - #sigma = torch.Tensor([sigma]) # TODO: ensure it's done somewhere else - sigma = sigma.reshape(-1) # [1] - self.log_sigma = nn.Parameter(sigma.log(), requires_grad=trainable) - self.init_required = False - self.init_sigma_fn = init_sigma_fn - self.trainable = trainable - super().__init__() if sigma is None: self.log_sigma = nn.Parameter(torch.empty(1), requires_grad=trainable) @@ -58,19 +46,14 @@ def sigma(self) -> torch.Tensor: return self.log_sigma.exp() # TODO: could use original kernel with some tweaks? 
- # - LazyTensor input + # - LazyTensor input -> # - permutations input # - reduce_sum done in main detector - def forward(self, x: Union[np.ndarray, torch.Tensor], y: Union[np.ndarray, torch.Tensor], + def _forward(self, x: Union[np.ndarray, torch.Tensor], y: Union[np.ndarray, torch.Tensor], infer_sigma: bool = False, permutations: bool = False, reduce_sum: bool = False) -> LazyTensor: x, y = torch.as_tensor(x), torch.as_tensor(y) - #if isinstance(x, np.ndarray): - # x = torch.as_tensor(x) - #if isinstance(y, np.ndarray): - # y = torch.as_tensor(y) - if not permutations: x_i = LazyTensor(x[:, None, :]) # [n, 1, d] y_j = LazyTensor(y[None, :, :]) # [1, m, d] @@ -97,6 +80,23 @@ def forward(self, x: Union[np.ndarray, torch.Tensor], y: Union[np.ndarray, torch k_ij = k_ij.sum(1).sum(1).squeeze(-1) # [1] or [perms+1] return k_ij + def forward(self, x: LazyTensor, y: LazyTensor, infer_sigma: bool = False) -> LazyTensor: + + dist = ((x - y) ** 2).sum(-1) + + if infer_sigma or self.init_required: + if self.trainable and infer_sigma: + raise ValueError("Gradients cannot be computed w.r.t. an inferred sigma value") + sigma = self.init_sigma_fn(x, y, d_ij) # TODO: would not work with default init fn + with torch.no_grad(): + self.log_sigma.copy_(sigma.log().clone()) + self.init_required = False + + gamma = 1. / (2. * self.sigma ** 2) # [1] TODO: [Ns,]? 
+ gamma = LazyTensor(gamma[None, None, :]) if len(dist.shape) == 2 else LazyTensor(gamma[None, None, None, :]) + kernel_mat = (- gamma * d_ij).exp() + return kernel_mat + class DeepKernelKeops(nn.Module): def __init__( From dd2d13ed2c010445f3ade5ea4a3c5b02f22d51dc Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Fri, 24 Jun 2022 18:25:34 +0100 Subject: [PATCH 03/50] allow multiple kernel bandwidths for keops --- alibi_detect/cd/keops/mmd.py | 7 ----- alibi_detect/utils/keops/kernels.py | 47 +++++------------------------ 2 files changed, 7 insertions(+), 47 deletions(-) diff --git a/alibi_detect/cd/keops/mmd.py b/alibi_detect/cd/keops/mmd.py index 67b07e684..62d51aeb5 100644 --- a/alibi_detect/cd/keops/mmd.py +++ b/alibi_detect/cd/keops/mmd.py @@ -114,10 +114,6 @@ def _mmd2(self, x_all: torch.Tensor, perms: List[torch.Tensor], m: int, n: int) # compute summed kernel matrices c_xx, c_yy, c_xy = 1 / (m * (m - 1)), 1 / (n * (n - 1)), 2. / (m * n) - # TODO: check where permutations=True and reduce_sum=True belong - #k_xx = self.kernel(x, x, permutations=True, reduce_sum=True) - #k_yy = self.kernel(y, y, permutations=True, reduce_sum=True) - #k_xy = self.kernel(x, y, permutations=True, reduce_sum=True) k_xx = self.kernel(LazyTensor(x[:, :, None, :]), LazyTensor(x[:, None, :, :])).sum(1).sum(1).squeeze(-1) k_yy = self.kernel(LazyTensor(y[:, :, None, :]), LazyTensor(y[:, None, :, :])).sum(1).sum(1).squeeze(-1) k_xy = self.kernel(LazyTensor(x[:, :, None, :]), LazyTensor(y[:, None, :, :])).sum(1).sum(1).squeeze(-1) @@ -157,9 +153,6 @@ def _batched_mmd2(self, x_all: torch.Tensor, perms: List[torch.Tensor], m: int, x, y = x.to(self.device), y.to(self.device) # batch-wise kernel matrix computation over the permutations - #k_xx.append(self.kernel(x, x, permutations=True, reduce_sum=True)) - #k_yy.append(self.kernel(y, y, permutations=True, reduce_sum=True)) - #k_xy.append(self.kernel(x, y, permutations=True, reduce_sum=True)) k_xx.append(self.kernel( LazyTensor(x[:, :, 
None, :]), LazyTensor(x[:, None, :, :])).sum(1).sum(1).squeeze(-1)) k_yy.append(self.kernel( diff --git a/alibi_detect/utils/keops/kernels.py b/alibi_detect/utils/keops/kernels.py index 08667bb8f..d3b64973d 100644 --- a/alibi_detect/utils/keops/kernels.py +++ b/alibi_detect/utils/keops/kernels.py @@ -10,7 +10,7 @@ class GaussianRBF(nn.Module): def __init__( self, sigma: torch.Tensor = None, - init_sigma_fn: Callable = sigma_median, + init_sigma_fn: Callable = sigma_median, # TODO: would not work with default init fn trainable: bool = False ) -> None: """ @@ -35,7 +35,7 @@ def __init__( self.log_sigma = nn.Parameter(torch.empty(1), requires_grad=trainable) self.init_required = True else: - sigma = sigma.reshape(-1) # [Ns,] TODO: ensure this works with keops + sigma = sigma.reshape(-1) # [Ns,] self.log_sigma = nn.Parameter(sigma.log(), requires_grad=trainable) self.init_required = False self.init_sigma_fn = init_sigma_fn @@ -45,41 +45,6 @@ def __init__( def sigma(self) -> torch.Tensor: return self.log_sigma.exp() - # TODO: could use original kernel with some tweaks? - # - LazyTensor input -> - # - permutations input - # - reduce_sum done in main detector - def _forward(self, x: Union[np.ndarray, torch.Tensor], y: Union[np.ndarray, torch.Tensor], - infer_sigma: bool = False, permutations: bool = False, reduce_sum: bool = False) -> LazyTensor: - - x, y = torch.as_tensor(x), torch.as_tensor(y) - - if not permutations: - x_i = LazyTensor(x[:, None, :]) # [n, 1, d] - y_j = LazyTensor(y[None, :, :]) # [1, m, d] - else: - x_i = LazyTensor(x[:, :, None, :]) # [perms+1, n, 1, d] - y_j = LazyTensor(y[:, None, :, :]) # [perms+1, 1, m, d] - d_ij = ((x_i - y_j) ** 2).sum(-1) # [n, m] - - if infer_sigma or self.init_required: - if self.trainable and infer_sigma: - raise ValueError("Gradients cannot be computed w.r.t. 
an inferred sigma value") - sigma = self.init_sigma_fn(x, y, d_ij) # TODO: would not work with default init fn - with torch.no_grad(): - self.log_sigma.copy_(sigma.log().clone()) - self.init_required = False - - gamma = 1. / (2. * self.sigma ** 2) # [1] TODO: [Ns,]? - if not permutations: - gamma = LazyTensor(gamma[None, None, :]) # [1, 1, 1] - else: - gamma = LazyTensor(gamma[None, None, None, :]) # [1, 1, 1, 1] - k_ij = (- gamma * d_ij).exp() # [n, m] or [perms+1, n, m] - if reduce_sum: - k_ij = k_ij.sum(1).sum(1).squeeze(-1) # [1] or [perms+1] - return k_ij - def forward(self, x: LazyTensor, y: LazyTensor, infer_sigma: bool = False) -> LazyTensor: dist = ((x - y) ** 2).sum(-1) @@ -87,14 +52,16 @@ def forward(self, x: LazyTensor, y: LazyTensor, infer_sigma: bool = False) -> La if infer_sigma or self.init_required: if self.trainable and infer_sigma: raise ValueError("Gradients cannot be computed w.r.t. an inferred sigma value") - sigma = self.init_sigma_fn(x, y, d_ij) # TODO: would not work with default init fn + sigma = self.init_sigma_fn(x, y, dist) with torch.no_grad(): self.log_sigma.copy_(sigma.log().clone()) self.init_required = False - gamma = 1. / (2. * self.sigma ** 2) # [1] TODO: [Ns,]? + gamma = 1. / (2. 
* self.sigma ** 2) gamma = LazyTensor(gamma[None, None, :]) if len(dist.shape) == 2 else LazyTensor(gamma[None, None, None, :]) - kernel_mat = (- gamma * d_ij).exp() + kernel_mat = (- gamma * dist).exp() + if len(dist.shape) < len(gamma.shape): + kernel_mat = kernel_mat.sum(-1) / len(self.sigma) return kernel_mat From 0a7944c308b148bbdfa4c6c14af17b1dae6864c8 Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Fri, 24 Jun 2022 19:09:58 +0100 Subject: [PATCH 04/50] fix bug --- alibi_detect/utils/keops/kernels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alibi_detect/utils/keops/kernels.py b/alibi_detect/utils/keops/kernels.py index d3b64973d..dc2777d4f 100644 --- a/alibi_detect/utils/keops/kernels.py +++ b/alibi_detect/utils/keops/kernels.py @@ -25,7 +25,7 @@ def __init__( Can pass multiple values to eval kernel with and then average. init_sigma_fn Function used to compute the bandwidth `sigma`. Used when `sigma` is to be inferred. - The function's signature should match :py:func:`~alibi_detect.utils.pytorch.kernels.sigma_median`, + The function's signature should match :py:func:`~alibi_detect.utils.keops.kernels.sigma_median`, meaning that it should take in the tensors `x`, `y` and `dist` and return `sigma`. trainable Whether or not to track gradients w.r.t. `sigma` to allow it to be trained. 
From 17d1662e5d2f08e53ceabb794353a020e55ef0fd Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Thu, 30 Jun 2022 16:18:43 +0100 Subject: [PATCH 05/50] update mmd --- alibi_detect/cd/keops/learned_kernel.py | 0 alibi_detect/cd/mmd.py | 22 ++++++++++++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) delete mode 100644 alibi_detect/cd/keops/learned_kernel.py diff --git a/alibi_detect/cd/keops/learned_kernel.py b/alibi_detect/cd/keops/learned_kernel.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/alibi_detect/cd/mmd.py b/alibi_detect/cd/mmd.py index 0e00ad32c..336cf9be0 100644 --- a/alibi_detect/cd/mmd.py +++ b/alibi_detect/cd/mmd.py @@ -28,6 +28,7 @@ def __init__( sigma: Optional[np.ndarray] = None, configure_kernel_from_x_ref: bool = True, n_permutations: int = 100, + batch_size_permutations: int = 1000000, device: Optional[str] = None, input_shape: Optional[tuple] = None, data_type: Optional[str] = None @@ -60,6 +61,9 @@ def __init__( Whether to already configure the kernel bandwidth from the reference data. n_permutations Number of permutations used in the permutation test. + batch_size_permutations + KeOps computes the n_permutations of the MMD^2 statistics in chunks of batch_size_permutations. + Only relevant for 'keops' backend. device Device type used. The default None tries to use the GPU and falls back on CPU if needed. Can be specified by passing either 'cuda', 'gpu' or 'cpu'. Only relevant for 'pytorch' backend. 
@@ -81,6 +85,15 @@ def __init__( kwargs = locals() args = [kwargs['x_ref']] pop_kwargs = ['self', 'x_ref', 'backend', '__class__'] + if backend == 'tensorflow' and has_tensorflow: + pop_kwargs += ['device', 'batch_size_permutations'] + detector = MMDDriftTF + elif backend == 'pytorch' and has_pytorch: + pop_kwargs += ['batch_size_permutations'] + detector = MMDDriftTorch + else: + pop_kwargs += ['configure_kernel_from_x_ref'] + detector = MMDDriftKeops [kwargs.pop(k, None) for k in pop_kwargs] if kernel is None: @@ -92,14 +105,7 @@ def __init__( from alibi_detect.utils.keops.kernels import GaussianRBF # type: ignore kwargs.update({'kernel': GaussianRBF}) - if backend == 'tensorflow' and has_tensorflow: - kwargs.pop('device', None) - self._detector = MMDDriftTF(*args, **kwargs) # type: ignore - elif backend == 'pytorch' and has_pytorch: - self._detector = MMDDriftTorch(*args, **kwargs) # type: ignore - else: - kwargs.pop('configure_kernel_from_x_ref', None) - self._detector = MMDDriftKeops(*args, **kwargs) # type: ignore + self._detector = detector(*args, **kwargs) # type: ignore self.meta = self._detector.meta def predict(self, x: Union[np.ndarray, list], return_p_val: bool = True, return_distance: bool = True) \ From cf528f701ec89302ecadd85f5ff58a595d1b35c9 Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Mon, 4 Jul 2022 14:30:21 +0100 Subject: [PATCH 06/50] remove learned kernel and base kernel_matrix MMD function --- alibi_detect/cd/base.py | 6 ---- alibi_detect/utils/keops/kernels.py | 45 ----------------------------- 2 files changed, 51 deletions(-) diff --git a/alibi_detect/cd/base.py b/alibi_detect/cd/base.py index b5ff7ed88..9dfad33a5 100644 --- a/alibi_detect/cd/base.py +++ b/alibi_detect/cd/base.py @@ -548,12 +548,6 @@ def preprocess(self, x: Union[np.ndarray, list]) -> Tuple[np.ndarray, np.ndarray else: return self.x_ref, x # type: ignore[return-value] - # TODO: not absolutely required for keops...?! 
- @abstractmethod - def kernel_matrix(self, x: Union['torch.Tensor', 'tf.Tensor'], y: Union['torch.Tensor', 'tf.Tensor']) \ - -> Union['torch.Tensor', 'tf.Tensor']: - pass - @abstractmethod def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, float]: pass diff --git a/alibi_detect/utils/keops/kernels.py b/alibi_detect/utils/keops/kernels.py index dc2777d4f..f1f563d8b 100644 --- a/alibi_detect/utils/keops/kernels.py +++ b/alibi_detect/utils/keops/kernels.py @@ -63,48 +63,3 @@ def forward(self, x: LazyTensor, y: LazyTensor, infer_sigma: bool = False) -> La if len(dist.shape) < len(gamma.shape): kernel_mat = kernel_mat.sum(-1) / len(self.sigma) return kernel_mat - - -class DeepKernelKeops(nn.Module): - def __init__( - self, - proj: nn.Module, - kernel_a: nn.Module = GaussianRBFKeops(trainable=True), - kernel_b: nn.Module = GaussianRBFKeops(trainable=True), - eps: Union[float, str] = 'trainable' - ) -> None: - super().__init__() - - self.kernel_a = kernel_a - self.kernel_b = kernel_b - self.proj = proj - if kernel_b is not None: - self._init_eps(eps) - - def _init_eps(self, eps: Union[float, str]) -> None: - if isinstance(eps, float): - if not 0 < eps < 1: - raise ValueError("eps should be in (0,1)") - self.logit_eps = nn.Parameter(torch.tensor(eps).logit(), requires_grad=False) - elif eps == 'trainable': - self.logit_eps = nn.Parameter(torch.tensor(0.)) - else: - raise NotImplementedError("eps should be 'trainable' or a float in (0,1)") - - @property - def eps(self) -> torch.Tensor: - return self.logit_eps.sigmoid() if self.kernel_b is not None else torch.tensor(0.) - - def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: - similarity = self.kernel_a(self.proj(x), self.proj(y)) - if self.kernel_b is not None: - similarity = (1-self.eps)*similarity + self.eps*self.kernel_b(x, y) - return similarity - - # TODO: where does this belong? 
- def forward_perms(self, x_proj: torch.Tensor, y_proj: torch.Tensor, - x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: - similarity = self.kernel_a(x_proj, y_proj, permutations=True) # [perms+1, n, m] - if self.kernel_b is not None: - similarity = (1-self.eps)*similarity + self.eps*self.kernel_b(x, y, permutations=True) - return similarity.sum(1).sum(1).squeeze(-1) # [perms+1] From 38ec19b7cd3f8feb8511247557bc298bab0d738d Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Tue, 5 Jul 2022 13:48:50 +0100 Subject: [PATCH 07/50] unify batched mmd2 --- alibi_detect/cd/base.py | 1 + alibi_detect/cd/keops/mmd.py | 38 +----------------------------------- 2 files changed, 2 insertions(+), 37 deletions(-) diff --git a/alibi_detect/cd/base.py b/alibi_detect/cd/base.py index 9dfad33a5..94461e789 100644 --- a/alibi_detect/cd/base.py +++ b/alibi_detect/cd/base.py @@ -502,6 +502,7 @@ def __init__( if p_val is None: logger.warning('No p-value set for the drift threshold. Need to set it to detect data drift.') + # TODO: now not supported by KeOps detector -> either support or move to framework-specific implementations self.infer_sigma = configure_kernel_from_x_ref if configure_kernel_from_x_ref and isinstance(sigma, np.ndarray): self.infer_sigma = False diff --git a/alibi_detect/cd/keops/mmd.py b/alibi_detect/cd/keops/mmd.py index 62d51aeb5..76c600756 100644 --- a/alibi_detect/cd/keops/mmd.py +++ b/alibi_detect/cd/keops/mmd.py @@ -84,46 +84,10 @@ def __init__( # set the correct MMD^2 function based on the batch size for the permutations self.batch_size = batch_size_permutations self.n_batches = 1 + (n_permutations - 1) // batch_size_permutations - self.mmd2 = self._mmd2 if self.n_batches == 1 else self._batched_mmd2 def _mmd2(self, x_all: torch.Tensor, perms: List[torch.Tensor], m: int, n: int) \ -> Tuple[torch.Tensor, torch.Tensor]: """ - Compute MMD^2 for the original test statistic and all permutations at once. 
- - Parameters - ---------- - x_all - Concatenated reference and test instances. - perms - List with permutation vectors. - m - Number of reference instances. - n - Number of test instances. - - Returns - ------- - MMD^2 statistic for the original and permuted reference and test sets. - """ - x_all = x_all.to(self.device) - - # construct stacked tensors with all permutations for the reference set x and test set y - x = torch.cat([x_all[None, :m, :], torch.cat([x_all[perm[:m]][None, :, :] for perm in perms], 0)], 0) - y = torch.cat([x_all[None, m:, :], torch.cat([x_all[perm[m:]][None, :, :] for perm in perms], 0)], 0) - - # compute summed kernel matrices - c_xx, c_yy, c_xy = 1 / (m * (m - 1)), 1 / (n * (n - 1)), 2. / (m * n) - k_xx = self.kernel(LazyTensor(x[:, :, None, :]), LazyTensor(x[:, None, :, :])).sum(1).sum(1).squeeze(-1) - k_yy = self.kernel(LazyTensor(y[:, :, None, :]), LazyTensor(y[:, None, :, :])).sum(1).sum(1).squeeze(-1) - k_xy = self.kernel(LazyTensor(x[:, :, None, :]), LazyTensor(y[:, None, :, :])).sum(1).sum(1).squeeze(-1) - stats = c_xx * (k_xx - m) + c_yy * (k_yy - n) - c_xy * k_xy # TODO: check diagonal adjustment - return stats[0], stats[1:] - - # TODO: just use _batched_mmd2?! Need to check time diff with original approach which should be minimal - def _batched_mmd2(self, x_all: torch.Tensor, perms: List[torch.Tensor], m: int, n: int) \ - -> Tuple[torch.Tensor, torch.Tensor]: - """ Batched (across the permutations) MMD^2 computation for the original test statistic and the permutations. 
Parameters @@ -185,7 +149,7 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, float]: m, n = x_ref.shape[0], x.shape[0] perms = [torch.randperm(m + n) for _ in range(self.n_permutations)] x_all = torch.cat([x_ref, x], 0) - mmd2, mmd2_permuted = self.mmd2(x_all, perms, m, n) + mmd2, mmd2_permuted = self._mmd2(x_all, perms, m, n) if self.device.type == 'cuda': mmd2, mmd2_permuted = mmd2.cpu(), mmd2_permuted.cpu() p_val = (mmd2 <= mmd2_permuted).float().mean() From e943c6c00ee877765ae4c0ad5492877f7416025c Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Wed, 6 Jul 2022 12:01:42 +0100 Subject: [PATCH 08/50] update keops mmd --- alibi_detect/cd/base.py | 2 -- alibi_detect/cd/keops/mmd.py | 12 +++++++++++ alibi_detect/utils/keops/kernels.py | 32 ++++++++++++++++++++++++++--- 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/alibi_detect/cd/base.py b/alibi_detect/cd/base.py index 94461e789..dc9900f2f 100644 --- a/alibi_detect/cd/base.py +++ b/alibi_detect/cd/base.py @@ -502,11 +502,9 @@ def __init__( if p_val is None: logger.warning('No p-value set for the drift threshold. Need to set it to detect data drift.') - # TODO: now not supported by KeOps detector -> either support or move to framework-specific implementations self.infer_sigma = configure_kernel_from_x_ref if configure_kernel_from_x_ref and isinstance(sigma, np.ndarray): self.infer_sigma = False - # TODO: this might print a message for keops despite not existing configure_kernel_from_x_ref logger.warning('`sigma` is specified for the kernel and `configure_kernel_from_x_ref` ' 'is set to True. 
`sigma` argument takes priority over ' '`configure_kernel_from_x_ref` (set to False).') diff --git a/alibi_detect/cd/keops/mmd.py b/alibi_detect/cd/keops/mmd.py index 76c600756..1ee73657a 100644 --- a/alibi_detect/cd/keops/mmd.py +++ b/alibi_detect/cd/keops/mmd.py @@ -20,6 +20,7 @@ def __init__( preprocess_fn: Optional[Callable] = None, kernel: Callable = GaussianRBF, sigma: Optional[np.ndarray] = None, + configure_kernel_from_x_ref: bool = True, n_permutations: int = 100, batch_size_permutations: int = 1000000, device: Optional[str] = None, @@ -48,6 +49,8 @@ def __init__( sigma Optionally set the GaussianRBF kernel bandwidth. Can also pass multiple bandwidth values as an array. The kernel evaluation is then averaged over those bandwidths. + configure_kernel_from_x_ref + Whether to already configure the kernel bandwidth from the reference data. n_permutations Number of permutations used in the permutation test. batch_size_permutations @@ -67,6 +70,7 @@ def __init__( update_x_ref=update_x_ref, preprocess_fn=preprocess_fn, sigma=sigma, + configure_kernel_from_x_ref=configure_kernel_from_x_ref, n_permutations=n_permutations, input_shape=input_shape, data_type=data_type @@ -85,6 +89,14 @@ def __init__( self.batch_size = batch_size_permutations self.n_batches = 1 + (n_permutations - 1) // batch_size_permutations + # infer the kernel bandwidth from the reference data + if self.infer_sigma or isinstance(sigma, torch.Tensor): + x = torch.from_numpy(self.x_ref).to(self.device) + _ = self.kernel(LazyTensor(x[:, None, :]), LazyTensor(x[None, :, :]), infer_sigma=self.infer_sigma) + self.infer_sigma = False + else: + self.infer_sigma = True + def _mmd2(self, x_all: torch.Tensor, perms: List[torch.Tensor], m: int, n: int) \ -> Tuple[torch.Tensor, torch.Tensor]: """ diff --git a/alibi_detect/utils/keops/kernels.py b/alibi_detect/utils/keops/kernels.py index f1f563d8b..ab0d136f6 100644 --- a/alibi_detect/utils/keops/kernels.py +++ b/alibi_detect/utils/keops/kernels.py @@ -1,4 +1,3 
@@ -from alibi_detect.utils.pytorch.kernels import sigma_median # TODO: keops sigma_median? import numpy as np from pykeops.torch import LazyTensor import torch @@ -6,11 +5,38 @@ from typing import Callable, List, Tuple, Union +def sigma_mean(x: LazyTensor, y: LazyTensor, dist: LazyTensor) -> torch.Tensor: + """ + Bandwidth estimation using the mean heuristic. + + Parameters + ---------- + x + LazyTensor of instances with dimension [Nx, 1, features]. + y + LazyTensor of instances with dimension [1, Ny, features]. + dist + LazyTensor with dimensions [Nx, Ny], containing the pairwise distances between `x` and `y`. + + Returns + ------- + The computed bandwidth, `sigma`. + """ + n = x.shape[0] + if (dist.min(axis=1) == 0.).all() and (torch.arange(n) == dist.argmin(axis=1).view(-1)).all() \ + and x.shape == y.shape: + n_mean = n * (n - 1) + else: + n_mean = np.prod(dist.shape) + sigma = (.5 * dist.sum(1).sum().unsqueeze(-1) / n_mean) ** .5 + return sigma + + class GaussianRBF(nn.Module): def __init__( self, sigma: torch.Tensor = None, - init_sigma_fn: Callable = sigma_median, # TODO: would not work with default init fn + init_sigma_fn: Callable = sigma_mean, trainable: bool = False ) -> None: """ @@ -25,7 +51,7 @@ def __init__( Can pass multiple values to eval kernel with and then average. init_sigma_fn Function used to compute the bandwidth `sigma`. Used when `sigma` is to be inferred. - The function's signature should match :py:func:`~alibi_detect.utils.keops.kernels.sigma_median`, + The function's signature should match :py:func:`~alibi_detect.utils.keops.kernels.sigma_mean`, meaning that it should take in the tensors `x`, `y` and `dist` and return `sigma`. trainable Whether or not to track gradients w.r.t. `sigma` to allow it to be trained. 
From 0487cb2ce966aea68ac838c7434cd297c2b22f7e Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Wed, 6 Jul 2022 14:20:19 +0100 Subject: [PATCH 09/50] update docs and kernel import --- alibi_detect/utils/keops/__init__.py | 5 +++++ doc/source/cd/methods/mmddrift.ipynb | 33 +++++++++++++++------------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/alibi_detect/utils/keops/__init__.py b/alibi_detect/utils/keops/__init__.py index e69de29bb..235176e6b 100644 --- a/alibi_detect/utils/keops/__init__.py +++ b/alibi_detect/utils/keops/__init__.py @@ -0,0 +1,5 @@ +from .kernels import GaussianRBF + +__all__ = [ + "GaussianRBF" +] diff --git a/doc/source/cd/methods/mmddrift.ipynb b/doc/source/cd/methods/mmddrift.ipynb index 3f9c7e64e..91b59efbe 100644 --- a/doc/source/cd/methods/mmddrift.ipynb +++ b/doc/source/cd/methods/mmddrift.ipynb @@ -44,7 +44,7 @@ "\n", "Keyword arguments:\n", "\n", - "* `backend`: Both **TensorFlow** and **PyTorch** implementations of the MMD detector as well as various preprocessing steps are available. Specify the backend (*tensorflow* or *pytorch*). Defaults to *tensorflow*.\n", + "* `backend`: **TensorFlow**, **PyTorch** and [**KeOps**](https://github.com/getkeops/keops) implementations of the MMD detector as well as various preprocessing steps are available. Specify the backend (*tensorflow*, *pytorch* or *keops*). Defaults to *tensorflow*.\n", "\n", "* `p_val`: p-value used for significance of the permutation test.\n", "\n", @@ -54,11 +54,11 @@ "\n", "* `preprocess_fn`: Function to preprocess the data before computing the data drift metrics. Typically a dimensionality reduction technique.\n", "\n", - "* `kernel`: Kernel used when computing the MMD. Defaults to a Gaussian RBF kernel (`from alibi_detect.utils.pytorch import GaussianRBF` or `from alibi_detect.utils.tensorflow import GaussianRBF` dependent on the backend used).\n", + "* `kernel`: Kernel used when computing the MMD. 
Defaults to a Gaussian RBF kernel (`from alibi_detect.utils.pytorch import GaussianRBF`, `from alibi_detect.utils.tensorflow import GaussianRBF` or `from alibi_detect.utils.keops import GaussianRBF` dependent on the backend used).\n", "\n", "* `sigma`: Optional bandwidth for the kernel as a `np.ndarray`. We can also average over a number of different bandwidths, e.g. `np.array([.5, 1., 1.5])`.\n", "\n", - "* `configure_kernel_from_x_ref`: If `sigma` is not specified, the detector can infer it via a heuristic and set `sigma` to the median pairwise distance between 2 samples. If `configure_kernel_from_x_ref` is *True*, we can already set `sigma` at initialization of the detector by inferring it from `x_ref`, speeding up the prediction step. If set to *False*, `sigma` is computed separately for each test batch at prediction time.\n", + "* `configure_kernel_from_x_ref`: If `sigma` is not specified, the detector can infer it via a heuristic and set `sigma` to the median (*TensorFlow* and *PyTorch*) or the mean pairwise distance between 2 samples (*KeOps*) by default. If `configure_kernel_from_x_ref` is *True*, we can already set `sigma` at initialization of the detector by inferring it from `x_ref`, speeding up the prediction step. If set to *False*, `sigma` is computed separately for each test batch at prediction time.\n", "\n", "* `n_permutations`: Number of permutations used in the permutation test.\n", "\n", @@ -71,23 +71,22 @@ "\n", "* `device`: *cuda* or *gpu* to use the GPU and *cpu* for the CPU. If the device is not specified, the detector will try to leverage the GPU if possible and otherwise fall back on CPU.\n", "\n", + "Additional KeOps keyword arguments:\n", "\n", - "Initialized drift detector example:\n", + "* `batch_size_permutations`: KeOps computes the `n_permutations` of the MMD^2 statistics in chunks of `batch_size_permutations`. 
Defaults to 1,000,000.\n", + "\n", + "Initialized drift detector examples for each of the available backends:\n", "\n", "\n", "```python\n", "from alibi_detect.cd import MMDDrift\n", "\n", - "cd = MMDDrift(x_ref, backend='tensorflow', p_val=.05)\n", + "cd_tf = MMDDrift(x_ref, backend='tensorflow', p_val=.05)\n", + "cd_torch = MMDDrift(x_ref, backend='pytorch', p_val=.05)\n", + "cd_keops = MMDDrift(x_ref, backend='keops', p_val=.05)\n", "```\n", "\n", - "The same detector in PyTorch:\n", - "\n", - "```python\n", - "cd = MMDDrift(x_ref, backend='pytorch', p_val=.05)\n", - "```\n", - "\n", - "We can also easily add preprocessing functions for both frameworks. The following example uses a randomly initialized image encoder in PyTorch:\n", + "We can also easily add preprocessing functions for the *TensorFlow* and *PyTorch* frameworks. The following example uses a randomly initialized image encoder in PyTorch:\n", "\n", "```python\n", "from functools import partial\n", @@ -196,7 +195,7 @@ "cd = load_detector(filepath)\n", "```\n", "\n", - "Currently on the **TensorFlow** backend is supported for `save_detector` and `load_detector`. Adding **PyTorch** support is a near term priority." + "Currently only the **TensorFlow** backend is supported for `save_detector` and `load_detector`. Adding **PyTorch** and **KeOps** support is a near term priority." 
] }, { @@ -213,6 +212,10 @@ "\n", "[Drift detection on CIFAR10](../../examples/cd_mmd_cifar10.ipynb)\n", "\n", + "### Tabular\n", + "\n", + "[Scaling up drift detection with KeOps](../../examples/cd_mmd_keops.ipynb)\n", + "\n", "### Text\n", "\n", "[Text drift detection on IMDB movie reviews](../../examples/cd_text_imdb.ipynb)" @@ -224,7 +227,7 @@ "hash": "ffba93b5284319fb7a107c8eacae647f441487dcc7e0323a4c0d3feb66ea8c5e" }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -238,7 +241,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.11" + "version": "3.7.6" } }, "nbformat": 4, From a6a4641b3a44ece473216377004f1b6f119ca138 Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Thu, 7 Jul 2022 15:50:19 +0100 Subject: [PATCH 10/50] bugfixes --- alibi_detect/cd/keops/mmd.py | 4 ++-- alibi_detect/cd/mmd.py | 4 ++-- alibi_detect/cd/pytorch/mmd.py | 2 +- alibi_detect/utils/frameworks.py | 2 +- alibi_detect/utils/keops/kernels.py | 8 ++++---- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/alibi_detect/cd/keops/mmd.py b/alibi_detect/cd/keops/mmd.py index 1ee73657a..3c32572d7 100644 --- a/alibi_detect/cd/keops/mmd.py +++ b/alibi_detect/cd/keops/mmd.py @@ -2,7 +2,7 @@ import numpy as np from pykeops.torch import LazyTensor import torch -from typing import Callable, Dict, Optional, Tuple, Union +from typing import Callable, Dict, List, Optional, Tuple, Union from alibi_detect.cd.base import BaseMMDDrift from alibi_detect.utils.keops.kernels import GaussianRBF from alibi_detect.utils.pytorch import get_device @@ -83,7 +83,7 @@ def __init__( # initialize kernel sigma = torch.from_numpy(sigma).to(self.device) if isinstance(sigma, # type: ignore[assignment] np.ndarray) else None - self.kernel = kernel(sigma) if kernel == GaussianRBF else kernel + self.kernel = kernel(sigma).to(self.device) if kernel == GaussianRBF else kernel 
# set the correct MMD^2 function based on the batch size for the permutations self.batch_size = batch_size_permutations diff --git a/alibi_detect/cd/mmd.py b/alibi_detect/cd/mmd.py index 336cf9be0..565edf8b1 100644 --- a/alibi_detect/cd/mmd.py +++ b/alibi_detect/cd/mmd.py @@ -4,7 +4,7 @@ from alibi_detect.utils.frameworks import has_keops, has_pytorch, has_tensorflow if has_keops: - from alibi_detect.cd.keops import MMDDriftKeops + from alibi_detect.cd.keops.mmd import MMDDriftKeops if has_pytorch: from alibi_detect.cd.pytorch.mmd import MMDDriftTorch @@ -80,7 +80,7 @@ def __init__( raise ImportError(f'{backend} not installed. Cannot initialize and run the ' f'MMDDrift detector with {backend} backend.') elif backend not in ['tensorflow', 'pytorch', 'keops']: - raise NotImplementedError(f'{backend} not implemented. Use tensorflow or pytorch instead.') + raise NotImplementedError(f'{backend} not implemented. Use tensorflow, pytorch or keops instead.') kwargs = locals() args = [kwargs['x_ref']] diff --git a/alibi_detect/cd/pytorch/mmd.py b/alibi_detect/cd/pytorch/mmd.py index 1bb6ffe0b..c0ad58dcd 100644 --- a/alibi_detect/cd/pytorch/mmd.py +++ b/alibi_detect/cd/pytorch/mmd.py @@ -80,7 +80,7 @@ def __init__( # initialize kernel sigma = torch.from_numpy(sigma).to(self.device) if isinstance(sigma, # type: ignore[assignment] np.ndarray) else None - self.kernel = kernel(sigma) if kernel == GaussianRBF else kernel + self.kernel = kernel(sigma).to(self.device) if kernel == GaussianRBF else kernel # compute kernel matrix for the reference data if self.infer_sigma or isinstance(sigma, torch.Tensor): diff --git a/alibi_detect/utils/frameworks.py b/alibi_detect/utils/frameworks.py index f742618b1..7899ef748 100644 --- a/alibi_detect/utils/frameworks.py +++ b/alibi_detect/utils/frameworks.py @@ -17,7 +17,7 @@ has_sklearn = False try: - import keops # noqa + import pykeops # noqa has_keops = True except ImportError: has_keops = False diff --git 
a/alibi_detect/utils/keops/kernels.py b/alibi_detect/utils/keops/kernels.py index ab0d136f6..cedafd709 100644 --- a/alibi_detect/utils/keops/kernels.py +++ b/alibi_detect/utils/keops/kernels.py @@ -2,7 +2,7 @@ from pykeops.torch import LazyTensor import torch import torch.nn as nn -from typing import Callable, List, Tuple, Union +from typing import Callable, List, Optional, Tuple, Union def sigma_mean(x: LazyTensor, y: LazyTensor, dist: LazyTensor) -> torch.Tensor: @@ -23,7 +23,7 @@ def sigma_mean(x: LazyTensor, y: LazyTensor, dist: LazyTensor) -> torch.Tensor: The computed bandwidth, `sigma`. """ n = x.shape[0] - if (dist.min(axis=1) == 0.).all() and (torch.arange(n) == dist.argmin(axis=1).view(-1)).all() \ + if (dist.min(axis=1) == 0.).all() and (torch.arange(n) == dist.argmin(axis=1).cpu().view(-1)).all() \ and x.shape == y.shape: n_mean = n * (n - 1) else: @@ -35,7 +35,7 @@ def sigma_mean(x: LazyTensor, y: LazyTensor, dist: LazyTensor) -> torch.Tensor: class GaussianRBF(nn.Module): def __init__( self, - sigma: torch.Tensor = None, + sigma: Optional[torch.Tensor] = None, init_sigma_fn: Callable = sigma_mean, trainable: bool = False ) -> None: @@ -78,7 +78,7 @@ def forward(self, x: LazyTensor, y: LazyTensor, infer_sigma: bool = False) -> La if infer_sigma or self.init_required: if self.trainable and infer_sigma: raise ValueError("Gradients cannot be computed w.r.t. 
an inferred sigma value") - sigma = self.init_sigma_fn(x, y, dist) + sigma = self.init_sigma_fn(x, y, dist) # .to(x.device) with torch.no_grad(): self.log_sigma.copy_(sigma.log().clone()) self.init_required = False From e2b27f54a4e24ec0a0f30038dfb5d7bf0ff347bd Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Fri, 8 Jul 2022 14:38:50 +0100 Subject: [PATCH 11/50] remove unused imports --- alibi_detect/utils/keops/kernels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alibi_detect/utils/keops/kernels.py b/alibi_detect/utils/keops/kernels.py index cedafd709..2f471841a 100644 --- a/alibi_detect/utils/keops/kernels.py +++ b/alibi_detect/utils/keops/kernels.py @@ -2,7 +2,7 @@ from pykeops.torch import LazyTensor import torch import torch.nn as nn -from typing import Callable, List, Optional, Tuple, Union +from typing import Callable, Optional def sigma_mean(x: LazyTensor, y: LazyTensor, dist: LazyTensor) -> torch.Tensor: From d442be8af22481c8ae6ed2ed5dc305d7f7e5d99b Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Fri, 8 Jul 2022 14:50:24 +0100 Subject: [PATCH 12/50] add benchmarking example --- doc/source/examples/cd_mmd_keops.ipynb | 513 +++++++++++++++++++++++++ 1 file changed, 513 insertions(+) create mode 100644 doc/source/examples/cd_mmd_keops.ipynb diff --git a/doc/source/examples/cd_mmd_keops.ipynb b/doc/source/examples/cd_mmd_keops.ipynb new file mode 100644 index 000000000..280170883 --- /dev/null +++ b/doc/source/examples/cd_mmd_keops.ipynb @@ -0,0 +1,513 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "27a4394b", + "metadata": {}, + "source": [ + "# Scaling up drift detection with KeOps\n", + "\n", + "## Introduction\n", + "\n", + "A number of convenient and powerful kernel-based drift detectors such as the MMD detector ([Gretton et al., 2012](https://jmlr.csail.mit.edu/papers/v13/gretton12a.html)) do not scale favourably with increasing dataset size $n$, leading to quadratic complexity $\\mathcal{O}(n^2)$ for 
naive implementations. As a result, we can quickly run into memory issues by having to store the $[N_\\text{ref} + N_\\text{test}, N_\\text{ref} + N_\\text{test}]$ kernel matrix (on the GPU if applicable) used for an efficient implementation of the permutation test. Note that $N_\\text{ref}$ is the reference data size and $N_\\text{test}$ the test data size.\n", + "\n", + "We can however drastically speed up and scale up kernel-based drift detectors to large dataset sizes by working with symbolic kernel matrices instead and leverage the [KeOps](https://www.kernel-operations.io/keops/index.html) library to do so. For the user of $\\texttt{Alibi Detect}$ the only thing that changes is the specification of the detector's backend:\n", + "\n", + "\n", + "```python\n", + "from alibi_detect.cd import MMDDrift\n", + "\n", + "detector_torch = MMDDrift(x_ref, backend='pytorch')\n", + "detector_keops = MMDDrift(x_ref, backend='keops')\n", + "```\n", + "\n", + "In this notebook we will run a few simple benchmarks to illustrate the speed and memory improvements from using KeOps over vanilla PyTorch on the GPU (1x RTX 2080 Ti).\n", + "\n", + "## Data\n", + "\n", + "We randomly sample points from the standard normal distribution and run the MMD detectors with PyTorch and KeOps backends for the following settings:\n", + "\n", + "- $N_\\text{ref}, N_\\text{test} = [2, 5, 10, 20, 50, 100]$ (batch sizes in '000s)\n", + "- $D = [2, 10, 50]$\n", + "\n", + "Where $D$ denotes the number of features.\n", + "\n", + "## Requirements\n", + "\n", + "The notebook requires [PyTorch](https://pytorch.org/) and KeOps to be installed. 
Once PyTorch is installed, KeOps can be installed via pip:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a0bf1719", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install pykeops" + ] + }, + { + "cell_type": "markdown", + "id": "7ff93d59", + "metadata": {}, + "source": [ + "Before we start let’s fix the random seeds for reproducibility:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "2ba95f29", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import torch\n", + "\n", + "def set_seed(seed: int) -> None:\n", + " torch.manual_seed(seed)\n", + " torch.cuda.manual_seed(seed)\n", + " np.random.seed(seed)\n", + "\n", + "set_seed(2022)" + ] + }, + { + "cell_type": "markdown", + "id": "1910895a", + "metadata": {}, + "source": [ + "\n", + "## Vanilla PyTorch vs. KeOps comparison\n", + "\n", + "### Experiments\n", + "\n", + "First we define some utility functions to run the experiments:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a1c65254", + "metadata": {}, + "outputs": [], + "source": [ + "from alibi_detect.cd import MMDDrift\n", + "import matplotlib.pyplot as plt\n", + "from scipy.stats import kstest\n", + "from timeit import default_timer as timer\n", + "\n", + "\n", + "def eval_detector(p_vals: np.ndarray, threshold: float, is_drift: bool, t_mean: float, t_std: float) -> dict:\n", + " \"\"\" In case of drifted data (ground truth) it returns the detector's power.\n", + " In case of no drift, it computes the false positive rate (FPR) and whether the p-values\n", + " are uniformly distributed U[0,1] which is checked via a KS test. 
\"\"\"\n", + " results = {'power': None, 'fpr': None, 'ks': None}\n", + " below_p_val_threshold = (p_vals <= threshold).mean()\n", + " if is_drift:\n", + " results['power'] = below_p_val_threshold\n", + " else:\n", + " results['fpr'] = below_p_val_threshold\n", + " stat_ks, p_val_ks = kstest(p_vals, 'uniform')\n", + " results['ks'] = {'p_val': p_val_ks, 'stat': stat_ks}\n", + " results['p_vals'] = p_vals\n", + " results['time'] = {'mean': t_mean, 'stdev': t_std}\n", + " return results\n", + "\n", + "\n", + "def experiment(backend: str, n_runs: int, n_ref: int, n_test: int, n_features: int, mu: float = 0.) -> dict:\n", + " \"\"\" Runs the experiment n_runs times, each time with newly sampled reference and test data.\n", + " Returns the p-values for each test as well as the mean and standard deviations of the runtimes. \"\"\"\n", + " p_vals, t_detect = [], []\n", + " for _ in range(n_runs):\n", + " # Sample reference and test data\n", + " x_ref = np.random.randn(*(n_ref, n_features)).astype(np.float32)\n", + " x_test = np.random.randn(*(n_test, n_features)).astype(np.float32) + mu\n", + " \n", + " # Initialise detector, make and log predictions\n", + " p_val = .05\n", + " dd = MMDDrift(x_ref, backend=backend, p_val=p_val, n_permutations=100)\n", + " start = timer()\n", + " pred = dd.predict(x_test)\n", + " end = timer()\n", + " \n", + " if _ > 0: # first run reserved for KeOps compilation\n", + " t_detect.append(end - start)\n", + " p_vals.append(pred['data']['p_val'])\n", + " \n", + " del dd, x_ref, x_test\n", + " torch.cuda.empty_cache()\n", + " \n", + " p_vals = np.array(p_vals)\n", + " t_mean, t_std = np.array(t_detect).mean(), np.array(t_detect).std()\n", + " results = eval_detector(p_vals, p_val, mu == 0., t_mean, t_std)\n", + " return results\n", + "\n", + "\n", + "def format_results(n_features: list, backends: list, max_batch_size: int = 1e10) -> dict:\n", + " T = {'batch_size': None, 'keops': None, 'pytorch': None}\n", + " T['batch_size'] = 
np.unique([experiments['keops'][_]['n_ref'] for _ in experiments['keops'].keys()])\n", + " T['batch_size'] = list(T['batch_size'][T['batch_size'] <= max_batch_size])\n", + " T['keops'] = {f: [] for f in n_features}\n", + " T['pytorch'] = {f: [] for f in n_features}\n", + "\n", + " for backend in backends:\n", + " for f in T[backend].keys():\n", + " for bs in T['batch_size']:\n", + " for k, v in experiments[backend].items():\n", + " if f == v['n_features'] and bs == v['n_ref']:\n", + " T[backend][f].append(results[backend][k]['time']['mean'])\n", + "\n", + " for k, v in T['keops'].items(): # apply padding\n", + " n_pad = len(v) - len(T['pytorch'][k])\n", + " T['pytorch'][k] += [np.nan for _ in range(n_pad)]\n", + " return T\n", + "\n", + "\n", + "def plot_absolute_time(results: dict, n_features: list, y_scale: str = 'linear', \n", + " detector: str = 'MMD', max_batch_size: int = 1e10):\n", + " T = format_results(n_features, ['keops', 'pytorch'], max_batch_size)\n", + " colors = ['b', 'g', 'r', 'c', 'm', 'y', 'b']\n", + " legend, n_c = [], 0\n", + " for f in n_features:\n", + " plt.plot(T['batch_size'], T['keops'][f], linestyle='solid', color=colors[n_c]);\n", + " legend.append(f'keops - {f}')\n", + " plt.plot(T['batch_size'], T['pytorch'][f], linestyle='dashed', color=colors[n_c]);\n", + " legend.append(f'pytorch - {f}')\n", + " n_c += 1\n", + " plt.title(f'{detector} drift detection time for 100 permutations')\n", + " plt.legend(legend, loc=(1.1,.1));\n", + " plt.xlabel('Batch size');\n", + " plt.ylabel('Time (s)');\n", + " plt.yscale(y_scale);\n", + " plt.show();\n", + "\n", + "\n", + "def plot_relative_time(results: dict, n_features: list, y_scale: str = 'linear',\n", + " detector: str = 'MMD', max_batch_size: int = 1e10):\n", + " T = format_results(n_features, ['keops', 'pytorch'], max_batch_size)\n", + " colors = ['b', 'g', 'r', 'c', 'm', 'y', 'b']\n", + " legend, n_c = [], 0\n", + " for f in n_features:\n", + " t_keops, t_torch = T['keops'][f], 
T['pytorch'][f]\n", + " ratio = [tt / tk for tt, tk in zip(t_torch, t_keops)]\n", + " plt.plot(T['batch_size'], ratio, linestyle='solid', color=colors[n_c]);\n", + " legend.append(f'pytorch/keops - {f}')\n", + " n_c += 1\n", + " plt.title(f'{detector} drift detection pytorch/keops time ratio for 100 permutations')\n", + " plt.legend(legend, loc=(1.1,.1));\n", + " plt.xlabel('Batch size');\n", + " plt.ylabel('time pytorch / keops');\n", + " plt.yscale(y_scale);\n", + " plt.show();" + ] + }, + { + "cell_type": "markdown", + "id": "43a4ee7e", + "metadata": {}, + "source": [ + "As detailed earlier, we will compare the PyTorch with the KeOps implementation of the MMD detector for a variety of reference and test data batch sizes as well as different feature dimensions. Note that for the PyTorch implementation, the portion of the kernel matrix for the reference data itself can already be computed at initialisation of the detector. This computation will not be included when we record the detector's prediction time. Since use cases where $N_\\text{ref} >> N_\\text{test}$ are quite common, we will also test for this specific setting. The key reason is that we cannot amortise this computation for the KeOps detector since we are working with lazily evaluated symbolic matrices.\n", + "\n", + "#### $N_\\text{ref} = N_\\text{test}$\n", + "\n", + "Note that for KeOps we could further increase the number of instances in the reference and test sets (e.g. to 500,000) without running into memory issues." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "47268603", + "metadata": {}, + "outputs": [], + "source": [ + "experiments = {\n", + " 'keops': {\n", + " 0: {'n_ref': 2000, 'n_test': 2000, 'n_runs': 10, 'n_features': 2},\n", + " 1: {'n_ref': 5000, 'n_test': 5000, 'n_runs': 10, 'n_features': 2},\n", + " 2: {'n_ref': 10000, 'n_test': 10000, 'n_runs': 10, 'n_features': 2},\n", + " 3: {'n_ref': 20000, 'n_test': 20000, 'n_runs': 10, 'n_features': 2},\n", + " 4: {'n_ref': 50000, 'n_test': 50000, 'n_runs': 10, 'n_features': 2},\n", + " 5: {'n_ref': 100000, 'n_test': 100000, 'n_runs': 10, 'n_features': 2},\n", + " 6: {'n_ref': 2000, 'n_test': 2000, 'n_runs': 10, 'n_features': 10},\n", + " 7: {'n_ref': 5000, 'n_test': 5000, 'n_runs': 10, 'n_features': 10},\n", + " 8: {'n_ref': 10000, 'n_test': 10000, 'n_runs': 10, 'n_features': 10},\n", + " 9: {'n_ref': 20000, 'n_test': 20000, 'n_runs': 10, 'n_features': 10},\n", + " 10: {'n_ref': 50000, 'n_test': 50000, 'n_runs': 10, 'n_features': 10},\n", + " 11: {'n_ref': 100000, 'n_test': 100000, 'n_runs': 10, 'n_features': 10},\n", + " 12: {'n_ref': 2000, 'n_test': 2000, 'n_runs': 10, 'n_features': 50},\n", + " 13: {'n_ref': 5000, 'n_test': 5000, 'n_runs': 10, 'n_features': 50},\n", + " 14: {'n_ref': 10000, 'n_test': 10000, 'n_runs': 10, 'n_features': 50},\n", + " 15: {'n_ref': 20000, 'n_test': 20000, 'n_runs': 10, 'n_features': 50},\n", + " 16: {'n_ref': 50000, 'n_test': 50000, 'n_runs': 10, 'n_features': 50},\n", + " 17: {'n_ref': 100000, 'n_test': 100000, 'n_runs': 10, 'n_features': 50}\n", + " },\n", + " 'pytorch': { # runs OOM after 10k instances in ref and test sets\n", + " 0: {'n_ref': 2000, 'n_test': 2000, 'n_runs': 10, 'n_features': 2},\n", + " 1: {'n_ref': 5000, 'n_test': 5000, 'n_runs': 10, 'n_features': 2},\n", + " 2: {'n_ref': 10000, 'n_test': 10000, 'n_runs': 10, 'n_features': 2},\n", + " 3: {'n_ref': 2000, 'n_test': 2000, 'n_runs': 10, 'n_features': 10},\n", + " 4: {'n_ref': 5000, 'n_test': 5000, 
'n_runs': 10, 'n_features': 10},\n", + " 5: {'n_ref': 10000, 'n_test': 10000, 'n_runs': 10, 'n_features': 10},\n", + " 6: {'n_ref': 2000, 'n_test': 2000, 'n_runs': 10, 'n_features': 50},\n", + " 7: {'n_ref': 5000, 'n_test': 5000, 'n_runs': 10, 'n_features': 50},\n", + " 8: {'n_ref': 10000, 'n_test': 10000, 'n_runs': 10, 'n_features': 50}\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "d556296a", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "backends = ['keops', 'pytorch']\n", + "results = {backend: {} for backend in backends}\n", + "\n", + "for backend in backends:\n", + " exps = experiments[backend]\n", + " for i, exp in exps.items():\n", + " results[backend][i] = experiment(\n", + " backend, exp['n_runs'], exp['n_ref'], exp['n_test'], exp['n_features']\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "93396443", + "metadata": {}, + "source": [ + "Below we visualise the runtimes of the different experiments. We can make the following observations:\n", + "\n", + "- The relative **speed** improvements of KeOps over vanilla PyTorch increase with increasing batch size.\n", + "\n", + "- Due to the explicit kernel computation and storage, the PyTorch detector runs out-of-memory after a little over 10,000 instances in each of the reference and test sets while KeOps keeps **scaling** up without any issues.\n", + "\n", + "- The relative speed improvements decline with growing **feature dimension**. Note however that we would not recommend using an (untrained) MMD detector on very high-dimensional data in the first place.\n", + "\n", + "The plots show both the absolute and relative (PyTorch / KeOps) mean prediction times for the MMD drift detector for different feature dimensions $[2, 10, 50]$." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "5d854bfb", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "n_features = [2, 10, 50]\n", + "max_batch_size = 100000\n", + "\n", + "plot_absolute_time(results, n_features, max_batch_size=max_batch_size)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ec9d0fbb", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plot_relative_time(results, n_features, max_batch_size=max_batch_size)" + ] + }, + { + "cell_type": "markdown", + "id": "b96a904b", + "metadata": {}, + "source": [ + "The difference between KeOps and PyTorch is even more striking when we only look at $[2, 10]$ features:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0d1e4dfa", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plot_absolute_time(results, [2, 10], max_batch_size=max_batch_size)" + ] + }, + { + "cell_type": "markdown", + "id": "6e920708", + "metadata": {}, + "source": [ + "#### $N_\\text{ref} >> N_\\text{test}$\n", + "\n", + "Now we check whether the speed improvements still hold when $N_\\text{ref} >> N_\\text{test}$ ($N_\\text{ref} / N_\\text{test} = 10$) and a large part of the kernel can already be computed at initialisation time of the PyTorch (but not the KeOps) detector." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a75794e8", + "metadata": {}, + "outputs": [], + "source": [ + "experiments = {\n", + " 'keops': {\n", + " 0: {'n_ref': 2000, 'n_test': 200, 'n_runs': 10, 'n_features': 2},\n", + " 1: {'n_ref': 5000, 'n_test': 500, 'n_runs': 10, 'n_features': 2},\n", + " 2: {'n_ref': 10000, 'n_test': 1000, 'n_runs': 10, 'n_features': 2},\n", + " 3: {'n_ref': 20000, 'n_test': 2000, 'n_runs': 10, 'n_features': 2},\n", + " 4: {'n_ref': 50000, 'n_test': 5000, 'n_runs': 10, 'n_features': 2},\n", + " 5: {'n_ref': 100000, 'n_test': 10000, 'n_runs': 10, 'n_features': 2}\n", + " },\n", + " 'pytorch': {\n", + " 0: {'n_ref': 2000, 'n_test': 200, 'n_runs': 10, 'n_features': 2},\n", + " 1: {'n_ref': 5000, 'n_test': 500, 'n_runs': 10, 'n_features': 2},\n", + " 2: {'n_ref': 10000, 'n_test': 1000, 'n_runs': 10, 'n_features': 2}\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "fcdd840a", + "metadata": {}, + "outputs": [], + "source": [ + "results = {backend: {} for backend in backends}\n", + "\n", + "for backend in backends:\n", + " exps = experiments[backend]\n", + " for i, exp in exps.items():\n", + " results[backend][i] = experiment(\n", + " backend, exp['n_runs'], exp['n_ref'], exp['n_test'], exp['n_features']\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "27307020", + "metadata": {}, + 
"source": [ + "The below plots illustrate that KeOps indeed still provides large speed ups over PyTorch. The x-axis shows the reference batch size $N_\\text{ref}$. Note that $N_\\text{ref} / N_\\text{test} = 10$." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "0a3c0d27", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plot_absolute_time(results, [2], max_batch_size=max_batch_size)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "cf6a0dfc", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAhsAAAEWCAYAAADPUVX+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nO3dd5gUVdbH8e8hgySBERFUUAliAkHW+KqorIhZQcwJcWVNGNacs655XRXFgIJZERWzsGaUARWQqGIgg5KVNOf9497RdpzQM0xPTfh9nqee6bqVTldXV5+5deuWuTsiIiIimVIt6QBERESkclOyISIiIhmlZENEREQySsmGiIiIZJSSDREREckoJRsiIiKSURU22TAzN7OtCpn+gJldkTJ+hpnNM7PlZta0BNtrHbdZo6Qxl5b4HrZIOo6yYmaPmdn1xZi/0GOjvDGz183sxKTjSIeZbRaPv+olWLaumb1iZkvM7LlMxCclY2bHmtlbScchlVeRyYaZzTSz1WbWLE/5+HhSbx3HH4vjh+SZ785YflIcP8nM1sUT1nIz+87MHjWzdqX2rgB3/4e7Xxe3WRO4A+jh7vXdfVEmf5Die/ywlNY12sz6pZbF9/Btaay/NJSHRMzMdjGzj5PafrrM7GozezK1zN17uvvjScVUmPj93zd33N1/iMffuhKs7kigOdDU3XuXQmy1zOz5GKOb2V55ppuZ3WJmi+Jwi5lZyvROZpZtZivj307rG1MSinu+ye/76u5D3b1HZiIUSb9m4zvg6NwRM9sOqJfPfNOAE1LmqwH0Ab7JM98n7l4faATsC/wKZJvZtumHXrB8/utqDtQBJpXG+qV0lVKS0gsYWQrrqTISSA43B6a5+9riLlhIrB8CxwFz85nWHzgU2AHYHjgIOD2urxbwMvAksCHwOPByLM+YmABV2BplkRJz90IHYCZwOfB5Stm/gcsAB1rHssdi+Txgw1h2IPA64YRwUiw7Cfgwn+28CjxfSBwXAnOA2cApcdtbpWz7fsKPzQpCAvMYcD3QLpY5sBx4D3g/jq+IZUfls73q8f0sBL4F/hmXqRGnNwIGx5hmxW1VB7YGfgPWxXUvjvPXjuv7Ie6jB4C6Kds7BPgCWEpIzvYHbojr+S2u6z9x3tT33ggYAiwAvo+fVbXUfR23+wshaexZxGd9CfB1nP9RoE6cNhE4KGXemnHfdI7vKXf/Lgd2ISSyl8eY5scYG8VlW8f5T43Lvh/Ldwc+BhYDP/LHMfMYcB/wGrAMGANsmSf2ccCO+eyf3eO69orjHYC3gZ+BqUCflHUUtS8/Av4DLAGmAPukLHsS4ThZFvfzsfns3/2B1cCauJ++jOWjgX55tnNn3A/fArvG8h/jvjwxZZ2FHld5tp+67kWEY3ZLwndiUfw8hwKN4/xPADmEfwaWA/9K+exyvwebACPi/pwBnFbAtq/J895PpQTHSCHH7k+5n3FK2cdA/5TxU4FP4+sehO+tpUz/Adi/gPWPBm4CPiN8R18GmqRM35k/jt0vU2OJy94Q9/2vwFax7Pq4zHLgFaBp3P9Lgc/549z6p32eesxQ8PmmFzA+rutH4Oo87zPv9/UkUs7LhGPuc8Kx/jmwa55tXxffzzLgLaBZnFaHkMAtivvic6B5U
b8zGir/UPQM4QdoX8KJeWvCD+pPhP9S8iYb1wODgDNi2bOEGpF0ko1TgHkFxLA/4US6LbABMIy/JhtLgN0IJ7A6ufHE6fl9WX9fvoBt/oPwg7Ip0AQYxZ9Psi8BD8Z4NiKchE4v6D0STvAj4roaEE4uN8Vp3WL8+8X4WwId4rTRxB+i/GInnKBfjutsTahdOjUljjXAafFzO4OQrFkB73kmIanIfc8fpezDfwHPpMx7CDChkP17CuHHZwugPvAi8ESe+YfE/VeXcDwti8dLTcKJt1PK57so7qcahBPy0ynbakHKD0fu/iEcNz8C3WL5BnH85LiezoQf2I5p7su1wMAY31HxM2sS17sUaJ8SzzYF7OOrgSfzlP3+Gads5+T4mV1P+HG4j5BY9Ij7qX5Rx1U+285d91nx/deN+2m/uO4sQiJ+V97vf8r4nz7rOP9/Cd+5ToRErXs6751iHiNFnKfySzaWAH9LGe8KLIuvBwKv55n/VeD8AtY/mnCM5Z6DXsh9L4Tv6yLgAML3d784npWy7A/ANnG/14xlMwjJXiNCgj+NcK6tEd/3o4V8v/IeM3nPN3sB28V4tiecPw8tZH2/r4NwLP0CHB9jOTqON03Z9jeEf+TqxvGb47TTCcdgPcLx2wVoWNTvjIbKPxSnOu8JwiWS/YDJhC9efoYAJ5hZY2BPYHia659NOMjz04fwxZvo7isIJ628Xnb3j9w9x91/S3ObhelDOOn+6O4/E/6rAcDMmhNOLOe6+wp3n0846ffNb0XxOnF/YKC7/+zuy4AbU+Y/FXjE3d+O8c9y9ylFBRgvF/UFLnH3Ze4+E7idcJLI9b27P+ThGvvjhB/C5oWs9j8p7/kG/rh89iRwgJk1jOPHE46JghwL3OHu37r7ckKNSd881eFXx/33K3AM8I67P+Xua9x9kbt/kTLvS+7+mYcq+KGEH7ZcBwBvuLunlPUmJIM93f2zWHYgMNPdH3X3te4+nvCj0TvNfTmfcEyscfdnCAl4rzgtB9jWzOq6+xx3X59Ldt/FGNcBzxCSv2vdfZW7v0WoIdgqjeMqP7Pd/d74/n919xnxuFvl7gsIbZv2TCdIM9uUkOBf5O6/xc/rYVIupRahuMdIcdUnJBy5lgD1437LOy13eoNC1vdEyjnoCqBPPG6OA0a6+8j4/X0bGEs4LnM95u6T4n5fE8sedfdv3H0JoQb4G3d/Jx7jzxGS4RJx99HuPiHG8xXwFGl+roRjerq7PxHjfYrwj9dBKfM86u7T4ufyLH98H9cQ/lHYyt3XuXu2uy8t6fuQyqM412yfIPwX04aQUOTL3T80syzCZZZX3f3XlDZZhWlJqIrNzyZAdsr49/nM82M6GymGTfKsM3WbmxP+O5mT8t6qFRJDFiHTz05tn0bI/CH8mJSkvUGzGEdqbN8T9mWu369lu/vKuP36hawz73veJC4728w+Ao4ws5eAnsA5haxnk3ziqsGfE53UbW3KX9v2pEq9Jr+SP7+HAwi1XanOBYa4+8SUss2Bv5nZ4pSyGoRjO519OStPQvM9sIm7rzCzo4ALgMFxP52fTsJYgHkpr38FcPe8ZfUp+rjKz5+O0Zg43w3sQfihrUb4LzYdmwC5SU6u7wk1COkuX5xjpLiWAw1TxhsCy93dzSzvtNzpyyhY3u9GTcJxszkhYU39Ma5JqA3Nb9lceT/T/D7jEjGzvwE3E2piahFqrtK9Ayjv5wKFnFf48/fxCcJ3+en4D+eTwGUpCZZUUWnXbLj794Rr0QcQqjsL8yRwPoUkJfk4DPiggGlzCAdwrs3yC7EY20pHYdv8EVhFuE7ZOA4N3X2bAmJZSDh5bJMyfyMPjWRz17dlAXEU9r4WEv6T2DxPnAXVOqUj73uenTL+OOG/uN6ERr6528kvxtn5xLWWP59QU5crbB8UKN5ptCehHUaq3sChZpaaEP0I/C/lM2js4c6KM0hvX7a0P2fOv+8fd3/T3fcj1BxNAR4qIOTSPE6LO
q7S2f6NsWw7d29I+HytkPlTzQaamFlqbUBxjr/iHiPFNYnQODTXDvzRSHwSsH2ez3N7Cm9Enve7sYbwGfxIqPVIPa42cPebU+Zfn/exIv5NbZS/cRHrHka4vLapuzcitOWxQuZPlfdzgTQ/11jrd427dyS0+ziQ9Gu6pBIrbqvoUwnXY1cUMd89hMst7xc2k5lVN7M2ZnYv4RrjNQXM+ixwkpl1NLN6wFXFCztf8wjXigvyLHC2mbUysw2Bi3MnuPscQqOo282soZlVM7MtzSy3mnIe0Cq3Zbu75xB+fO40s40AzKylmf09zj8YONnM9onramlmHYqKM1azPwvcYGYNzGxz4DxCsldS/4zvuQmhduqZlGnDgR0JNRqpieQCwmWE1DifAgbGz7c+4UftGS/4ToShwL5m1sfMaphZ0zRvRdwd+CqfqtrZwD7AOWZ2Rix7FWhnZsebWc047GRmW6e5LzciHBM1zaw3oQ3TSDNrbmaHmNkGhCR0edwf+ZkHtC6NOxLSOK7S0YAQ7xIza0loiJ033oKOvx8JDRxvMrM6ZrY94RyR7vFX3GPkL8ystpnViaO1Yhy5P6pDgPPiPtmE8A/QY3HaaEKjyrPjOs6M5e8VsrnjUs5B1xIatK8jvN+DzOzv8ZxWx8z2MrNW6b6PwsTLW7Pi9qub2Sn8OTH/0/kmakCodfrNzLoRLlPmyu/7mmok4XtyTPwuHgV0JHx/CmVme5vZdvHy0lJCQlbQd0GqkGKd8OL1xbFpzPezu7+bp8o51S6xGnMp4UvfENjJ3ScUsL7XgbsIJ4IZFH5CSNfVwONmttjM+uQz/SHgTULL8nH8tTbnBEL1ZO6dG88T/qslxjcJmGtmC2PZRTH2T81sKfAO0D6+v88IDQLvJFw3/h9//GdxN3Ckmf1iZvfkE+dZhP98viU0xB0GPJLeLsjXMEIi9S3hssbvnWnF67MvEC6lvZhSvpLY2j7uz51jDLmX3r4jtJg/q6CNuvsPhFqz8wmX077gz/+VFqTAW17jOvcBLjazfrG6vwehTcNsQlXwLYQqZih6X44B2hL+m70BONLdFxG+R+fFdf5MqGk5g/zlVmUvMrNxaby/ohR4XKXpGkICuYRwt0/e4/wm4PL4uV6Qz/JHExocziY0mr7K3d9Jc9vFOkYKMJVQu9OS8H39lT++Ow8SGitOIDR8fi2W4e6rCbfFnkC4a+IUQgPK1YVs6wlCsjKX0CD27LiuHwkNpi8l/JD/SEjaSvMW19PiOhcRGpqm9imT3/lmAHCtmS0DriQk0sR48/u+kjJ9EaFG4vy4vX8BB7r7Qoq2MeFcuJTQtu9/FN62S6oIKzgfkKrGzGYSWrgX+GNhZlcC7dz9uDILrBBm9jXhR//rDG/nJMK+2T2T25HyycxGE+4+eTjpWEQqosS73paKI15aOZU/36GRmFhtPCTTiYaIiKwf9WQnaTGz0wjVw6+7e6FtccqKu6/O0whPRETKIV1GERERkYxSzYaIiIhkVIVos9GsWTNv3bp10mGIiFQo2dnZC909K+k4RCpEstG6dWvGji3yjlsREUlhZvn1tixS5nQZRURERDJKyYaIiIhklJINERERySglGyIiIpJRSjZEREQko5RsiIiISEYp2RAREZGMUrIhIlIO5eTA44/Ds88WPa9IeadkQ0SknPn0U9hlFzjpJBg6NOloRNafkg0RkXJi1iw44YSQaPz4Y6jZeOmlpKMSWX8VortyEZHK7Lff4I474MYbYc0auOSSMDRokHRkIqVDyYaISELc4cUX4YILYOZMOPxwuO022GKLpCMTKV26jCIikoCvvoLu3eHII6F+fXj3XXjhBSUaUjkp2RARKUMLF8IZZ0DnziHh+O9/Yfz4kHiIVFa6jCIiUgbWrAmJxdVXw7JlcOaZcNVV0KRJ0pGJZJ6SDRGRDHvzTTj3XJgyBfbbD+66Czp2TDoqkbKjyygiIhkybRocdBDsvz+sXQsjRoTEQ4mGV
DVKNkREStmSJXDhhbDttvC//8Gtt8LEiSHxMEs6OpGyp8soIiKlZN06eOwxuPRSWLAATj4ZbrgBNt446chEkpXxmg0zq25m483s1Tj+mJl9Z2ZfxKFTpmMQEcm0Dz+Ebt2gXz/Yaiv47DMYPFiJhgiUzWWUc4DJecoudPdOcfiiDGIQEcmIH36Avn1hjz1g/nwYNiwkHl27Jh2ZSPmR0WTDzFoBvYCHM7kdEZGytnJluI21Qwd4+eVwG+uUKXD00WqXIZJXpms27gL+BeTkKb/BzL4yszvNrHaGYxARKTXu8PTTIcm45prQ6HPKlJB4bLBB0tGJlE8ZSzbM7EBgvrtn55l0CdAB2AloAlxUwPL9zWysmY1dsGBBpsIUEUlbdna4XHL00dC0abjT5JlnYPPNk45MpHzLZM3GbsDBZjYTeBrobmZPuvscD1YBjwLd8lvY3Qe5e1d375qVlZXBMEVECjdvXmj4udNOoe+MQYNg7Fj4v/9LOjKRiiFjyYa7X+Lurdy9NdAXeM/djzOzFgBmZsChwMRMxSAisj5Wr4Z//xvatYPHH4eBA0OycdppUL160tGJVBxJ9LMx1MyyAAO+AP6RQAwiIgVyh9deg/POg+nToVcvuP12aN8+6chEKqYySTbcfTQwOr7Wsw1FpNyaPDnUYLz5ZkguRo6Enj2TjkqkYlN35SIiwC+/hIelbbcdfPop3HknTJigREOkNKi7chGp0tatg4cegssvh59/hv794brrQO3SRUqPajZEpMoaNQp23BHOOCM8NG3cOHjgASUaIqVNyYaIVDnffQdHHgndu4cntD73XEg8OulJTSIZocsoIlJlLF8ON98cbmetXj1cLjn/fKhbN+nIRCo3JRsiUunl5IQHpF10EcyeDcceG5KOVq2SjkykatBlFBGp1D77DHbbDY4/HjbZBD76CJ58UomGSFlSsiEildLs2XDiifC3v8HMmfDYYzBmDOy6a9KRiVQ9uowiIpXKb7+FPjJuuAHWrIGLL4ZLL4UGDZKOTKTqUrIhIpWCOwwfHhp8fvcdHHpoaAi65ZZJRyYiuowiIhXehAmw775w+OFQrx68/Ta89JISDZHyQsmGiFRYixbBP/8Z+scYPx7uvRe++CIkHiJSfugyiohUOGvWhJ4+r7oKli6FAQPg6quhadOkIxOR/CjZEJEK5e23wwPTvv4a9tkH7rordDUuIuWXLqOISIUwYwYccgj06BHuOBk+PCQeSjREyj8lGyJSri1dGnr+7NgR3nsv9Pz59dch8TBLOjoRSUfGkw0zq25m483s1TjexszGmNkMM3vGzGplOgYRqXhycuDRR6FdO7j11tDF+LRpIfGoXTvp6ESkOMqiZuMcYHLK+C3Ane6+FfALcGoZxCAiFcjHH0O3bnDKKdCmTehy/NFHoUWLpCMTkZLIaLJhZq2AXsDDcdyA7sDzcZbHgUMzGYOIVBw//RRqMHbbDebMCc8w+fhj2GmnpCMTkfWR6ZqNu4B/ATlxvCmw2N3XxvGfgJb5LWhm/c1srJmNXbBgQYbDFJEk/fpreNx7+/bwwgtw+eUwdWpIPNQuQ6Tiy1iyYWYHAvPdPbsky7v7IHfv6u5ds7KySjk6ESkP3OG556BDB7jySjjgAJgyJSQe9esnHZ2IlJZM9rOxG3CwmR0A1AEaAncDjc2sRqzdaAXMymAMIlJOjR8P55wDH3wAO+wAQ4bAnnsmHZWIZELGajbc/RJ3b+XurYG+wHvufiwwCjgyznYi8HKmYhCR8mf+fOjfH7p0gcmT4cEHITtbiYZIZZZEPxsXAeeZ2QxCG47BCcQgImVs9Wq44w5o2zbcWXLuuTB9ekg8qldPOjoRyaQy6a7c3UcDo+Prb4FuZbFdESkfRo6EgQNDPxn77w933hnaaYhI1aAeREUkY6ZMCY0+e/UK46+9Bq+/rkRDpKpRsiEipW7xYjjvPNhuO/joI7j9dpgwISQeIlL16KmvIlJq1q2DwYPhs
stg0SLo1w+uvx422ijpyEQkSarZEJFS8b//hTtMTj8dtt463GEyaJASDREpQbJhZtXMrGEmghGRiuf776FPH9hrL/j5Z3jmmZB4dO6cdGQiUl6klWyY2TAza2hmGwATga/N7MLMhiYi5dmKFaHXzw4d4NVX4ZprQoPQPn3UxbiI/Fm6NRsd3X0p4aFprwNtgOMzFpWIlFvuMGxYeI7JddfBYYeF55hceSXUq5d0dCJSHqWbbNQ0s5qEZGOEu68BPHNhiUh5NHYs7L57eEBa8+ahq/Fhw2DTTZOOTETKs3STjQeBmcAGwPtmtjmwNFNBiUj5MncunHJKeNT7jBnhjpPPPw+Jh4hIUdK69dXd7wHuSSn63sz2zkxIIlJerFoFd98dLpesWgUXXhge/95QTcRFpBjSSjbMrClwFbA74fLJh8C1wKLMhSYiSXGHESPg/PPhm2/goINCx1xt2yYdmYhUROleRnkaWAAcQXhi6wLgmUwFJSLJmTQJevSAQw+FWrXgzTdD4qFEQ0RKKt1ko4W7X+fu38XheqB5JgMTkbL1889w1lmwww6hIeg998CXX4bEQ0RkfaSbbLxlZn1jh17VzKwP8GYmAxORsrF2Ldx3X6i5+O9/Qw+g06eHxKNmzaSjE5HKIN1k4zRgGLA6Dk8Dp5vZMjPTXSkiFdS770KnTnDmmaFG44svQuLRrFnSkYlIZZJWsuHuDdy9mrvXiEO1WNbA3dUuXaSC+eab0BnXvvvCypXw4osh8dhuu6QjE5HKKO2nvprZwcD/xdHR7v5qEfPXAd4HasftPO/uV5nZY8CewJI460nu/kVxAxeR4lu2DG68Ee64I1wiufFGGDgQ6tRJOjIRqczSvfX1ZmAnYGgsOsfMdnP3SwpZbBXQ3d2Xx95HPzSz1+O0C939+RJHLSLFkpMDTzwBF18cOug64QS46SbYZJOkIxORqiDdmo0DgE7ungNgZo8D44ECkw13d2B5HK0ZB3VxLlLGPvkEzjkn9PjZrRsMHw5/+1vSUYlIVVKcR8w3TnndKJ0FzKy6mX0BzAfedvcxcdINZvaVmd1pZrULWLa/mY01s7ELFiwoRpgiAjBrFhx/POy6K/z0EwwZEhIPJRoiUtbSTTZuAsab2WOxViMbuKGohdx9nbt3AloB3cxsW0JtSAfCZZkmwEUFLDvI3bu6e9esrKw0wxSRX3+FG26Adu3guefg0kth2rSQeFQrzr8XIiKlJN1nozxlZqMJCQLARe4+N92NuPtiMxsF7O/u/47Fq8zsUeCC4gQsIvlzD3eVXHABzJwJhx8Ot90GW2yRdGQiUtWl9X+OmRmwD6Hdxgiglpl1K2KZLDNrHF/XBfYDpphZi5R1HgpMXI/4RYTQ02f37nDkkdCgQbiN9YUXlGiISPmQbqXqf4FdgKPj+DLgviKWaQGMMrOvgM8JbTZeBYaa2QRgAtAMuL7YUYsIAAsWwBlnwI47woQJoQfQceNC4iEiUl6kezfK39x9RzMbD+Duv5hZrcIWcPevgM75lOs0KLKe1qwJicXVV4e+M848E666Cpo0SToyEZG/SjfZWGNm1Ym3rppZFpCTsahEpEBvvBE64poyJTwk7c47oWPHpKMSESlYupdR7gFeAjYysxuADwl3qIhIGZk2DQ48EHr2DA9Pe+WVkHgo0RCR8i7du1GGmlk2oZGoAYe6++SMRiYiACxZAtddFx75XqdOuMPkrLOgdr491IiIlD/pdld+qrsPBqaklN3s7hdnLDKRKm7dOnj00dBPxsKFcMopof+M5s2TjkxEpHjSbbNxhJn95u5DAczsPkCPbhLJkA8+CF2Mjx8Pu+0Gr78OXbokHZWISMmknWwAI8wsB9gfWOzup2YuLJGq6Ycf4F//gmeegVat4Kmn4KijwCzpyERESq7QZMPMUm+k6wcMBz4CrjGzJu7+cyaDE6kqVq6EW2+FW24J41ddFZKOevWSjUtEpDQUVbORTbjd1VL+9oqDA+qfU
GQ9uIdajAsvDA9LO+qokHRstlnSkYmIlJ5Ckw13b1NWgYhUNdnZoV3GRx9B584wbBjssUfSUYmIlD49A1KkjM2dC6eeCjvtFPrOeOgh+PxzJRoiUnml20BURNbT6tWhr4xrrw2PgT/vPLjiCmjUKOnIREQyq6gGojXdfU1ZBSNSGbnDa6+F5GL6dOjVC26/Hdq3TzoyEZGyUdRllE/MbLiZ/cPMWpdBPCKVyuTJoXvxgw6CatVg5Eh49VUlGiJStRSabLh7V+DcOHqXmX1uZneaWQ8zU2fJIgX45Rc491zYbjv49NPwsLQJE0LiISJS1RTZQNTdZ7r7A+5+KLAr8AqwL/CBmb2W6QBFKpK1a+H++6FtW7j3XujXL1w6OfdcqFkz6ehERJJRrAaisf3Ge3HAzFpmIiiRimjUqHAr64QJsOeecPfdsMMOSUclIpK89br11d1nFTbdzOqY2Wdm9qWZTTKza2J5GzMbY2YzzOwZM6u1PnGIJOm77+CII6B7d1i6FJ5/PiQeSjRERIJM97OxCuju7jsAnYD9zWxn4BbgTnffCvgF0HNWpMJZvhwuuwy23hreeAOuvz40CD3iCD3LREQkVUaTDQ+Wx9GacXCgO/B8LH8cODSTcYiUppwceOIJaNcObrwRevcOnXNddhnUrZt0dCIi5U9abTbMrB1wIbB56jLu3j2NZasTnrGyFXAf8A3hqbFr4yw/AX9p+2Fm/YH+AJvpQRFSTowZE9pljBkTegB94QXYZZekoxIRKd/SbSD6HPAA8BCwrjgbcPd1QCczawy8BHRIc7lBwCCArl27enG2KVLaZs+GSy6BIUNg443hscfg+OND3xkiIlK4dJONte5+//psyN0Xm9koYBegsZnViLUbrYBCG5qKJOW330IfGTfcAGvWwMUXw6WXQoMGSUcmIlJxFPp/mZk1MbMmwCtmNsDMWuSWxfJCmVlWrNHAzOoC+wGTgVHAkXG2E4GX1+tdiJQyd3jpJejYMSQX++0HX38NN92kRENEpLiKqtnIJjTozG1bf2HKNAe2KGL5FsDjsd1GNeBZd3/VzL4Gnjaz64HxwOBiRy6SIRMmhE643nsPttkG3n4b9t036ahERCquQpMNd2+zPit396+AzvmUfwt0W591i5S2hQvhyivhwQehcWO47z7o3x9q6NnIIiLrJa3mbWb2z9zLIXF8QzMbkLmwRMrOmjXh0e9t28KgQTBgQOhifMAAJRoiIqUh3bb0p7n74twRd/8FOC0zIYmUnbfeCj19nnMOdOkCX3wRnmnSpMgWSSIikq50k43qZn/0iRjbYKiLcamwZsyAQw6Bv/8dVq2C4cND24xtt006MhGRyifdZONN4Bkz28fM9gGeAt7IXFgimbF0KVx0UbjL5L334Oabw10mhxyiLsZFRDIl3SvS/yL05nlGHH8beDgjEYlkQE5O6DWuH9QAACAASURBVIjr0kth3jw4+eTQd0aLFklHJiJS+RWZbMRLJkPc/VhCL6IiFcpHH4U2GdnZoWvxV14JXY2LiEjZKPIySuxufHM9Bl4qmh9/hGOOgd13h7lzYejQkHgo0RARKVvpXkb5FvjIzEYAK3IL3f2OjEQlsh5WroR//zu0x3CHK64I7TQ22CDpyEREqqZ0k41v4lANUGfNUi65w3PPwYUXwg8/hEe/33ortG6ddGQiIlVbWsmGu18DYGb14/jyTAYlUlzjx4d2GR98EPrNGDIE9twz6ahERATS70F0WzMbD0wCJplZtpltk9nQRIo2f37oUrxLF5g8OXQ1np2tRENEpDxJt5+NQcB57r65u28OnA88lLmwRAq3ejXccUfoYvzRR8OD06ZPD4lH9epJRyciIqnSbbOxgbuPyh1x99FmpuZ2koiRI2HgQJg2DXr2DElHhw5JRyUiIgVJt2bjWzO7wsxax+Fywh0qImVmypSQXPTqFcZfey0kHko0RETKt3STjVOALOBF4AWgGXBypoISSfXbb3DBB
bDddvDxx6EmY8IEOOCApCMTEZF0pHsZZV93Pzu1wMx6A88VtICZbQoMAZoDDgxy97vN7GrCE2MXxFkvdfeRxQ1cqobvvgu3sGZnQ79+oYvxjTZKOioRESmOdJONS/hrYpFfWaq1wPnuPs7MGgDZZvZ2nHanu/+7eKFKVfPaa3D88eG5Ji+/DAcfnHREIiJSEoUmG2bWEzgAaGlm96RMakhIJgrk7nOAOfH1MjObDLRcv3ClKli3Dq68Em68ETp1guefhy23TDoqEREpqaLabMwGxgK/Adkpwwjg7+luxMxaA52BMbHoTDP7ysweMbMNixmzVGLz5kGPHiHR6NcvtNFQoiEiUrEVWrPh7l8CX5rZL8Cr7p5T3A3EXkdfAM5196Vmdj9wHaEdx3XA7YQGqHmX6094rD2bbbZZcTcrFdCHH8JRR8HPP4e+M046KemIRESkNKR7N0ofYLqZ3Wpmad9oaGY1CYnGUHd/EcDd57n7upi4PAR0y29Zdx/k7l3dvWtWVla6m5QKyD3cYbLXXlCvHnz6qRINEZHKJK1kw92PI1wG+QZ4zMw+MbP+seFnvszMgMHA5NSnw5pZi5TZDgMmlihyqRSWLg13m5x/fmgAOnZseLaJiIhUHunWbODuS4HngaeBFoREYZyZnVXAIrsBxwPdzeyLOBwA3GpmE8zsK2BvYOB6vQOpsCZMgK5dYfjw8Ej4F16ARo2SjkpEREpbWre+mtnBhE68tiL0ndHN3eebWT3ga+DevMu4+4eA5bM69akhPP44nHEGNG4Mo0bBHnskHZGIiGRKuv1sHEHoG+P91EJ3X2lmp5Z+WFJZ/fYbnHUWPPww7L03DBsGG2+cdFQiIpJJaSUb7n6imW0cazgc+Nzd58Zp72YyQKk8vv0WjjwSxo+HSy6Ba6+FGummuyIiUmGl1WYj1l58BhwOHAl8amZ/uV1VpCCvvAJduoTux195JfSjoURDRKRqSPd0/y+gs7svAjCzpsDHwCOZCkwqh7Vr4Yor4OabYccdQ2+gbdokHZWIiJSldJONRcCylPFlsUykQHPnwtFHw+jR0L8/3H031KmTdFQiIlLW0k02ZgBjzOxlQpuNQ4CvzOw8gNR+NEQAPvgg9Aa6eHG48+SEE5KOSEREkpJusvFNHHK9HP8W2KmXVE3ucPvtcPHFsMUW8OabsN12SUclIiJJSvdulGsyHYhUfEuWhG7Ghw+HI46ARx6Bhg2TjkpERJKWdg+iIoX58stwt8mrr4bnnDz3nBINEREJlGzIenv0Udh5Z/j119AYdOBAsPz6jhURkSpJyYaU2K+/Qr9+cMopsOuuobOu3XZLOioRESlv0u3Uq52ZvWtmE+P49mZ2eWZDk/Lsm29CgjF4MFx2Gbz1Fmy0UdJRiYhIeZRuzcZDwCXAGgB3/wrom6mgpHx7+eXQPuP770Mbjeuvh+rVk45KRETKq3STjXru/lmesrWlHYyUb2vXwr/+BYceClttBePGQa9eSUclIiLlXbr9bCw0sy0JHXphZkcCczIWlZQ7c+ZA377w/vvwj3/AnXeqN1AREUlPusnGP4FBQAczmwV8BxyXsaikXPnf/0JvoMuWwRNPwHH65EVEpBjSuozi7t+6+75AFtDB3Xd395mFLWNmm5rZKDP72swmmdk5sbyJmb1tZtPj3w3X+11IRrjDLbdA9+7QuDGMGaNEQ0REii+tmg0zawycALQGaljsRMHdzy5ksbXA+e4+zswaANlm9jZwEvCuu99sZhcDFwMXlfgdSEYsXgwnnggjRkDv3uGukwbqnF5EREog3csoI4FPgQlATjoLuPscYrsOd19mZpOBloSHuO0VZ3scGI2SjXJl/Hg48kj44Qe46y44+2x10iUiIiWXbrJRx93PK+lGzKw10BkYAzSPiQjAXKB5Acv0B/oDbLbZZiXdtBTT4MHwz39Cs2ahMeguuyQdkYiIVHTp3vr6hJmdZmYtYpuLJmbWJJ0Fzaw+8AJwrrsvT
Z3m7k68wyUvdx/k7l3dvWtWVlaaYUpJrVwZegLt1w/22CPUbijREBGR0pBusrEauA34BMiOw9iiFjKzmoREY6i7vxiL55lZizi9BTC/uEFL6ZoxIyQWjz4KV1wBb7wByu9ERKS0pHsZ5XxgK3dfmO6KLbQiHQxMdvc7UiaNAE4Ebo5/X053nVL6XnopPBa+Rg0YORJ69kw6IhERqWzSrdmYAaws5rp3A44HupvZF3E4gJBk7Gdm04F947iUsTVr4MIL4fDDoX370BuoEg0REcmEdGs2VgBfmNkoYFVuYWG3vrr7h0BB9zDsk3aEUupmzw69gX7wAQwYAHfcAbVrJx2ViIhUVukmG8PjIBXcqFFw9NGhN9ChQ+GYY5KOSEREKru0kg13fzzTgUhm5eSE3kAvvxzatoV334Vttkk6KhERqQoKTTbM7Fl372NmE8jnFlV33z5jkUmp+eUXOOGE8Dj4o46Chx5Sb6AiIlJ2iqrZOCf+PTDTgUhmjBsXegP96Se45x4480z1BioiImWr0LtRUnr6HODu36cOwIDMhycl5Q6DBsGuu8LataE30LPOUqIhIiJlL91bX/fLp0w3SpZTK1eGvjNOPx323DPUbuy8c9JRiYhIVVVUm40zCDUYW5jZVymTGgAfZTIwKZlp08Jlk4kT4aqrQo+g1asnHZWIiFRlRbXZGAa8DtxEeBR8rmXu/nPGopISeeEFOPlkqFULXn8d/v73pCMSEREpItlw9yXAEuDosglHSmLNGrjoIrjzTvjb3+DZZ0EPyhURkfIi3U69pJyaNSvczvrRR+FOk9tvDzUbIiIi5YWSjQrsvfdCb6ArVsBTT4UuyEVERMqbdO9GkXIkJwduvBH22w+aNoXPP1eiISIi5ZdqNiqYn38OvYG+9lqo1Rg0COrXTzoqERGRginZqEDGjoXevUM7jf/8JzyxVZ10iYhIeafLKBWAOzzwAOy2G6xbFx4N/89/KtEQEZGKQclGObdiRbhscsYZ0L07jB8fbm8VERGpKDKabJjZI2Y238wmppRdbWazzOyLOByQyRgqsqlTQ2IxdChcc01op9G0adJRiYiIFE+mazYeA/bPp/xOd+8Uh5EZjqFCeu456NoV5s6FN96AK6+EaqqHEhGRCiijP1/u/j6gbs2LYfVqOPdc6NMHtt02XDbp0SPpqEREREouqf+VzzSzr+Jllg3zm8HM+pvZWDMbu2DBgrKOLxE//QR77QV33w3nnAP/+x9sumnSUYmIiKyfJJKN+4EtgU7AHOD2/GZy90Hu3tXdu2ZlZZVlfIl45x3o3BkmTIBnnoG77lK34yIiUjmUebLh7vPcfZ275wAPAd3KOobyJCcHrrsuXCpp3jz0BtqnT9JRiYiIlJ4y79TLzFq4+5w4ehgwsbD5K7NFi+D448Pj4I89Fh58EDbYIOmoRERESldGkw0zewrYC2hmZj8BVwF7mVknwIGZwOmZjKG8+vxzOPLIcLfJ/ffD6aerky4REamcMppsuPvR+RQPzuQ2y7vc3kDPPRdatIAPP4Sddko6KhGRzMnOzt6oRo0aDwPbos4kK6scYOLatWv7denSZX7eiXo2ShlasSLUYAwdCj17whNPqJMuEan8atSo8fDGG2+8dVZW1i/VqlXzpOOR0peTk2MLFizoOHfu3IeBg/NOV4ZZRqZMgW7dYNiw0CD01VeVaIhIlbFtVlbWUiUalVe1atU8KytrCaH26i9Us1EGnnkGTj0V6tWDt96CffdNOiIRkTJVTYlG5Rc/43wrMVSzkUGrV8PZZ0PfvrDDDjBunBINERGpepRsZMiPP8L//R/ce29oDDp6NLRqlXRUIiKSjosvvnjj0lrXeeedt8mVV17ZvKDpxxxzzGZvvfXWBt26dWv//vvv1yut7Zam+++/v0m7du06tmvXrmPnzp07fPLJJ3WLs7ySjQx4663QG+ikSfDss3DnnVCzZtJRiYhIuu65554WxZk/JyeHdevWlWhb4
8aNq9+9e/cVJVq4jGy11VarPvroo6nTpk37+pJLLpl9+umnb16c5ZVslKKcnPAo+P33h403hrFjoXfvpKMSEanapk6dWqtNmzbbHHzwwW222GKLbfbff/8tli1bVm3EiBEN9t133y1z53vppZca7rffflsOGDCg5apVq6p16NCh48EHH9wG4Oqrr27etm3bbdq2bbvNtddeu1Huelu3br3tYYcd1rpdu3bbfPPNN7Wef/75hh07dty6ffv2HXfZZZd2ueuePHly3W7durVv1arVdtdff/1GueXjxo2rs8UWW/xWo8YfTSjXrVvHEUcc0frss8/eBODFF19s2KlTpw4dO3bcumfPnlssWbKkGsDLL7/cYOutt+7Yrl27jr17927966+/GkDLli23+8c//tGqXbt2HbfbbrutJ06cWBvgkUce2bBt27bbtG/fvmPXrl3bF2cf7rfffiuysrLWAey9994r5s6dW6wHaqiBaClZuBCOOw7efDP0Cnr//eoNVEQkr1NOYdOJEynVSwXbbsvKRx7hx8LmmTlzZp0HH3xwZo8ePVb07t279W233ZZ19dVXzzvnnHM2mz17do1NNtlk7SOPPNL05JNPXnjMMccseeyxxzaaMmXK1wAffPBBvWHDhjXNzs6e7O506dJl63322WdZs2bN1v3www+1Bw8e/N0+++wzc/bs2TXOPPPM1qNHj57SoUOH1fPmzaueu/0ZM2bU+fjjj6cuXry4+tZbb73thRdeuKB27do+YsSIRj169FiSO9+aNWvs0EMPbdOxY8dfb7nllrlz5sypceONN7Z4//33pzVs2DDnsssu2/i6665rfu211849/fTT27z11ltTt99++1WHHXZY69tuuy3ryiuvnA/QqFGjtdOmTfv6P//5T9Ozzjpr01GjRs24+eabW7z11lvT2rRps2bhwoXV/7qX0nPvvfc223vvvZcUPecfVLNRCsaMgR13hFGjQoddjz+uRENEpDzZeOONV/fo0WMFwPHHH7/o448/rl+tWjX69Omz6KGHHmqycOHC6uPGjavfu3fvv/yIjh49uv4BBxywuGHDhjmNGjXK6dWr1y+jRo1qANCiRYvV++yzz4o43wbdunVb1qFDh9UAzZs3//26So8ePRbXrVvXW7RosbZJkyZrfvrppxoA77zzTsNDDz10ae58AwYM2Dw30chd5zfffFOnW7duHTp06NDx6aefbvrDDz/U+vLLL+u0atVq1fbbb78K4KSTTlr04YcfNshdz4knnvgzwGmnnfbz+PHj6wN07dp1+bHHHtv69ttvb7Z27doS7cdXXnmlwZNPPtns7rvv/qk4y6lmYz24w333wXnnQcuW8PHH0KVL0lGJiJRfRdVAZIrleR5E7vgZZ5yxqFevXlvVqVPHDzrooF9qFrOBXb169XLSma927dq/3/pbvXp11q5da8uWLau2dOnS6q1bt16TO61r167LP/jgg4YrV66cV69ePXd3dt9996WvvPLKd6nrK6qBZrVqf9QlmJkDDBs27If33ntvgxEjRjTq0qVLx+zs7K833njj3xOis846q+Xbb7/dCCC3VifVmDFj6g4YMGDz1157bXrqculQzUYJLV8OxxwDZ50Vntiana1EQ0SkvJozZ06td955ZwOAoUOHNtl1112XA7Ru3XpN8+bN19x+++0t+vfvvzB3/ho1aviqVasMYO+9914+cuTIxjE5qDZy5MgN995772V5t7HXXnut+OyzzxpMmTKlFkDqZZT8vPbaaw123333P63n9NNPX9ijR48lBx544JZr1qxhr732WjF27Nj6ue0uli5dWu2rr76qvcMOO/w2a9asWrnlQ4YMabrHHnv8vq4hQ4Y0ARg8ePCGnTt3XgEwadKk2t27d19x1113zd5www3Xfvvtt39qd3HvvffOmjJlytf5JRrTp0+v1bt37y0feeSR73JrU4pDNRslMHkyHHEETJ0KN94IF10E1ZS2iYiUW61bt
/7t3nvv3ah///712rZt+9sFF1ywIHda3759F9133301dtxxx99yy4499tgFW2+9dcdtt9125YgRI7475phjFu24445bAxx//PELdtttt1+nTp36px/rTTbZZO0999wz87DDDtsqJyeHpk2brvn444+nFxTTyJEjG/Xp0+eXvOVXX331vIEDB1Y//PDD2wwfPvy7Bx98cGbfvn23WL16tQFcddVVs7bffvtVDzzwwMzevXtvuW7dOnbYYYeVqe/pl19+qd6uXbuOtWrV8qeffvpbgIEDB7aaOXNmbXe33XfffenOO+/8a7r77/LLL2+xePHiGmedddbmEJKxiRMnTk53eXMv/526de3a1ceOHZt0GAA89RScdlpok/HUU9C9e9IRiYjkz8yy3b1r0nF8+eWXM3fYYYeFRc+ZGVOnTq114IEHtp0+ffqk/KafcMIJm3Xu3HnlwIEDyzTGjh07bj1+/PgpqZdYSkPLli23Gzt27OQWLVqUrGHGevjyyy+b7bDDDq3zlqtmI02rVsH554c2GrvtFrogb9ky6ahERGR9bLPNNlvXrVs358EHHyzztiRff/112jUDFZ2SjTR8/z306QOffRYag958szrpEhGpKNq3b7+6oFqNSZMmVbof/FmzZk1IOoa8lGwU4Y034NhjYc0aeOEFOPzwpCMSEalwcnJyckwPY6vccnJyDMj37pyMNms0s0fMbL6ZTUwpa2Jmb5vZ9Ph3w0zGUFLr1sFVV8EBB4TLJdnZSjREREpo4oIFCxrFHyOphHJycmzBggWNgIn5Tc90zcZjwH+AISllFwPvuvvNZnZxHL8ow3EUy4IFoTbj7bfhxBPhv/8Nj4cXEZHiW7t2bb+5c+c+PHfu3G1RlwuVVQ4wce3atf3ym5jRZMPd3zez1nmKDwH2iq8fB0ZTjpKNTz4J7TMWLIBBg6BfPzDl4iIiJdalS5f5wMFJxyHJSSLDbO7uc+LruUC+j901s/5mNtbMxi5YsCC/WUqVO9xzT3gsfM2aoTfQ005ToiEiIrK+Eq3O8tDJR74Nhtx9kLt3dfeuWVlZGY1j2TLo2xfOOQd69gztM3bcMaObFBERqTKSSDbmmVkLgPh3fgIx/G7SJNhpJ3j+ebjpJhg+HDYsl01WRUREKqYkko0RwInx9YnAywnEAMDQodCtG/zyC7zzDlx8sbodFxERKW2ZvvX1KeAToL2Z/WRmpwI3A/uZ2XRg3zheplatggED4LjjwsPTxo+Hvfcu6yhERESqhkzfjXJ0AZP2yeR2C/P999C7N3z+OVx4YXiQWg11bSYiIpIxVepnduTIUJuxbh289BIcemjSEYmIiFR+VaKFwrp1cMUV0KsXbLZZuNtEiYaIiEjZqPQ1G/PnwzHHwLvvwsknh6e21q2bdFQiIiJVR6VONiZMCP1mLFoEgwfDKackHZGIiEjVU6kvo2y6KWyzTeiCXImGiIhIMip1zUbjxvDmm0lHISIiUrVV6poNERERSZ6SDREREckoJRsiIiKSUUo2REREJKOUbIiIiEhGKdkQERGRjFKyISIiIhmlZENEREQyytw96RiKZGYLgO/XYxXNgIWlFE5pUlzFo7iKR3EVT2WMa3N3zyrNYERKokIkG+vLzMa6e9ek48hLcRWP4ioexVU8ikskc3QZRURERDJKyYaIiIhkVFVJNgYlHUABFFfxKK7iUVzFo7hEMqRKtNkQERGR5FSVmg0RERFJiJINERERyagKmWyY2aZmNsrMvjazSWZ2TixvYmZvm9n0+HfDWG5mdo+ZzTCzr8xsx5R1nRjnn25mJ65nXHXM7DMz+zLGdU0sb2NmY+L2nzGzWrG8dhyfEae3TlnXJbF8qpn9fX3iSllndTMbb2avlpe4zGymmU0wsy/MbGwsS/RzjOtrbGbPm9kUM5tsZrskHZeZtY/7KXdYambnJh1XXN/AeMxPNLOn4nehPBxf58SYJpnZubGszPeXmT1iZvPNbGJKWanFYWZd4
vdoRlzWSrbHRDLE3SvcALQAdoyvGwDTgI7ArcDFsfxi4Jb4+gDgdcCAnYExsbwJ8G38u2F8veF6xGVA/fi6JjAmbu9ZoG8sfwA4I74eADwQX/cFnomvOwJfArWBNsA3QPVS2G/nAcOAV+N44nEBM4FmecoS/RzjOh8H+sXXtYDG5SGulPiqA3OBzZOOC2gJfAfUTTmuTkr6+AK2BSYC9YAawDvAVknsL+D/gB2BiZk4zoHP4rwWl+1ZGseZBg2lNSQeQKm8CXgZ2A+YCrSIZS2AqfH1g8DRKfNPjdOPBh5MKf/TfOsZUz1gHPA3Qu9/NWL5LsCb8fWbwC7xdY04nwGXAJekrOv3+dYjnlbAu0B34NW4nfIQ10z+mmwk+jkCjQg/nlae4soTSw/go/IQFyHZ+JHwI1gjHl9/T/r4AnoDg1PGrwD+ldT+Alrz52SjVOKI06aklP9pPg0aysNQIS+jpIpVsJ0JtQjN3X1OnDQXaB5f554Mc/0UywoqX594qpvZF8B84G3Cf2eL3X1tPtv4fftx+hKgaSbiAu4inGhz4njTchKXA2+ZWbaZ9Y9lSX+ObYAFwKMWLjs9bGYblIO4UvUFnoqvE43L3WcB/wZ+AOYQjpdskj++JgJ7mFlTM6tHqDHYlPLzOZZWHC3j69KOT6TUVOhkw8zqAy8A57r70tRp7u6EH7Iy5e7r3L0ToSahG9ChrGPIy8wOBOa7e3bSseRjd3ffEegJ/NPM/i91YkKfYw1Clff97t4ZWEGo5k46LgBi24eDgefyTksirtjW4BBCkrYJsAGwf1nGkB93nwzcArwFvAF8AazLM09in2N5jEMkUypssmFmNQmJxlB3fzEWzzOzFnF6C0LtAsAswn80uVrFsoLK15u7LwZGEaqPG5tZjXy28fv24/RGwKIMxLUbcLCZzQSeJlxKubscxJX7XzHuPh94iZCgJf05/gT85O5j4vjzhOQj6bhy9QTGufu8OJ50XPsC37n7AndfA7xIOObKw/E12N27uPv/Ab8Q2nclvb9ylVYcs+Lr0o5PpNRUyGQjtrQeDEx29ztSJo0Aclton0hoy5FbfkJs5b0zsCRWX74J9DCzDeN/Zz1iWUnjyjKzxvF1XUI7ksmEpOPIAuLKjfdI4L34H84IoG9std8GaEtoAFYi7n6Ju7dy99aE6vf33P3YpOMysw3MrEHua8L+n0jCn6O7zwV+NLP2sWgf4Ouk40pxNH9cQsndfpJx/QDsbGb14nczd38lenwBmNlG8e9mwOGEBtJJ769cpRJHnLbUzHaO+/+ElHWJlA9JNxopyQDsTqhy/IpQNfoF4XpsU0IjyOmEludN4vwG3EdoPzEB6JqyrlOAGXE4eT3j2h4YH+OaCFwZy7cgnDRnEKq+a8fyOnF8Rpy+Rcq6LovxTqUUW5YDe/HH3SiJxhW3/2UcJgGXxfJEP8e4vk7A2PhZDie0/i8PcW1AqAVolFJWHuK6BpgSj/snCHeUJH7cAx8QEp8vgX2S2l+E5HAOsIZQc3ZqacYBdI37/hvgP+Rp3KxBQ9KDuisXERGRjKqQl1FERESk4lCyISIiIhmlZENEREQySsmGiIiIZJSSDREREckoJRtSqZnZOgtPSP3SzMaZ2a5FzN/YzAaksd7RZta1hDGNzO2PRUSkKlCyIZXdr+7eyd13IDzo66Yi5m9MeCppxrj7AR56mBURqRKUbEhV0pDQZTVmVt/M3o21HRPM7JA4z83AlrE25LY470Vxni/N7OaU9fU2s8/MbJqZ7ZF3Y2bWwszej+uamDuPmc00s2Zm9o847Qsz+87MRsXpPczskxjbcxaeASQiUmGpUy+p1MxsHaEXxjqER3F3d/fs+EyOeu6+1MyaAZ8SusfenNDD6rZx+Z6ER5Pv6+4rzayJu/9sZqOBbHc/38wOAM5z933zbPt8oI6732Bm1eP2lll4Rk1Xd18Y56sJvAfcCnxCeLZIT3dfYWYXEXrevDaT+0lEJJNqF
D2LSIX2q4en8GJmuwBDzGxbQpfQN1p4ymwO4ZHczfNZfl/gUXdfCeDuP6dMy30AYDbQOp9lPwceicnEcHf/ooAY7yY8H+QVC0/o7Qh8FB5zQS1CAiIiUmEp2ZAqw90/ibUYWYRn6WQBXdx9TaxtqFPMVa6Kf9eRz3fJ3d+PyUwv4DEzu8Pdh6TOY2YnEWpTzswtAt5296OLGYuISLmlNhtSZZhZB6A68UFmwPyYaOxN+MEHWAY0SFnsbeBkM6sX19GkGNvbHJjn7g8BDxMeU586vQtwAXCcu+fE4k+B3cxsqzjPBmbWrnjvVESkfFHNhlR2dc0s9/KFASe6+zozGwq8YmYTCE93nQLg7ovM7CMzmwi87u4XmlknYKyZrQZGApemvNaUIQAAAGtJREFUue29gAvNbA2wnPDo71RnAk2AUfGSyVh37xdrO54ys9pxvsuBacV+5yIi5YQaiIqIiEhG6TKKiIiIZJSSDREREckoJRsiIiKSUUo2REREJKOUbIiIiEhGKdkQERGRjFKyISIiIhn1/2p/esEbfqUvAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plot_relative_time(results, [2], max_batch_size=max_batch_size)" + ] + }, + { + "cell_type": "markdown", + "id": "f7dc206c", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "As illustrated in the experiments, KeOps allows you to drastically speed up and scale up drift detection to larger datasets without running into memory issues. The speed benefit of KeOps over the PyTorch (or TensorFlow) MMD detector decreases as the number of features increases. Note though that it is not advised to apply the (untrained) MMD detector to very high-dimensional data in the first place." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [conda env:detect]", + "language": "python", + "name": "conda-env-detect-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 244ceb25573ddfd9abaa0eb3eff9126b922f14b2 Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Fri, 8 Jul 2022 14:54:30 +0100 Subject: [PATCH 13/50] update test mmd --- alibi_detect/cd/keops/tests/test_mmd_keops.py | 0 alibi_detect/cd/tests/test_mmd.py | 5 ++++- 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 alibi_detect/cd/keops/tests/test_mmd_keops.py diff --git a/alibi_detect/cd/keops/tests/test_mmd_keops.py b/alibi_detect/cd/keops/tests/test_mmd_keops.py new file mode 100644 index 000000000..e69de29bb diff --git a/alibi_detect/cd/tests/test_mmd.py b/alibi_detect/cd/tests/test_mmd.py index 33e776e14..b2fa8cbb5 100644 --- a/alibi_detect/cd/tests/test_mmd.py +++ b/alibi_detect/cd/tests/test_mmd.py @@ -1,12 +1,13 @@ import numpy as np import pytest from alibi_detect.cd import MMDDrift +from 
alibi_detect.cd.keops.mmd import MMDDriftKeops from alibi_detect.cd.pytorch.mmd import MMDDriftTorch from alibi_detect.cd.tensorflow.mmd import MMDDriftTF n, n_features = 100, 5 -tests_mmddrift = ['tensorflow', 'pytorch', 'PyToRcH', 'mxnet'] +tests_mmddrift = ['tensorflow', 'pytorch', 'keops', 'PyToRcH', 'mxnet'] n_tests = len(tests_mmddrift) @@ -29,5 +30,7 @@ def test_mmddrift(mmddrift_params): assert isinstance(cd._detector, MMDDriftTorch) elif backend.lower() == 'tensorflow': assert isinstance(cd._detector, MMDDriftTF) + elif backend.lower() == 'keops': + assert isinstance(cd._detector, MMDDriftKeops) else: assert cd is None From f913f5b9c40c92b56e43c96bf7c4eb9858fbe9fb Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Fri, 8 Jul 2022 15:27:17 +0100 Subject: [PATCH 14/50] add test mmd keops --- alibi_detect/cd/keops/tests/test_mmd_keops.py | 103 ++++++++++++++++++ .../utils/keops/tests/test_kernels_keops.py | 0 2 files changed, 103 insertions(+) create mode 100644 alibi_detect/utils/keops/tests/test_kernels_keops.py diff --git a/alibi_detect/cd/keops/tests/test_mmd_keops.py b/alibi_detect/cd/keops/tests/test_mmd_keops.py index e69de29bb..6e04e7ebc 100644 --- a/alibi_detect/cd/keops/tests/test_mmd_keops.py +++ b/alibi_detect/cd/keops/tests/test_mmd_keops.py @@ -0,0 +1,103 @@ +from functools import partial +from itertools import product +import numpy as np +import pytest +import torch +import torch.nn as nn +from typing import Callable, List +from alibi_detect.cd.keops.mmd import MMDDriftKeops +from alibi_detect.cd.pytorch.preprocess import HiddenOutput, preprocess_drift + +n, n_hidden, n_classes = 500, 10, 5 + + +class MyModel(nn.Module): + def __init__(self, n_features: int): + super().__init__() + self.dense1 = nn.Linear(n_features, 20) + self.dense2 = nn.Linear(20, 2) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = nn.ReLU()(self.dense1(x)) + return self.dense2(x) + + +# test List[Any] inputs to the detector +def preprocess_list(x: 
List[np.ndarray]) -> np.ndarray: + return np.concatenate(x, axis=0) + + +n_features = [10] +n_enc = [None, 3] +preprocess = [ + (None, None), + (preprocess_drift, {'model': HiddenOutput, 'layer': -1}), + (preprocess_list, None) +] +update_x_ref = [{'last': 750}, {'reservoir_sampling': 750}, None] +preprocess_x_ref = [True, False] +n_permutations = [10] +batch_size_permutations = [10, 1000000] +configure_kernel_from_x_ref = [True, False] +tests_mmddrift = list(product(n_features, n_enc, preprocess, n_permutations, update_x_ref, preprocess_x_ref, + batch_size_permutations, configure_kernel_from_x_ref)) +n_tests = len(tests_mmddrift) + + +@pytest.fixture +def mmd_params(request): + return tests_mmddrift[request.param] + + +@pytest.mark.parametrize('mmd_params', list(range(n_tests)), indirect=True) +def test_mmd(mmd_params): + n_features, n_enc, preprocess, n_permutations, update_x_ref, preprocess_x_ref, \ + batch_size_permutations, configure_kernel_from_x_ref = mmd_params + + np.random.seed(0) + torch.manual_seed(0) + + x_ref = np.random.randn(n * n_features).reshape(n, n_features).astype(np.float32) + preprocess_fn, preprocess_kwargs = preprocess + to_list = False + if hasattr(preprocess_fn, '__name__') and preprocess_fn.__name__ == 'preprocess_list': + if not preprocess_x_ref: + return + to_list = True + x_ref = [_[None, :] for _ in x_ref] + elif isinstance(preprocess_fn, Callable) and 'layer' in list(preprocess_kwargs.keys()) \ + and preprocess_kwargs['model'].__name__ == 'HiddenOutput': + model = MyModel(n_features) + layer = preprocess_kwargs['layer'] + preprocess_fn = partial(preprocess_fn, model=HiddenOutput(model=model, layer=layer)) + else: + preprocess_fn = None + + cd = MMDDriftKeops( + x_ref=x_ref, + p_val=.05, + preprocess_x_ref=preprocess_x_ref if isinstance(preprocess_fn, Callable) else False, + update_x_ref=update_x_ref, + preprocess_fn=preprocess_fn, + configure_kernel_from_x_ref=configure_kernel_from_x_ref, + n_permutations=n_permutations, + 
batch_size_permutations=batch_size_permutations + ) + x = x_ref.copy() + preds = cd.predict(x, return_p_val=True) + assert preds['data']['is_drift'] == 0 and preds['data']['p_val'] >= cd.p_val + if isinstance(update_x_ref, dict): + k = list(update_x_ref.keys())[0] + assert cd.n == len(x) + len(x_ref) + assert cd.x_ref.shape[0] == min(update_x_ref[k], len(x) + len(x_ref)) + + x_h1 = np.random.randn(n * n_features).reshape(n, n_features).astype(np.float32) + if to_list: + x_h1 = [_[None, :] for _ in x_h1] + preds = cd.predict(x_h1, return_p_val=True) + if preds['data']['is_drift'] == 1: + assert preds['data']['p_val'] < preds['data']['threshold'] == cd.p_val + assert preds['data']['distance'] > preds['data']['distance_threshold'] + else: + assert preds['data']['p_val'] >= preds['data']['threshold'] == cd.p_val + assert preds['data']['distance'] <= preds['data']['distance_threshold'] diff --git a/alibi_detect/utils/keops/tests/test_kernels_keops.py b/alibi_detect/utils/keops/tests/test_kernels_keops.py new file mode 100644 index 000000000..e69de29bb From 2da4a9a36072f8ab9d8a7cfff705aefb108541ec Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Fri, 8 Jul 2022 15:31:59 +0100 Subject: [PATCH 15/50] update readme --- README.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 72fbfdb0f..97edbbf3f 100644 --- a/README.md +++ b/README.md @@ -181,8 +181,9 @@ The following tables show the advised use cases for each algorithm. The column * #### TensorFlow and PyTorch support -The drift detectors support TensorFlow and PyTorch backends. Alibi Detect does however not install PyTorch for you. -Check the [PyTorch docs](https://pytorch.org/) how to do this. Example: +The drift detectors support TensorFlow, PyTorch and (where applicable) [KeOps](https://www.kernel-operations.io/keops/index.html) backends. +Alibi Detect does however not install PyTorch or KeOps for you. 
+Check the [PyTorch docs](https://pytorch.org/) for how to do this. KeOps can be installed via pip: ```pip install pykeops```. Example: ```python from alibi_detect.cd import MMDDrift @@ -198,6 +199,13 @@ cd = MMDDrift(x_ref, backend='pytorch', p_val=.05) preds = cd.predict(x) ``` +Or in KeOps: + +```python +cd = MMDDrift(x_ref, backend='keops', p_val=.05) +preds = cd.predict(x) +``` + #### Built-in preprocessing steps Alibi Detect also comes with various preprocessing steps such as randomly initialized encoders, pretrained text From c49f1d2879d3191687652473cb6e0333df8ace67 Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Fri, 8 Jul 2022 18:30:27 +0100 Subject: [PATCH 16/50] bugfix kernel and update mmd test --- alibi_detect/cd/keops/tests/test_mmd_keops.py | 2 ++ alibi_detect/utils/keops/kernels.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/alibi_detect/cd/keops/tests/test_mmd_keops.py b/alibi_detect/cd/keops/tests/test_mmd_keops.py index 6e04e7ebc..3843d3428 100644 --- a/alibi_detect/cd/keops/tests/test_mmd_keops.py +++ b/alibi_detect/cd/keops/tests/test_mmd_keops.py @@ -54,6 +54,8 @@ def test_mmd(mmd_params): n_features, n_enc, preprocess, n_permutations, update_x_ref, preprocess_x_ref, \ batch_size_permutations, configure_kernel_from_x_ref = mmd_params + print(configure_kernel_from_x_ref, batch_size_permutations, n_features, update_x_ref, preprocess_x_ref) + np.random.seed(0) torch.manual_seed(0) diff --git a/alibi_detect/utils/keops/kernels.py b/alibi_detect/utils/keops/kernels.py index 2f471841a..d0f826772 100644 --- a/alibi_detect/utils/keops/kernels.py +++ b/alibi_detect/utils/keops/kernels.py @@ -23,8 +23,8 @@ def sigma_mean(x: LazyTensor, y: LazyTensor, dist: LazyTensor) -> torch.Tensor: The computed bandwidth, `sigma`. 
""" n = x.shape[0] - if (dist.min(axis=1) == 0.).all() and (torch.arange(n) == dist.argmin(axis=1).cpu().view(-1)).all() \ - and x.shape == y.shape: + if x.shape == y.shape and (dist.min(axis=1) == 0.).all() and \ + (torch.arange(n) == dist.argmin(axis=1).cpu().view(-1)).all(): n_mean = n * (n - 1) else: n_mean = np.prod(dist.shape) From 75481cc3e8730d0941f1263c5299fbaa2d8e42d2 Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Fri, 8 Jul 2022 18:31:06 +0100 Subject: [PATCH 17/50] remove print from test --- alibi_detect/cd/keops/tests/test_mmd_keops.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/alibi_detect/cd/keops/tests/test_mmd_keops.py b/alibi_detect/cd/keops/tests/test_mmd_keops.py index 3843d3428..6e04e7ebc 100644 --- a/alibi_detect/cd/keops/tests/test_mmd_keops.py +++ b/alibi_detect/cd/keops/tests/test_mmd_keops.py @@ -54,8 +54,6 @@ def test_mmd(mmd_params): n_features, n_enc, preprocess, n_permutations, update_x_ref, preprocess_x_ref, \ batch_size_permutations, configure_kernel_from_x_ref = mmd_params - print(configure_kernel_from_x_ref, batch_size_permutations, n_features, update_x_ref, preprocess_x_ref) - np.random.seed(0) torch.manual_seed(0) From e6996b937a308b913d4d1f5415c30c771db0e896 Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Fri, 8 Jul 2022 19:25:42 +0100 Subject: [PATCH 18/50] update keops tests --- alibi_detect/cd/keops/tests/test_mmd_keops.py | 10 +---- .../utils/keops/tests/test_kernels_keops.py | 42 +++++++++++++++++++ 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/alibi_detect/cd/keops/tests/test_mmd_keops.py b/alibi_detect/cd/keops/tests/test_mmd_keops.py index 6e04e7ebc..6fc454d19 100644 --- a/alibi_detect/cd/keops/tests/test_mmd_keops.py +++ b/alibi_detect/cd/keops/tests/test_mmd_keops.py @@ -34,12 +34,11 @@ def preprocess_list(x: List[np.ndarray]) -> np.ndarray: (preprocess_drift, {'model': HiddenOutput, 'layer': -1}), (preprocess_list, None) ] -update_x_ref = [{'last': 750}, 
{'reservoir_sampling': 750}, None] preprocess_x_ref = [True, False] n_permutations = [10] batch_size_permutations = [10, 1000000] configure_kernel_from_x_ref = [True, False] -tests_mmddrift = list(product(n_features, n_enc, preprocess, n_permutations, update_x_ref, preprocess_x_ref, +tests_mmddrift = list(product(n_features, n_enc, preprocess, n_permutations, preprocess_x_ref, batch_size_permutations, configure_kernel_from_x_ref)) n_tests = len(tests_mmddrift) @@ -51,7 +50,7 @@ def mmd_params(request): @pytest.mark.parametrize('mmd_params', list(range(n_tests)), indirect=True) def test_mmd(mmd_params): - n_features, n_enc, preprocess, n_permutations, update_x_ref, preprocess_x_ref, \ + n_features, n_enc, preprocess, n_permutations, preprocess_x_ref, \ batch_size_permutations, configure_kernel_from_x_ref = mmd_params np.random.seed(0) @@ -77,7 +76,6 @@ def test_mmd(mmd_params): x_ref=x_ref, p_val=.05, preprocess_x_ref=preprocess_x_ref if isinstance(preprocess_fn, Callable) else False, - update_x_ref=update_x_ref, preprocess_fn=preprocess_fn, configure_kernel_from_x_ref=configure_kernel_from_x_ref, n_permutations=n_permutations, @@ -86,10 +84,6 @@ def test_mmd(mmd_params): x = x_ref.copy() preds = cd.predict(x, return_p_val=True) assert preds['data']['is_drift'] == 0 and preds['data']['p_val'] >= cd.p_val - if isinstance(update_x_ref, dict): - k = list(update_x_ref.keys())[0] - assert cd.n == len(x) + len(x_ref) - assert cd.x_ref.shape[0] == min(update_x_ref[k], len(x) + len(x_ref)) x_h1 = np.random.randn(n * n_features).reshape(n, n_features).astype(np.float32) if to_list: diff --git a/alibi_detect/utils/keops/tests/test_kernels_keops.py b/alibi_detect/utils/keops/tests/test_kernels_keops.py index e69de29bb..d42a7c0e1 100644 --- a/alibi_detect/utils/keops/tests/test_kernels_keops.py +++ b/alibi_detect/utils/keops/tests/test_kernels_keops.py @@ -0,0 +1,42 @@ +from itertools import product +import numpy as np +from pykeops.torch import LazyTensor +import pytest 
+import torch +from alibi_detect.utils.keops import GaussianRBF + +sigma = [None, np.array([1.]), np.array([1., 2.])] +n_features = [5, 10] +n_instances = [(100, 100), (100, 75)] +trainable = [True, False] +tests_gk = list(product(sigma, n_features, n_instances, trainable)) +n_tests_gk = len(tests_gk) + + +@pytest.fixture +def gaussian_kernel_params(request): + return tests_gk[request.param] + + +@pytest.mark.parametrize('gaussian_kernel_params', list(range(n_tests_gk)), indirect=True) +def test_gaussian_kernel(gaussian_kernel_params): + sigma, n_features, n_instances, trainable = gaussian_kernel_params + xshape, yshape = (n_instances[0], n_features), (n_instances[1], n_features) + + print(sigma, xshape, yshape, trainable) + + sigma = sigma if sigma is None else torch.from_numpy(sigma).float() + x = torch.from_numpy(np.random.random(xshape)).float() + y = torch.from_numpy(np.random.random(yshape)).float() + + kernel = GaussianRBF(sigma=sigma, trainable=trainable) + infer_sigma = True if sigma is None else False + if trainable and infer_sigma: + with pytest.raises(Exception): + kernel(LazyTensor(x[:, None, :]), LazyTensor(y[None, :, :]), infer_sigma=infer_sigma) + else: + k_xy = kernel(LazyTensor(x[:, None, :]), LazyTensor(y[None, :, :]), infer_sigma=infer_sigma) + k_xx = kernel(LazyTensor(x[:, None, :]), LazyTensor(x[None, :, :]), infer_sigma=infer_sigma) + assert k_xy.shape == n_instances and k_xx.shape == (xshape[0], xshape[0]) + assert (torch.arange(xshape[0]) == k_xx.argmax(axis=1).cpu().view(-1)).all() + assert (k_xx.min(axis=1) >= 0.).all() and (k_xy.min(axis=1) >= 0.).all() From c87109f4d12a322d891da92a4221058694415189 Mon Sep 17 00:00:00 2001 From: Ashley Scillitoe Date: Tue, 26 Jul 2022 16:36:42 +0100 Subject: [PATCH 19/50] Add save warning and update tests --- alibi_detect/cd/keops/tests/test_mmd_keops.py | 10 +++++----- alibi_detect/saving/saving.py | 4 ++-- alibi_detect/saving/schemas.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git 
a/alibi_detect/cd/keops/tests/test_mmd_keops.py b/alibi_detect/cd/keops/tests/test_mmd_keops.py index 6fc454d19..0ff6d9071 100644 --- a/alibi_detect/cd/keops/tests/test_mmd_keops.py +++ b/alibi_detect/cd/keops/tests/test_mmd_keops.py @@ -34,11 +34,11 @@ def preprocess_list(x: List[np.ndarray]) -> np.ndarray: (preprocess_drift, {'model': HiddenOutput, 'layer': -1}), (preprocess_list, None) ] -preprocess_x_ref = [True, False] +preprocess_at_init = [True, False] n_permutations = [10] batch_size_permutations = [10, 1000000] configure_kernel_from_x_ref = [True, False] -tests_mmddrift = list(product(n_features, n_enc, preprocess, n_permutations, preprocess_x_ref, +tests_mmddrift = list(product(n_features, n_enc, preprocess, n_permutations, preprocess_at_init, batch_size_permutations, configure_kernel_from_x_ref)) n_tests = len(tests_mmddrift) @@ -50,7 +50,7 @@ def mmd_params(request): @pytest.mark.parametrize('mmd_params', list(range(n_tests)), indirect=True) def test_mmd(mmd_params): - n_features, n_enc, preprocess, n_permutations, preprocess_x_ref, \ + n_features, n_enc, preprocess, n_permutations, preprocess_at_init, \ batch_size_permutations, configure_kernel_from_x_ref = mmd_params np.random.seed(0) @@ -60,7 +60,7 @@ def test_mmd(mmd_params): preprocess_fn, preprocess_kwargs = preprocess to_list = False if hasattr(preprocess_fn, '__name__') and preprocess_fn.__name__ == 'preprocess_list': - if not preprocess_x_ref: + if not preprocess_at_init: return to_list = True x_ref = [_[None, :] for _ in x_ref] @@ -75,7 +75,7 @@ def test_mmd(mmd_params): cd = MMDDriftKeops( x_ref=x_ref, p_val=.05, - preprocess_x_ref=preprocess_x_ref if isinstance(preprocess_fn, Callable) else False, + preprocess_at_init=preprocess_at_init if isinstance(preprocess_fn, Callable) else False, preprocess_fn=preprocess_fn, configure_kernel_from_x_ref=configure_kernel_from_x_ref, n_permutations=n_permutations, diff --git a/alibi_detect/saving/saving.py b/alibi_detect/saving/saving.py index 
975fe2523..b1ae31983 100644 --- a/alibi_detect/saving/saving.py +++ b/alibi_detect/saving/saving.py @@ -46,8 +46,8 @@ def save_detector( if legacy: warnings.warn('The `legacy` option will be removed in a future version.', DeprecationWarning) - if 'backend' in list(detector.meta.keys()) and detector.meta['backend'] in ['pytorch', 'sklearn']: - raise NotImplementedError('Saving detectors with PyTorch or sklearn backend is not yet supported.') + if 'backend' in list(detector.meta.keys()) and detector.meta['backend'] in ['pytorch', 'sklearn', 'keops']: + raise NotImplementedError('Saving detectors with PyTorch, sklearn or keops backend is not yet supported.') # TODO: Replace .__args__ w/ typing.get_args() once Python 3.7 dropped (and remove type ignore below) detector_name = detector.__class__.__name__ diff --git a/alibi_detect/saving/schemas.py b/alibi_detect/saving/schemas.py index c80ce4db0..baba5ef96 100644 --- a/alibi_detect/saving/schemas.py +++ b/alibi_detect/saving/schemas.py @@ -98,7 +98,7 @@ class DetectorConfig(CustomBaseModel): """ name: str "Name of the detector e.g. `MMDDrift`." - backend: Literal['tensorflow', 'pytorch', 'sklearn'] = 'tensorflow' + backend: Literal['tensorflow', 'pytorch', 'sklearn', 'keops'] = 'tensorflow' "The detector backend." meta: Optional[MetaData] "Config metadata. Should not be edited." 
From eb307b65ae388f2b892291407a257920d3c52355 Mon Sep 17 00:00:00 2001 From: Ashley Scillitoe Date: Tue, 26 Jul 2022 17:16:37 +0100 Subject: [PATCH 20/50] Update setup and associated docs --- README.md | 7 +++++- doc/source/overview/getting_started.md | 32 +++++++++++++++++++++++--- examples/cd_mmd_keops.ipynb | 1 + setup.py | 5 +++- 4 files changed, 40 insertions(+), 5 deletions(-) create mode 120000 examples/cd_mmd_keops.ipynb diff --git a/README.md b/README.md index d36f9d916..fadcbd6f4 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ The package, `alibi-detect` can be installed from: pip install git+https://github.com/SeldonIO/alibi-detect.git ``` -- To install with the tensorflow backend: +- To install with the TensorFlow backend: ```bash pip install alibi-detect[tensorflow] ``` @@ -88,6 +88,11 @@ The package, `alibi-detect` can be installed from: pip install alibi-detect[torch] ``` +- To install with the KeOps backend: + ```bash + pip install alibi-detect[keops] + ``` + - To use the `Prophet` time series outlier detector: ```bash diff --git a/doc/source/overview/getting_started.md b/doc/source/overview/getting_started.md index 922a17543..ee83a09b2 100644 --- a/doc/source/overview/getting_started.md +++ b/doc/source/overview/getting_started.md @@ -155,6 +155,31 @@ The TensorFlow installation is required to use the following detectors: ``` ```` +````{tab-item} KeOps +:sync: label-keops +:class-label: sd-pt-0 + +```{div} sd-mb-1 +Installation with [KeOps](https://www.kernel-operations.io) backend. +``` + +```bash +pip install alibi-detect[keops] +``` + +```{div} sd-mb-1 +The KeOps installation is required to use the KeOps backend for the following detectors: +- [MMDDrift](../cd/methods/mmddrift.ipynb) +``` + +```{note} +KeOps requires a C++ compiler compatible with `std=c++11`, for example `g++ >=7` or `clang++ >=8`, and a +[CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit) installation. 
For more detailed version requirements +and testing instructions for KeOps, see the +[KeOps docs](https://www.kernel-operations.io/keops/python/installation.html). +``` +```` + ````{tab-item} Prophet :class-label: sd-pt-0 @@ -199,9 +224,10 @@ mamba install -c conda-forge alibi-detect [Alibi Detect](https://github.com/SeldonIO/alibi-detect) is an open source Python library focused on **outlier**, **adversarial** and **drift** detection. The package aims to cover both -online and offline detectors for tabular data, text, images and time series. -Both **TensorFlow** and **PyTorch** backends are supported for drift detection. Alibi-Detect does not install these as -default. See [installation options](#installation) for more details. +online and offline detectors for tabular data, text, images and time series. **TensorFlow**, **PyTorch** +and (where applicable) [KeOps](https://www.kernel-operations.io/keops/index.html) backends are supported +for drift detection. Alibi-Detect does not install these by default. See [installation options](#installation) +for more details. 
To get a list of respectively the latest outlier, adversarial and drift detection algorithms, you can type: diff --git a/examples/cd_mmd_keops.ipynb b/examples/cd_mmd_keops.ipynb new file mode 120000 index 000000000..fddcc9f46 --- /dev/null +++ b/examples/cd_mmd_keops.ipynb @@ -0,0 +1 @@ +../doc/source/examples/cd_mmd_keops.ipynb \ No newline at end of file diff --git a/setup.py b/setup.py index dc902fa91..5fc64cad9 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,10 @@ def readme(): "tensorflow_probability>=0.8.0, <0.18.0", "tensorflow>=2.2.0, !=2.6.0, !=2.6.1, <2.10.0", # https://github.com/SeldonIO/alibi-detect/issues/375 and 387 ], - 'all': [ + "keops": [ + "pykeops>=2.0.0, <2.2.0", + ], + "all": [ "fbprophet>=0.5, <0.7", "holidays==0.9.11", "pystan<3.0", From 0db2239e53996e1c56b01758dfb1dc2f7eafa2a0 Mon Sep 17 00:00:00 2001 From: Ashley Scillitoe Date: Wed, 27 Jul 2022 11:28:48 +0100 Subject: [PATCH 21/50] Fix typing issue in --- alibi_detect/cd/keops/mmd.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/alibi_detect/cd/keops/mmd.py b/alibi_detect/cd/keops/mmd.py index bebb63382..dc8244dd6 100644 --- a/alibi_detect/cd/keops/mmd.py +++ b/alibi_detect/cd/keops/mmd.py @@ -167,7 +167,8 @@ def score(self, x: Union[np.ndarray, list]) -> Tuple[float, float, float]: # compute kernel matrix, MMD^2 and apply permutation test m, n = x_ref.shape[0], x.shape[0] perms = [torch.randperm(m + n) for _ in range(self.n_permutations)] - x_all = torch.cat([x_ref, x], 0) + # TODO - Rethink typings (related to https://github.com/SeldonIO/alibi-detect/issues/540) + x_all = torch.cat([x_ref, x], 0) # type: ignore[list-item] mmd2, mmd2_permuted = self._mmd2(x_all, perms, m, n) if self.device.type == 'cuda': mmd2, mmd2_permuted = mmd2.cpu(), mmd2_permuted.cpu() From 45e7211263c3de3e55354166b2b6bb4327c5f741 Mon Sep 17 00:00:00 2001 From: Ashley Scillitoe Date: Wed, 27 Jul 2022 11:37:01 +0100 Subject: [PATCH 22/50] Install keops as part of CI --- 
.github/workflows/ci.yml | 2 +- .github/workflows/test_all_notebooks.yml | 2 +- .github/workflows/test_changed_notebooks.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4a317e0de..2ceb446bf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -54,7 +54,7 @@ jobs: if [ "$RUNNER_OS" != "Windows" ] && [ ${{ matrix.python }} < '3.10' ]; then # Skip Prophet tests on Windows as installation complex. Skip on Python 3.10 as not supported. python -m pip install --upgrade --upgrade-strategy eager -e .[prophet] fi - python -m pip install --upgrade --upgrade-strategy eager -e .[tensorflow,torch] + python -m pip install --upgrade --upgrade-strategy eager -e .[tensorflow,torch,keops] python -m pip freeze - name: Lint with flake8 diff --git a/.github/workflows/test_all_notebooks.yml b/.github/workflows/test_all_notebooks.yml index 1787edf5f..abf59df41 100644 --- a/.github/workflows/test_all_notebooks.yml +++ b/.github/workflows/test_all_notebooks.yml @@ -44,7 +44,7 @@ jobs: if [ "$RUNNER_OS" != "Windows" ] && [ ${{ matrix.python }} < '3.10' ]; then # Skip Prophet tests on Windows as installation complex. Skip on Python 3.10 as not supported. python -m pip install --upgrade --upgrade-strategy eager -e .[prophet] fi - python -m pip install --upgrade --upgrade-strategy eager -e .[torch] + python -m pip install --upgrade --upgrade-strategy eager -e .[tensorflow,torch,keops] python -m pip freeze - name: Run notebooks diff --git a/.github/workflows/test_changed_notebooks.yml b/.github/workflows/test_changed_notebooks.yml index 81afd216f..5ac7c19f3 100644 --- a/.github/workflows/test_changed_notebooks.yml +++ b/.github/workflows/test_changed_notebooks.yml @@ -59,7 +59,7 @@ jobs: if [ "$RUNNER_OS" != "Windows" ] && [ ${{ matrix.python }} < '3.10' ]; then # Skip Prophet tests on Windows as installation complex. Skip on Python 3.10 as not supported. 
python -m pip install --upgrade --upgrade-strategy eager -e .[prophet] fi - python -m pip install --upgrade --upgrade-strategy eager -e .[torch,tensorflow] + python -m pip install --upgrade --upgrade-strategy eager -e .[torch,tensorflow,keops] python -m pip freeze - name: Run notebooks From 9cee6bccdeb382605408056f6f08a209da3ba104 Mon Sep 17 00:00:00 2001 From: Alex Athorne Date: Wed, 27 Jul 2022 15:53:31 +0100 Subject: [PATCH 23/50] Add keops tox environment --- alibi_detect/cd/mmd.py | 4 +-- alibi_detect/tests/test_dep_management.py | 34 +++++++++++------------ setup.cfg | 12 ++++++++ setup.py | 1 + 4 files changed, 32 insertions(+), 19 deletions(-) diff --git a/alibi_detect/cd/mmd.py b/alibi_detect/cd/mmd.py index fc5d6d789..9236d706e 100644 --- a/alibi_detect/cd/mmd.py +++ b/alibi_detect/cd/mmd.py @@ -11,7 +11,7 @@ if has_tensorflow: from alibi_detect.cd.tensorflow.mmd import MMDDriftTF -if has_keops: +if has_keops and has_pytorch: from alibi_detect.cd.keops.mmd import MMDDriftKeops logger = logging.getLogger(__name__) @@ -90,7 +90,7 @@ def __init__( BackendValidator( backend_options={'tensorflow': ['tensorflow'], 'pytorch': ['pytorch'], - 'keops': ['keops']}, + 'keops': ['keops', 'pytorch']}, construct_name=self.__class__.__name__ ).verify_backend(backend) diff --git a/alibi_detect/tests/test_dep_management.py b/alibi_detect/tests/test_dep_management.py index 4ee06fd8f..f00cda3a0 100644 --- a/alibi_detect/tests/test_dep_management.py +++ b/alibi_detect/tests/test_dep_management.py @@ -66,8 +66,8 @@ def test_cd_torch_dependencies(opt_dep): dependency_map = defaultdict(lambda: ['default']) for dependency, relations in [ - ("HiddenOutput", ['torch']), - ("preprocess_drift", ['torch']) + ("HiddenOutput", ['torch', 'keops']), + ("preprocess_drift", ['torch', 'keops']) ]: dependency_map[dependency] = relations from alibi_detect.cd import pytorch as cd_pytorch @@ -156,8 +156,8 @@ def test_torch_model_dependencies(opt_dep): dependency_map = defaultdict(lambda: 
['default']) for dependency, relations in [ - ("TransformerEmbedding", ['torch']), - ("trainer", ['torch']), + ("TransformerEmbedding", ['torch', 'keops']), + ("trainer", ['torch', 'keops']), ]: dependency_map[dependency] = relations from alibi_detect.models import pytorch as torch_models @@ -255,19 +255,19 @@ def test_torch_utils_dependencies(opt_dep): dependency_map = defaultdict(lambda: ['default']) for dependency, relations in [ - ("batch_compute_kernel_matrix", ['torch']), - ("mmd2", ['torch']), - ("mmd2_from_kernel_matrix", ['torch']), - ("squared_pairwise_distance", ['torch']), - ("GaussianRBF", ['torch']), - ("DeepKernel", ['torch']), - ("permed_lsdds", ['torch']), - ("predict_batch", ['torch']), - ("predict_batch_transformer", ['torch']), - ("quantile", ['torch']), - ("zero_diag", ['torch']), - ("TorchDataset", ['torch']), - ("get_device", ['torch']), + ("batch_compute_kernel_matrix", ['torch', 'keops']), + ("mmd2", ['torch', 'keops']), + ("mmd2_from_kernel_matrix", ['torch', 'keops']), + ("squared_pairwise_distance", ['torch', 'keops']), + ("GaussianRBF", ['torch', 'keops']), + ("DeepKernel", ['torch', 'keops']), + ("permed_lsdds", ['torch', 'keops']), + ("predict_batch", ['torch', 'keops']), + ("predict_batch_transformer", ['torch', 'keops']), + ("quantile", ['torch', 'keops']), + ("zero_diag", ['torch', 'keops']), + ("TorchDataset", ['torch', 'keops']), + ("get_device", ['torch', 'keops']), ]: dependency_map[dependency] = relations from alibi_detect.utils import pytorch as pytorch_utils diff --git a/setup.cfg b/setup.cfg index 29350aa97..926613e4d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -43,6 +43,7 @@ envlist= tensorflow torch prophet + keops all # tox test environment for generating licenses @@ -112,6 +113,17 @@ extras= commands = {env:COMMAND:pytest --no-cov alibi_detect/tests/test_dep_management.py --opt-dep=prophet} +# tox test environment for testing keops optional dependency imports +[testenv:keops] +basepython = python +deps = pytest + 
pytest-cov + pytest-randomly +extras= + keops +commands = + {env:COMMAND:pytest --no-cov alibi_detect/tests/test_dep_management.py --opt-dep=keops} + # environment for testing imports with all optional dependencies installed [testenv:all] basepython = python diff --git a/setup.py b/setup.py index 5fc64cad9..683f49be1 100644 --- a/setup.py +++ b/setup.py @@ -25,6 +25,7 @@ def readme(): ], "keops": [ "pykeops>=2.0.0, <2.2.0", + "torch>=1.7.0" ], "all": [ "fbprophet>=0.5, <0.7", From 3fa460cc9a2463e585fba56e4bf5f52e4a0ecac6 Mon Sep 17 00:00:00 2001 From: Alex Athorne Date: Thu, 28 Jul 2022 09:48:15 +0100 Subject: [PATCH 24/50] Add keops to all dependency bucket --- doc/source/examples/cd_mmd_keops.ipynb | 10 +++++----- setup.py | 3 ++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/doc/source/examples/cd_mmd_keops.ipynb b/doc/source/examples/cd_mmd_keops.ipynb index 280170883..b8f486b4b 100644 --- a/doc/source/examples/cd_mmd_keops.ipynb +++ b/doc/source/examples/cd_mmd_keops.ipynb @@ -34,7 +34,7 @@ "\n", "## Requirements\n", "\n", - "The notebook requires [PyTorch](https://pytorch.org/) and KeOps to be installed. Once PyTorch is installed, KeOps can be installed via pip:" + "The notebook requires [PyTorch](https://pytorch.org/) and KeOps to be installed. 
These are optional dependencies for $\\texttt{Alibi Detect}$ and can be installed using:" ] }, { @@ -44,7 +44,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install pykeops" + "!pip install alibi-detect[keops]" ] }, { @@ -491,9 +491,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:detect]", + "display_name": "Python 3", "language": "python", - "name": "conda-env-detect-py" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -505,7 +505,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.8.13" } }, "nbformat": 4, diff --git a/setup.py b/setup.py index 683f49be1..b284100cd 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,8 @@ def readme(): "pystan<3.0", "tensorflow_probability>=0.8.0, <0.18.0", "tensorflow>=2.2.0, !=2.6.0, !=2.6.1, <2.10.0", # https://github.com/SeldonIO/alibi-detect/issues/375 and 387 - "torch>=1.7.0" + "torch>=1.7.0", + "pykeops>=2.0.0, <2.2.0" ], } From 82f6d3c71779026f01f6cd77ab7d2bb245e335f3 Mon Sep 17 00:00:00 2001 From: Alex Athorne Date: Thu, 28 Jul 2022 15:10:37 +0100 Subject: [PATCH 25/50] Fix minor issue --- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index d26a8430f..f7dc86e60 100644 --- a/setup.py +++ b/setup.py @@ -33,8 +33,7 @@ def readme(): "pystan<3.0", "tensorflow_probability>=0.8.0, <0.18.0", "tensorflow>=2.2.0, !=2.6.0, !=2.6.1, <2.10.0", # https://github.com/SeldonIO/alibi-detect/issues/375 and 387 - - "pykeops>=2.0.0, <2.2.0" + "pykeops>=2.0.0, <2.2.0", "torch>=1.7.0, <1.13.0" ], } From 71142d79b88c1b98085c86fbf5bf8e027fd6870f Mon Sep 17 00:00:00 2001 From: Alex Athorne Date: Thu, 28 Jul 2022 15:55:38 +0100 Subject: [PATCH 26/50] Protect GaussianRBF with import optional --- alibi_detect/tests/test_dep_management.py | 13 +++++++++++++ alibi_detect/utils/keops/__init__.py | 5 ++++- alibi_detect/utils/missing_optional_dependency.py | 3 ++- 3 files changed, 19 
insertions(+), 2 deletions(-) diff --git a/alibi_detect/tests/test_dep_management.py b/alibi_detect/tests/test_dep_management.py index f00cda3a0..4333d93c9 100644 --- a/alibi_detect/tests/test_dep_management.py +++ b/alibi_detect/tests/test_dep_management.py @@ -272,3 +272,16 @@ def test_torch_utils_dependencies(opt_dep): dependency_map[dependency] = relations from alibi_detect.utils import pytorch as pytorch_utils check_correct_dependencies(pytorch_utils, dependency_map, opt_dep) + + +def test_keops_utils_dependencies(opt_dep): + """Tests that the keops utils module correctly protects against uninstalled optional dependencies. + """ + + dependency_map = defaultdict(lambda: ['default']) + for dependency, relations in [ + ("GaussianRBF", ['keops']), + ]: + dependency_map[dependency] = relations + from alibi_detect.utils import keops as keops_utils + check_correct_dependencies(keops_utils, dependency_map, opt_dep) diff --git a/alibi_detect/utils/keops/__init__.py b/alibi_detect/utils/keops/__init__.py index 235176e6b..9da9f5073 100644 --- a/alibi_detect/utils/keops/__init__.py +++ b/alibi_detect/utils/keops/__init__.py @@ -1,4 +1,7 @@ -from .kernels import GaussianRBF +from alibi_detect.utils.missing_optional_dependency import import_optional + + +GaussianRBF = import_optional('alibi_detect.utils.keops.kernels', names=['GaussianRBF']) __all__ = [ "GaussianRBF" diff --git a/alibi_detect/utils/missing_optional_dependency.py b/alibi_detect/utils/missing_optional_dependency.py index 4d6331286..dfe96d656 100644 --- a/alibi_detect/utils/missing_optional_dependency.py +++ b/alibi_detect/utils/missing_optional_dependency.py @@ -25,7 +25,8 @@ "tensorflow_probability": 'tensorflow', "tensorflow": 'tensorflow', "torch": 'torch', - "pytorch": 'torch' + "pytorch": 'torch', + "pykeops": 'keops' } From 9c2da77734c1d30cbb8da8f9312da23a5441c249 Mon Sep 17 00:00:00 2001 From: Ashley Scillitoe Date: Thu, 28 Jul 2022 18:50:46 +0100 Subject: [PATCH 27/50] Skip keops tests on Windows, and 
keops notebook test. Fix backend validator. --- .github/workflows/ci.yml | 5 ++++- .github/workflows/test_all_notebooks.yml | 2 +- .github/workflows/test_changed_notebooks.yml | 2 +- alibi_detect/cd/keops/tests/test_mmd_keops.py | 5 ++++- alibi_detect/cd/mmd.py | 2 +- alibi_detect/cd/tests/test_mmd.py | 10 ++++++---- alibi_detect/utils/missing_optional_dependency.py | 2 +- testing/test_notebooks.py | 1 + 8 files changed, 19 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2ceb446bf..ce81af98b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -54,7 +54,10 @@ jobs: if [ "$RUNNER_OS" != "Windows" ] && [ ${{ matrix.python }} < '3.10' ]; then # Skip Prophet tests on Windows as installation complex. Skip on Python 3.10 as not supported. python -m pip install --upgrade --upgrade-strategy eager -e .[prophet] fi - python -m pip install --upgrade --upgrade-strategy eager -e .[tensorflow,torch,keops] + if [ "$RUNNER_OS" != "Windows" ]; then # Skip KeOps tests on Windows as KeOps not supported. + python -m pip install --upgrade --upgrade-strategy eager -e .[keops] + fi + python -m pip install --upgrade --upgrade-strategy eager -e .[tensorflow,torch] python -m pip freeze - name: Lint with flake8 diff --git a/.github/workflows/test_all_notebooks.yml b/.github/workflows/test_all_notebooks.yml index 5b37862ef..84ba3064b 100644 --- a/.github/workflows/test_all_notebooks.yml +++ b/.github/workflows/test_all_notebooks.yml @@ -44,7 +44,7 @@ jobs: if [ "$RUNNER_OS" != "Windows" ] && [ ${{ matrix.python }} < '3.10' ]; then # Skip Prophet tests on Windows as installation complex. Skip on Python 3.10 as not supported. 
python -m pip install --upgrade --upgrade-strategy eager -e .[prophet] fi - python -m pip install --upgrade --upgrade-strategy eager -e .[torch,tensorflow,keops] + python -m pip install --upgrade --upgrade-strategy eager -e .[torch,tensorflow] python -m pip freeze - name: Run notebooks diff --git a/.github/workflows/test_changed_notebooks.yml b/.github/workflows/test_changed_notebooks.yml index 5ac7c19f3..81afd216f 100644 --- a/.github/workflows/test_changed_notebooks.yml +++ b/.github/workflows/test_changed_notebooks.yml @@ -59,7 +59,7 @@ jobs: if [ "$RUNNER_OS" != "Windows" ] && [ ${{ matrix.python }} < '3.10' ]; then # Skip Prophet tests on Windows as installation complex. Skip on Python 3.10 as not supported. python -m pip install --upgrade --upgrade-strategy eager -e .[prophet] fi - python -m pip install --upgrade --upgrade-strategy eager -e .[torch,tensorflow,keops] + python -m pip install --upgrade --upgrade-strategy eager -e .[torch,tensorflow] python -m pip freeze - name: Run notebooks diff --git a/alibi_detect/cd/keops/tests/test_mmd_keops.py b/alibi_detect/cd/keops/tests/test_mmd_keops.py index 0ff6d9071..8ebfdc647 100644 --- a/alibi_detect/cd/keops/tests/test_mmd_keops.py +++ b/alibi_detect/cd/keops/tests/test_mmd_keops.py @@ -5,8 +5,10 @@ import torch import torch.nn as nn from typing import Callable, List -from alibi_detect.cd.keops.mmd import MMDDriftKeops +from alibi_detect.utils.frameworks import has_keops from alibi_detect.cd.pytorch.preprocess import HiddenOutput, preprocess_drift +if has_keops: + from alibi_detect.cd.keops.mmd import MMDDriftKeops n, n_hidden, n_classes = 500, 10, 5 @@ -48,6 +50,7 @@ def mmd_params(request): return tests_mmddrift[request.param] +@pytest.mark.skipif(not has_keops, reason='Skipping since pykeops is not installed.') @pytest.mark.parametrize('mmd_params', list(range(n_tests)), indirect=True) def test_mmd(mmd_params): n_features, n_enc, preprocess, n_permutations, preprocess_at_init, \ diff --git 
a/alibi_detect/cd/mmd.py b/alibi_detect/cd/mmd.py index 9236d706e..74ad8152f 100644 --- a/alibi_detect/cd/mmd.py +++ b/alibi_detect/cd/mmd.py @@ -90,7 +90,7 @@ def __init__( BackendValidator( backend_options={'tensorflow': ['tensorflow'], 'pytorch': ['pytorch'], - 'keops': ['keops', 'pytorch']}, + 'keops': ['keops']}, construct_name=self.__class__.__name__ ).verify_backend(backend) diff --git a/alibi_detect/cd/tests/test_mmd.py b/alibi_detect/cd/tests/test_mmd.py index b2fa8cbb5..c070dcaeb 100644 --- a/alibi_detect/cd/tests/test_mmd.py +++ b/alibi_detect/cd/tests/test_mmd.py @@ -1,9 +1,11 @@ import numpy as np import pytest from alibi_detect.cd import MMDDrift -from alibi_detect.cd.keops.mmd import MMDDriftKeops from alibi_detect.cd.pytorch.mmd import MMDDriftTorch from alibi_detect.cd.tensorflow.mmd import MMDDriftTF +from alibi_detect.utils.frameworks import has_keops +if has_keops: + from alibi_detect.cd.keops.mmd import MMDDriftKeops n, n_features = 100, 5 @@ -19,18 +21,18 @@ def mmddrift_params(request): @pytest.mark.parametrize('mmddrift_params', list(range(n_tests)), indirect=True) def test_mmddrift(mmddrift_params): backend = mmddrift_params - x_ref = np.random.randn(*(n, n_features)) + x_ref = np.random.randn(*(n, n_features)).astype('float32') try: cd = MMDDrift(x_ref=x_ref, backend=backend) - except NotImplementedError: + except (NotImplementedError, ImportError): cd = None if backend.lower() == 'pytorch': assert isinstance(cd._detector, MMDDriftTorch) elif backend.lower() == 'tensorflow': assert isinstance(cd._detector, MMDDriftTF) - elif backend.lower() == 'keops': + elif backend.lower() == 'keops' and has_keops: assert isinstance(cd._detector, MMDDriftKeops) else: assert cd is None diff --git a/alibi_detect/utils/missing_optional_dependency.py b/alibi_detect/utils/missing_optional_dependency.py index dfe96d656..6e4f80bad 100644 --- a/alibi_detect/utils/missing_optional_dependency.py +++ b/alibi_detect/utils/missing_optional_dependency.py @@ -26,7 
+26,7 @@ "tensorflow": 'tensorflow', "torch": 'torch', "pytorch": 'torch', - "pykeops": 'keops' + "keops": 'keops', } diff --git a/testing/test_notebooks.py b/testing/test_notebooks.py index 48a94c264..d885f4c9c 100644 --- a/testing/test_notebooks.py +++ b/testing/test_notebooks.py @@ -38,6 +38,7 @@ 'cd_context_20newsgroup.ipynb', 'cd_context_ecg.ipynb', 'cd_text_imdb.ipynb', + 'cd_mmd_keops.ipynb', # the following requires a k8s cluster 'alibi_detect_deploy.ipynb', # the following require downloading large datasets From b126a6d05019052d1fc5b52f01f6fb7541ec36ec Mon Sep 17 00:00:00 2001 From: Ashley Scillitoe Date: Thu, 28 Jul 2022 19:21:06 +0100 Subject: [PATCH 28/50] Skip keops kernel tests if not installed --- alibi_detect/utils/keops/tests/test_kernels_keops.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/alibi_detect/utils/keops/tests/test_kernels_keops.py b/alibi_detect/utils/keops/tests/test_kernels_keops.py index d42a7c0e1..abdb5fd92 100644 --- a/alibi_detect/utils/keops/tests/test_kernels_keops.py +++ b/alibi_detect/utils/keops/tests/test_kernels_keops.py @@ -1,9 +1,11 @@ from itertools import product import numpy as np -from pykeops.torch import LazyTensor +from alibi_detect.utils.frameworks import has_keops import pytest import torch -from alibi_detect.utils.keops import GaussianRBF +if has_keops: + from pykeops.torch import LazyTensor + from alibi_detect.utils.keops import GaussianRBF sigma = [None, np.array([1.]), np.array([1., 2.])] n_features = [5, 10] @@ -18,6 +20,7 @@ def gaussian_kernel_params(request): return tests_gk[request.param] +@pytest.mark.skipif(not has_keops, reason='Skipping since pykeops is not installed.') @pytest.mark.parametrize('gaussian_kernel_params', list(range(n_tests_gk)), indirect=True) def test_gaussian_kernel(gaussian_kernel_params): sigma, n_features, n_instances, trainable = gaussian_kernel_params From 48ad925589ba1532a72983ed1794dce21d140e11 Mon Sep 17 00:00:00 2001 From: Ashley Scillitoe Date: 
Fri, 29 Jul 2022 10:41:11 +0100 Subject: [PATCH 29/50] Add pykeops to op deps ERROR_TYPES --- alibi_detect/utils/missing_optional_dependency.py | 1 + 1 file changed, 1 insertion(+) diff --git a/alibi_detect/utils/missing_optional_dependency.py b/alibi_detect/utils/missing_optional_dependency.py index 6e4f80bad..aa8977588 100644 --- a/alibi_detect/utils/missing_optional_dependency.py +++ b/alibi_detect/utils/missing_optional_dependency.py @@ -27,6 +27,7 @@ "torch": 'torch', "pytorch": 'torch', "keops": 'keops', + "pykeops": 'keops', } From 74ff9923990a2fae4eaf5d7e255b3e9d4a219023 Mon Sep 17 00:00:00 2001 From: Ashley Scillitoe Date: Fri, 29 Jul 2022 14:23:32 +0100 Subject: [PATCH 30/50] Skip keops tests on MacOS --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ce81af98b..f477daaa7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -54,7 +54,7 @@ jobs: if [ "$RUNNER_OS" != "Windows" ] && [ ${{ matrix.python }} < '3.10' ]; then # Skip Prophet tests on Windows as installation complex. Skip on Python 3.10 as not supported. python -m pip install --upgrade --upgrade-strategy eager -e .[prophet] fi - if [ "$RUNNER_OS" != "Windows" ]; then # Skip KeOps tests on Windows as KeOps not supported. + if [ "$RUNNER_OS" == "Linux" ]; then # Currently, we only support KeOps on Linux.
python -m pip install --upgrade --upgrade-strategy eager -e .[keops] fi python -m pip install --upgrade --upgrade-strategy eager -e .[tensorflow,torch] From 7c5e70dfd6ac3f6571215e70edbb982c4942601d Mon Sep 17 00:00:00 2001 From: Ashley Scillitoe Date: Fri, 29 Jul 2022 14:47:10 +0100 Subject: [PATCH 31/50] Add note to docs about linux-only support for keops --- doc/source/conf.py | 3 ++- doc/source/overview/getting_started.md | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index 98630dfad..cd4bf6319 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -110,7 +110,8 @@ "numba", "pydantic", "toml", - "catalogue" + "catalogue", + "pykeops" ] # Napoleon settings diff --git a/doc/source/overview/getting_started.md b/doc/source/overview/getting_started.md index ee83a09b2..9e987686f 100644 --- a/doc/source/overview/getting_started.md +++ b/doc/source/overview/getting_started.md @@ -176,7 +176,8 @@ The KeOps installation is required to use the KeOps backend for the following de KeOps requires a C++ compiler compatible with `std=c++11`, for example `g++ >=7` or `clang++ >=8`, and a [Cuda toolkit](https://developer.nvidia.com/cuda-toolkit) installation. For more detailed version requirements and testing instructions for KeOps, see the -[KeOps docs](https://www.kernel-operations.io/keops/python/installation.html). +[KeOps docs](https://www.kernel-operations.io/keops/python/installation.html). 
**Currently, the KeOps backend is +only officially supported on Linux.** ``` ```` From fc12b9d492795f232f97cd478b5c32e41fe41b32 Mon Sep 17 00:00:00 2001 From: Ashley Scillitoe Date: Fri, 29 Jul 2022 14:53:15 +0100 Subject: [PATCH 32/50] Add batch_size_permutations to pydantic models --- alibi_detect/saving/schemas.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/alibi_detect/saving/schemas.py b/alibi_detect/saving/schemas.py index baba5ef96..8d857c9f9 100644 --- a/alibi_detect/saving/schemas.py +++ b/alibi_detect/saving/schemas.py @@ -634,6 +634,7 @@ class MMDDriftConfig(DriftDetectorConfig): sigma: Optional[NDArray[np.float32]] = None configure_kernel_from_x_ref: bool = True n_permutations: int = 100 + batch_size_permutations: int = 1000000 device: Optional[Literal['cpu', 'cuda']] = None @@ -652,6 +653,7 @@ class MMDDriftConfigResolved(DriftDetectorConfigResolved): sigma: Optional[NDArray[np.float32]] = None configure_kernel_from_x_ref: bool = True n_permutations: int = 100 + batch_size_permutations: int = 1000000 device: Optional[Literal['cpu', 'cuda']] = None From f6b331b352d3f4c031b295747d2f0df4a2328936 Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Tue, 9 Aug 2022 11:41:08 +0100 Subject: [PATCH 33/50] remove print --- alibi_detect/utils/keops/tests/test_kernels_keops.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/alibi_detect/utils/keops/tests/test_kernels_keops.py b/alibi_detect/utils/keops/tests/test_kernels_keops.py index abdb5fd92..1f0e20b7c 100644 --- a/alibi_detect/utils/keops/tests/test_kernels_keops.py +++ b/alibi_detect/utils/keops/tests/test_kernels_keops.py @@ -25,9 +25,6 @@ def gaussian_kernel_params(request): def test_gaussian_kernel(gaussian_kernel_params): sigma, n_features, n_instances, trainable = gaussian_kernel_params xshape, yshape = (n_instances[0], n_features), (n_instances[1], n_features) - - print(sigma, xshape, yshape, trainable) - sigma = sigma if sigma is None else torch.from_numpy(sigma).float() x = 
torch.from_numpy(np.random.random(xshape)).float() y = torch.from_numpy(np.random.random(yshape)).float() From ace20cca098459ae603ceb77153ba1a27bb562cc Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Tue, 9 Aug 2022 11:42:48 +0100 Subject: [PATCH 34/50] remove unnecessary comment --- alibi_detect/utils/keops/kernels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alibi_detect/utils/keops/kernels.py b/alibi_detect/utils/keops/kernels.py index d0f826772..11a2e3894 100644 --- a/alibi_detect/utils/keops/kernels.py +++ b/alibi_detect/utils/keops/kernels.py @@ -78,7 +78,7 @@ def forward(self, x: LazyTensor, y: LazyTensor, infer_sigma: bool = False) -> La if infer_sigma or self.init_required: if self.trainable and infer_sigma: raise ValueError("Gradients cannot be computed w.r.t. an inferred sigma value") - sigma = self.init_sigma_fn(x, y, dist) # .to(x.device) + sigma = self.init_sigma_fn(x, y, dist) with torch.no_grad(): self.log_sigma.copy_(sigma.log().clone()) self.init_required = False From 718fb8507df95b84af0055c2a5de0dae0b1d01e2 Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Tue, 9 Aug 2022 11:57:05 +0100 Subject: [PATCH 35/50] change default bandwidth fn to None --- alibi_detect/utils/keops/kernels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alibi_detect/utils/keops/kernels.py b/alibi_detect/utils/keops/kernels.py index 11a2e3894..7958f5c67 100644 --- a/alibi_detect/utils/keops/kernels.py +++ b/alibi_detect/utils/keops/kernels.py @@ -36,7 +36,7 @@ class GaussianRBF(nn.Module): def __init__( self, sigma: Optional[torch.Tensor] = None, - init_sigma_fn: Callable = sigma_mean, + init_sigma_fn: Callable = None, trainable: bool = False ) -> None: """ From 5922e3f4259092cfe0fa25225ce8036d73698bb1 Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Tue, 9 Aug 2022 12:26:39 +0100 Subject: [PATCH 36/50] update infer sigma --- alibi_detect/cd/keops/mmd.py | 4 +++- 1 file changed, 3 insertions(+), 1 
deletion(-) diff --git a/alibi_detect/cd/keops/mmd.py b/alibi_detect/cd/keops/mmd.py index dc8244dd6..19b68aab8 100644 --- a/alibi_detect/cd/keops/mmd.py +++ b/alibi_detect/cd/keops/mmd.py @@ -97,7 +97,9 @@ def __init__( self.n_batches = 1 + (n_permutations - 1) // batch_size_permutations # infer the kernel bandwidth from the reference data - if self.infer_sigma or isinstance(sigma, torch.Tensor): + if isinstance(sigma, torch.Tensor): + self.infer_sigma = False + elif self.infer_sigma: x = torch.from_numpy(self.x_ref).to(self.device) _ = self.kernel(LazyTensor(x[:, None, :]), LazyTensor(x[None, :, :]), infer_sigma=self.infer_sigma) self.infer_sigma = False From b8adfbe589d9087f9495b88f31e3a55fea965601 Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Tue, 9 Aug 2022 19:06:05 +0100 Subject: [PATCH 37/50] update test warning, update and clarify keops kernels logic --- alibi_detect/cd/keops/mmd.py | 4 +- alibi_detect/utils/keops/kernels.py | 37 ++++++++++++++----- .../utils/keops/tests/test_kernels_keops.py | 2 +- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/alibi_detect/cd/keops/mmd.py b/alibi_detect/cd/keops/mmd.py index 19b68aab8..d122031e4 100644 --- a/alibi_detect/cd/keops/mmd.py +++ b/alibi_detect/cd/keops/mmd.py @@ -138,12 +138,12 @@ def _mmd2(self, x_all: torch.Tensor, perms: List[torch.Tensor], m: int, n: int) x, y = x.to(self.device), y.to(self.device) # batch-wise kernel matrix computation over the permutations + k_xy.append(self.kernel( + LazyTensor(x[:, :, None, :]), LazyTensor(y[:, None, :, :]), self.infer_sigma).sum(1).sum(1).squeeze(-1)) k_xx.append(self.kernel( LazyTensor(x[:, :, None, :]), LazyTensor(x[:, None, :, :])).sum(1).sum(1).squeeze(-1)) k_yy.append(self.kernel( LazyTensor(y[:, :, None, :]), LazyTensor(y[:, None, :, :])).sum(1).sum(1).squeeze(-1)) - k_xy.append(self.kernel( - LazyTensor(x[:, :, None, :]), LazyTensor(y[:, None, :, :])).sum(1).sum(1).squeeze(-1)) c_xx, c_yy, c_xy = 1 / (m * (m - 1)), 1 / (n * (n - 1)), 
2. / (m * n) stats = c_xx * (torch.cat(k_xx) - m) + c_yy * (torch.cat(k_yy) - n) - c_xy * torch.cat(k_xy) return stats[0], stats[1:] diff --git a/alibi_detect/utils/keops/kernels.py b/alibi_detect/utils/keops/kernels.py index 7958f5c67..fad852faa 100644 --- a/alibi_detect/utils/keops/kernels.py +++ b/alibi_detect/utils/keops/kernels.py @@ -5,9 +5,9 @@ from typing import Callable, Optional -def sigma_mean(x: LazyTensor, y: LazyTensor, dist: LazyTensor) -> torch.Tensor: +def sigma_mean(x: LazyTensor, y: LazyTensor, dist: LazyTensor, n_min: int = None) -> torch.Tensor: """ - Bandwidth estimation using the mean heuristic. + Set bandwidth to the mean distance between instances x and y. Parameters ---------- @@ -16,16 +16,29 @@ def sigma_mean(x: LazyTensor, y: LazyTensor, dist: LazyTensor) -> torch.Tensor: y LazyTensor of instances with dimension [1, Ny, features]. dist - LazyTensor with dimensions [Nx, Ny], containing the pairwise distances between `x` and `y`. + LazyTensor with dimensions [Nx, Ny] containing the pairwise distances between `x` and `y`. + n_min + In order to check whether x equals y after squeezing the singleton dimensions, we check if the + diagonal of the distance matrix (which is a lazy tensor from which the diagonal cannot be directly extracted) + consists of all zeros. We do this by computing the k-min distances and k-argmin indices over the + columns of the distance matrix. We then check if the distances on the diagonal of the distance matrix + are all zero or not. If they are all zero, then we do not use these distances (zeros) when computing + the mean pairwise distance as bandwidth. The default `None` sets k to Nx (=Ny). If Nx becomes very large, + it is advised to set `n_min` to a lower value. Returns ------- The computed bandwidth, `sigma`. 
""" - n = x.shape[0] - if x.shape == y.shape and (dist.min(axis=1) == 0.).all() and \ - (torch.arange(n) == dist.argmin(axis=1).cpu().view(-1)).all(): - n_mean = n * (n - 1) + nx, ny = dist.shape + if nx == ny: + n_min = n_min if isinstance(n_min, int) else nx + d_min, id_min = dist.Kmin_argKmin(n_min, axis=1) + rows, cols = torch.where(id_min.cpu() == torch.arange(nx)[:, None]) + if (d_min[rows, cols] == 0.).all(): + n_mean = n * (n - 1) + else: + n_mean = np.prod(dist.shape) else: n_mean = np.prod(dist.shape) sigma = (.5 * dist.sum(1).sum().unsqueeze(-1) / n_mean) ** .5 @@ -41,8 +54,11 @@ def __init__( ) -> None: """ Gaussian RBF kernel: k(x,y) = exp(-(1/(2*sigma^2)||x-y||^2). A forward pass takes - a batch of instances x [Nx, features] and y [Ny, features] and returns the kernel - matrix [Nx, Ny]. + a batch of instances x and y and returns the kernel matrix. + x can be of shape [Nx, 1, features] or [batch_size, Nx, 1, features]. + y can be of shape [1, Ny, features] or [batch_size, 1, Ny, features]. + The returned kernel matrix can be of shape [Nx, Ny] or [batch_size, Nx, Ny]. + x, y and the returned kernel matrix are all lazy tensors. Parameters ---------- @@ -52,11 +68,12 @@ def __init__( init_sigma_fn Function used to compute the bandwidth `sigma`. Used when `sigma` is to be inferred. The function's signature should match :py:func:`~alibi_detect.utils.keops.kernels.sigma_mean`, - meaning that it should take in the tensors `x`, `y` and `dist` and return `sigma`. + meaning that it should take in the lazy tensors `x`, `y` and `dist` and return a tensor `sigma`. trainable Whether or not to track gradients w.r.t. `sigma` to allow it to be trained. 
""" super().__init__() + init_sigma_fn = sigma_mean if init_sigma_fn is None else init_sigma_fn if sigma is None: self.log_sigma = nn.Parameter(torch.empty(1), requires_grad=trainable) self.init_required = True diff --git a/alibi_detect/utils/keops/tests/test_kernels_keops.py b/alibi_detect/utils/keops/tests/test_kernels_keops.py index 1f0e20b7c..685be989d 100644 --- a/alibi_detect/utils/keops/tests/test_kernels_keops.py +++ b/alibi_detect/utils/keops/tests/test_kernels_keops.py @@ -32,7 +32,7 @@ def test_gaussian_kernel(gaussian_kernel_params): kernel = GaussianRBF(sigma=sigma, trainable=trainable) infer_sigma = True if sigma is None else False if trainable and infer_sigma: - with pytest.raises(Exception): + with pytest.raises(ValueError): kernel(LazyTensor(x[:, None, :]), LazyTensor(y[None, :, :]), infer_sigma=infer_sigma) else: k_xy = kernel(LazyTensor(x[:, None, :]), LazyTensor(y[None, :, :]), infer_sigma=infer_sigma) From 015cc5eba464ccc47a346e6cfbaed793d6991d49 Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Tue, 9 Aug 2022 19:35:20 +0100 Subject: [PATCH 38/50] clean up --- doc/source/examples/cd_mmd_keops.ipynb | 143 +++++++++++++++++++------ 1 file changed, 113 insertions(+), 30 deletions(-) diff --git a/doc/source/examples/cd_mmd_keops.ipynb b/doc/source/examples/cd_mmd_keops.ipynb index b8f486b4b..dac1556e8 100644 --- a/doc/source/examples/cd_mmd_keops.ipynb +++ b/doc/source/examples/cd_mmd_keops.ipynb @@ -3,7 +3,11 @@ { "cell_type": "markdown", "id": "27a4394b", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "# Scaling up drift detection with KeOps\n", "\n", @@ -41,7 +45,11 @@ "cell_type": "code", "execution_count": null, "id": "a0bf1719", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "!pip install alibi-detect[keops]" @@ -50,7 +58,11 @@ { "cell_type": "markdown", "id": "7ff93d59", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% 
md\n" + } + }, "source": [ "Before we start let’s fix the random seeds for reproducibility:" ] @@ -59,7 +71,11 @@ "cell_type": "code", "execution_count": 1, "id": "2ba95f29", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "import numpy as np\n", @@ -76,7 +92,11 @@ { "cell_type": "markdown", "id": "1910895a", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "\n", "## Vanilla PyTorch vs. KeOps comparison\n", @@ -90,7 +110,11 @@ "cell_type": "code", "execution_count": 2, "id": "a1c65254", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "from alibi_detect.cd import MMDDrift\n", @@ -124,21 +148,21 @@ " # Sample reference and test data\n", " x_ref = np.random.randn(*(n_ref, n_features)).astype(np.float32)\n", " x_test = np.random.randn(*(n_test, n_features)).astype(np.float32) + mu\n", - " \n", + "\n", " # Initialise detector, make and log predictions\n", " p_val = .05\n", " dd = MMDDrift(x_ref, backend=backend, p_val=p_val, n_permutations=100)\n", " start = timer()\n", " pred = dd.predict(x_test)\n", " end = timer()\n", - " \n", + "\n", " if _ > 0: # first run reserved for KeOps compilation\n", " t_detect.append(end - start)\n", " p_vals.append(pred['data']['p_val'])\n", - " \n", + "\n", " del dd, x_ref, x_test\n", " torch.cuda.empty_cache()\n", - " \n", + "\n", " p_vals = np.array(p_vals)\n", " t_mean, t_std = np.array(t_detect).mean(), np.array(t_detect).std()\n", " results = eval_detector(p_vals, p_val, mu == 0., t_mean, t_std)\n", @@ -165,7 +189,7 @@ " return T\n", "\n", "\n", - "def plot_absolute_time(results: dict, n_features: list, y_scale: str = 'linear', \n", + "def plot_absolute_time(results: dict, n_features: list, y_scale: str = 'linear',\n", " detector: str = 'MMD', max_batch_size: int = 1e10):\n", " T = format_results(n_features, ['keops', 'pytorch'], max_batch_size)\n", " colors = ['b', 'g', 'r', 'c', 
'm', 'y', 'b']\n", @@ -206,9 +230,13 @@ { "cell_type": "markdown", "id": "43a4ee7e", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ - "As detailed earlier, we will compare the PyTorch with the KeOps implementation of the MMD detector for a variety of reference and test data batch sizes as well as different feature dimensions. Note that for the PyTorch implementation, the portion of the kernel matrix for the reference data itself can already be computed at initialisation of the detector. This computation will not be included when we record the detector's prediction time. Since use cases where $N_\\text{ref} >> N_\\text{test}$ are quite common, we will also test for this specific setting. The key reason is that we cannot amortise this computation for the KeOps detector since we are working with lazily evaluated symbolic matrices.\n", + "As detailed earlier, we will compare the PyTorch with the KeOps implementation of the MMD detector for a variety of reference and test data batch sizes as well as different feature dimensions. Note that for the PyTorch implementation, the portion of the kernel matrix for the reference data itself can already be computed at initialisation of the detector. This computation will not be included when we record the detector's prediction time. Since use cases where $N_\\text{ref} \\gg N_\\text{test}$ are quite common, we will also test for this specific setting. 
The key reason is that we cannot amortise this computation for the KeOps detector since we are working with lazily evaluated symbolic matrices.\n", "\n", "#### $N_\\text{ref} = N_\\text{test}$\n", "\n", @@ -219,7 +247,11 @@ "cell_type": "code", "execution_count": 3, "id": "47268603", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "experiments = {\n", @@ -262,7 +294,10 @@ "execution_count": 4, "id": "d556296a", "metadata": { - "scrolled": true + "scrolled": true, + "pycharm": { + "name": "#%%\n" + } }, "outputs": [], "source": [ @@ -280,7 +315,11 @@ { "cell_type": "markdown", "id": "93396443", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "Below we visualise the runtimes of the different experiments. We can make the following observations:\n", "\n", @@ -297,7 +336,11 @@ "cell_type": "code", "execution_count": 5, "id": "5d854bfb", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { @@ -323,7 +366,11 @@ "cell_type": "code", "execution_count": 6, "id": "ec9d0fbb", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { @@ -345,7 +392,11 @@ { "cell_type": "markdown", "id": "b96a904b", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "The difference between KeOps and PyTorch is even more striking when we only look at $[2, 10]$ features:" ] @@ -354,7 +405,11 @@ "cell_type": "code", "execution_count": 7, "id": "0d1e4dfa", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { @@ -376,18 +431,26 @@ { "cell_type": "markdown", "id": "6e920708", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ - "#### $N_\\text{ref} >> N_\\text{test}$\n", + "#### $N_\\text{ref} \\gg N_\\text{test}$\n", "\n", - "Now we check whether the speed improvements still hold when 
$N_\\text{ref} >> N_\\text{test}$ ($N_\\text{ref} / N_\\text{test} = 10$) and a large part of the kernel can already be computed at initialisation time of the PyTorch (but not the KeOps) detector." + "Now we check whether the speed improvements still hold when $N_\\text{ref} \\gg N_\\text{test}$ ($N_\\text{ref} / N_\\text{test} = 10$) and a large part of the kernel can already be computed at initialisation time of the PyTorch (but not the KeOps) detector." ] }, { "cell_type": "code", "execution_count": 8, "id": "a75794e8", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "experiments = {\n", @@ -411,7 +474,11 @@ "cell_type": "code", "execution_count": 9, "id": "fcdd840a", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [], "source": [ "results = {backend: {} for backend in backends}\n", @@ -427,7 +494,11 @@ { "cell_type": "markdown", "id": "27307020", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "The below plots illustrate that KeOps indeed still provides large speed ups over PyTorch. The x-axis shows the reference batch size $N_\\text{ref}$. Note that $N_\\text{ref} / N_\\text{test} = 10$." 
] @@ -436,7 +507,11 @@ "cell_type": "code", "execution_count": 10, "id": "0a3c0d27", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { @@ -459,7 +534,11 @@ "cell_type": "code", "execution_count": 11, "id": "cf6a0dfc", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, "outputs": [ { "data": { @@ -481,7 +560,11 @@ { "cell_type": "markdown", "id": "f7dc206c", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## Conclusion\n", "\n", @@ -510,4 +593,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file From 148019af186d0ec6e02a8f0d55ed4a3257bb0a08 Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Tue, 9 Aug 2022 19:41:50 +0100 Subject: [PATCH 39/50] update docstring --- alibi_detect/utils/keops/kernels.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/alibi_detect/utils/keops/kernels.py b/alibi_detect/utils/keops/kernels.py index fad852faa..6f28cb6f6 100644 --- a/alibi_detect/utils/keops/kernels.py +++ b/alibi_detect/utils/keops/kernels.py @@ -12,9 +12,9 @@ def sigma_mean(x: LazyTensor, y: LazyTensor, dist: LazyTensor, n_min: int = None Parameters ---------- x - LazyTensor of instances with dimension [Nx, 1, features]. + LazyTensor of instances with dimension [Nx, 1, features]. The singleton dimension is necessary for broadcasting. y - LazyTensor of instances with dimension [1, Ny, features]. + LazyTensor of instances with dimension [1, Ny, features]. The singleton dimension is necessary for broadcasting. dist LazyTensor with dimensions [Nx, Ny] containing the pairwise distances between `x` and `y`. 
n_min From 74533689dd1fd968b5c1cec7b3f7d7029276ced0 Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Tue, 9 Aug 2022 19:46:01 +0100 Subject: [PATCH 40/50] fix bug --- alibi_detect/utils/keops/kernels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/alibi_detect/utils/keops/kernels.py b/alibi_detect/utils/keops/kernels.py index 6f28cb6f6..e66ec7afc 100644 --- a/alibi_detect/utils/keops/kernels.py +++ b/alibi_detect/utils/keops/kernels.py @@ -36,7 +36,7 @@ def sigma_mean(x: LazyTensor, y: LazyTensor, dist: LazyTensor, n_min: int = None d_min, id_min = dist.Kmin_argKmin(n_min, axis=1) rows, cols = torch.where(id_min.cpu() == torch.arange(nx)[:, None]) if (d_min[rows, cols] == 0.).all(): - n_mean = n * (n - 1) + n_mean = nx * (nx - 1) else: n_mean = np.prod(dist.shape) else: From 2d88bfc5c23bbb725f6911425acbdbb12f682300 Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Wed, 10 Aug 2022 13:33:35 +0100 Subject: [PATCH 41/50] undo unnecessary kwarg removal --- alibi_detect/cd/mmd.py | 1 - 1 file changed, 1 deletion(-) diff --git a/alibi_detect/cd/mmd.py b/alibi_detect/cd/mmd.py index 74ad8152f..51739a3ea 100644 --- a/alibi_detect/cd/mmd.py +++ b/alibi_detect/cd/mmd.py @@ -104,7 +104,6 @@ def __init__( pop_kwargs += ['batch_size_permutations'] detector = MMDDriftTorch else: - pop_kwargs += ['configure_kernel_from_x_ref'] detector = MMDDriftKeops [kwargs.pop(k, None) for k in pop_kwargs] From 54df25709a4e25dd08d26f3a35c9ca214a9b0fc4 Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Wed, 10 Aug 2022 18:57:06 +0100 Subject: [PATCH 42/50] make test consistent with torch/tf backends --- alibi_detect/cd/keops/tests/test_mmd_keops.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/alibi_detect/cd/keops/tests/test_mmd_keops.py b/alibi_detect/cd/keops/tests/test_mmd_keops.py index 8ebfdc647..33bbb1a79 100644 --- a/alibi_detect/cd/keops/tests/test_mmd_keops.py +++ b/alibi_detect/cd/keops/tests/test_mmd_keops.py 
@@ -36,11 +36,12 @@ def preprocess_list(x: List[np.ndarray]) -> np.ndarray: (preprocess_drift, {'model': HiddenOutput, 'layer': -1}), (preprocess_list, None) ] +update_x_ref = [{'last': 750}, {'reservoir_sampling': 750}, None] preprocess_at_init = [True, False] n_permutations = [10] batch_size_permutations = [10, 1000000] -configure_kernel_from_x_ref = [True, False] -tests_mmddrift = list(product(n_features, n_enc, preprocess, n_permutations, preprocess_at_init, +configure_kernel_from_x_ref = [True] +tests_mmddrift = list(product(n_features, n_enc, preprocess, n_permutations, preprocess_at_init, update_x_ref, batch_size_permutations, configure_kernel_from_x_ref)) n_tests = len(tests_mmddrift) @@ -53,7 +54,7 @@ def mmd_params(request): @pytest.mark.skipif(not has_keops, reason='Skipping since pykeops is not installed.') @pytest.mark.parametrize('mmd_params', list(range(n_tests)), indirect=True) def test_mmd(mmd_params): - n_features, n_enc, preprocess, n_permutations, preprocess_at_init, \ + n_features, n_enc, preprocess, n_permutations, preprocess_at_init, update_x_ref, \ batch_size_permutations, configure_kernel_from_x_ref = mmd_params np.random.seed(0) @@ -79,6 +80,7 @@ def test_mmd(mmd_params): x_ref=x_ref, p_val=.05, preprocess_at_init=preprocess_at_init if isinstance(preprocess_fn, Callable) else False, + update_x_ref=update_x_ref, preprocess_fn=preprocess_fn, configure_kernel_from_x_ref=configure_kernel_from_x_ref, n_permutations=n_permutations, @@ -87,6 +89,10 @@ def test_mmd(mmd_params): x = x_ref.copy() preds = cd.predict(x, return_p_val=True) assert preds['data']['is_drift'] == 0 and preds['data']['p_val'] >= cd.p_val + if isinstance(update_x_ref, dict): + k = list(update_x_ref.keys())[0] + assert cd.n == len(x) + len(x_ref) + assert cd.x_ref.shape[0] == min(update_x_ref[k], len(x) + len(x_ref)) x_h1 = np.random.randn(n * n_features).reshape(n, n_features).astype(np.float32) if to_list: From 211eeb97663d67854bb3329fd3c99df548b53895 Mon Sep 17 00:00:00 
2001 From: Arnaud Van Looveren Date: Thu, 11 Aug 2022 19:28:02 +0100 Subject: [PATCH 43/50] add _mmd2 test --- alibi_detect/cd/keops/tests/test_mmd_keops.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/alibi_detect/cd/keops/tests/test_mmd_keops.py b/alibi_detect/cd/keops/tests/test_mmd_keops.py index 33bbb1a79..efc8b78d0 100644 --- a/alibi_detect/cd/keops/tests/test_mmd_keops.py +++ b/alibi_detect/cd/keops/tests/test_mmd_keops.py @@ -6,6 +6,7 @@ import torch.nn as nn from typing import Callable, List from alibi_detect.utils.frameworks import has_keops +from alibi_detect.utils.pytorch import GaussianRBF, batch_compute_kernel_matrix, mmd2_from_kernel_matrix from alibi_detect.cd.pytorch.preprocess import HiddenOutput, preprocess_drift if has_keops: from alibi_detect.cd.keops.mmd import MMDDriftKeops @@ -104,3 +105,17 @@ def test_mmd(mmd_params): else: assert preds['data']['p_val'] >= preds['data']['threshold'] == cd.p_val assert preds['data']['distance'] <= preds['data']['distance_threshold'] + + # ensure the keops MMD^2 estimate matches the pytorch implementation for the same kernel + if not isinstance(x_ref, list) and update_x_ref is None: + print(x_ref.shape, x_h1.shape) + p_val, mmd2, distance_threshold = cd.score(x_h1) + kernel = GaussianRBF(sigma=cd.kernel.sigma) + if isinstance(preprocess_fn, Callable): + x_ref, x_h1 = cd.preprocess(x_h1) + x_ref = torch.from_numpy(x_ref).float() + x_h1 = torch.from_numpy(x_h1).float() + x_all = torch.cat([x_ref, x_h1], 0) + kernel_mat = kernel(x_all, x_all) + mmd2_torch = mmd2_from_kernel_matrix(kernel_mat, x_h1.shape[0]) + np.testing.assert_almost_equal(mmd2, mmd2_torch, decimal=6) From f98fd830fad0fc8ebb1cb278794e1861262379d5 Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Thu, 11 Aug 2022 19:47:51 +0100 Subject: [PATCH 44/50] remove unused import --- alibi_detect/cd/keops/tests/test_mmd_keops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/alibi_detect/cd/keops/tests/test_mmd_keops.py b/alibi_detect/cd/keops/tests/test_mmd_keops.py index efc8b78d0..8d3cf8124 100644 --- a/alibi_detect/cd/keops/tests/test_mmd_keops.py +++ b/alibi_detect/cd/keops/tests/test_mmd_keops.py @@ -6,7 +6,7 @@ import torch.nn as nn from typing import Callable, List from alibi_detect.utils.frameworks import has_keops -from alibi_detect.utils.pytorch import GaussianRBF, batch_compute_kernel_matrix, mmd2_from_kernel_matrix +from alibi_detect.utils.pytorch import GaussianRBF, mmd2_from_kernel_matrix from alibi_detect.cd.pytorch.preprocess import HiddenOutput, preprocess_drift if has_keops: from alibi_detect.cd.keops.mmd import MMDDriftKeops From 751d3a0cb88366f5f70b09ebf6b122f6c0682c7e Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Tue, 16 Aug 2022 15:13:36 +0100 Subject: [PATCH 45/50] clarify docs, remove redundant framework checks --- alibi_detect/cd/mmd.py | 4 ++-- alibi_detect/utils/frameworks.py | 1 + doc/source/cd/methods/mmddrift.ipynb | 34 ++++++++++++++++++++++------ 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/alibi_detect/cd/mmd.py b/alibi_detect/cd/mmd.py index 51739a3ea..b00c20449 100644 --- a/alibi_detect/cd/mmd.py +++ b/alibi_detect/cd/mmd.py @@ -97,10 +97,10 @@ def __init__( kwargs = locals() args = [kwargs['x_ref']] pop_kwargs = ['self', 'x_ref', 'backend', '__class__'] - if backend == 'tensorflow' and has_tensorflow: + if backend == 'tensorflow': pop_kwargs += ['device', 'batch_size_permutations'] detector = MMDDriftTF - elif backend == 'pytorch' and has_pytorch: + elif backend == 'pytorch': pop_kwargs += ['batch_size_permutations'] detector = MMDDriftTorch else: diff --git a/alibi_detect/utils/frameworks.py b/alibi_detect/utils/frameworks.py index c475c49b7..1a5f7e5ef 100644 --- a/alibi_detect/utils/frameworks.py +++ b/alibi_detect/utils/frameworks.py @@ -15,6 +15,7 @@ try: import pykeops # noqa + import torch # noqa has_keops = True except ImportError: has_keops = False diff --git 
a/doc/source/cd/methods/mmddrift.ipynb b/doc/source/cd/methods/mmddrift.ipynb index 63523a264..cb85b7345 100644 --- a/doc/source/cd/methods/mmddrift.ipynb +++ b/doc/source/cd/methods/mmddrift.ipynb @@ -2,14 +2,22 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "[source](../../api/alibi_detect.cd.mmd.rst)" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "# Maximum Mean Discrepancy\n", "\n", @@ -30,7 +38,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## Usage\n", "\n", @@ -88,7 +100,7 @@ "cd_keops = MMDDrift(x_ref, backend='keops', p_val=.05)\n", "```\n", "\n", - "We can also easily add preprocessing functions for the *TensorFlow * and *PyTorch* frameworks. The following example uses a randomly initialized image encoder in PyTorch:\n", + "We can also easily add preprocessing functions for the *TensorFlow* and *PyTorch* frameworks. Note that we can also combine for instance a PyTorch preprocessing step with a KeOps detector. 
The following example uses a randomly initialized image encoder in PyTorch:\n", "\n", "```python\n", "from functools import partial\n", @@ -157,7 +169,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "### Detect Drift\n", "\n", @@ -183,7 +199,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, "source": [ "## Examples\n", "\n", @@ -229,4 +249,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file From 7c2d7811c884bb36f656aa858e81d149684dc06a Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Tue, 16 Aug 2022 15:49:58 +0100 Subject: [PATCH 46/50] remove print --- alibi_detect/cd/keops/tests/test_mmd_keops.py | 1 - 1 file changed, 1 deletion(-) diff --git a/alibi_detect/cd/keops/tests/test_mmd_keops.py b/alibi_detect/cd/keops/tests/test_mmd_keops.py index 8d3cf8124..53b47cc27 100644 --- a/alibi_detect/cd/keops/tests/test_mmd_keops.py +++ b/alibi_detect/cd/keops/tests/test_mmd_keops.py @@ -108,7 +108,6 @@ def test_mmd(mmd_params): # ensure the keops MMD^2 estimate matches the pytorch implementation for the same kernel if not isinstance(x_ref, list) and update_x_ref is None: - print(x_ref.shape, x_h1.shape) p_val, mmd2, distance_threshold = cd.score(x_h1) kernel = GaussianRBF(sigma=cd.kernel.sigma) if isinstance(preprocess_fn, Callable): From 3f69740291e4e9e2c3d2a36cca62a5bc0686bc3b Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Tue, 16 Aug 2022 16:19:49 +0100 Subject: [PATCH 47/50] update docs keops --- alibi_detect/cd/keops/mmd.py | 1 + doc/source/cd/methods/mmddrift.ipynb | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/alibi_detect/cd/keops/mmd.py b/alibi_detect/cd/keops/mmd.py index d122031e4..86173ad13 100644 --- a/alibi_detect/cd/keops/mmd.py +++ b/alibi_detect/cd/keops/mmd.py @@ -145,6 +145,7 @@ def _mmd2(self, x_all: torch.Tensor, perms: List[torch.Tensor], m: 
int, n: int) k_yy.append(self.kernel( LazyTensor(y[:, :, None, :]), LazyTensor(y[:, None, :, :])).sum(1).sum(1).squeeze(-1)) c_xx, c_yy, c_xy = 1 / (m * (m - 1)), 1 / (n * (n - 1)), 2. / (m * n) + # Note that the MMD^2 estimates assume that the diagonal of the kernel matrix consists of 1's stats = c_xx * (torch.cat(k_xx) - m) + c_yy * (torch.cat(k_yy) - n) - c_xy * torch.cat(k_xy) return stats[0], stats[1:] diff --git a/doc/source/cd/methods/mmddrift.ipynb b/doc/source/cd/methods/mmddrift.ipynb index cb85b7345..2f68cb664 100644 --- a/doc/source/cd/methods/mmddrift.ipynb +++ b/doc/source/cd/methods/mmddrift.ipynb @@ -56,7 +56,7 @@ "\n", "Keyword arguments:\n", "\n", - "* `backend`: **TensorFlow**, **PyTorch** and [**KeOps**](https://github.com/getkeops/keops) implementations of the MMD detector as well as various preprocessing steps are available. Specify the backend (*tensorflow*, *pytorch* or *keops*). Defaults to *tensorflow*.\n", + "* `backend`: **TensorFlow**, **PyTorch** and [**KeOps**](https://github.com/getkeops/keops) implementations of the MMD detector are available. Specify the backend (*tensorflow*, *pytorch* or *keops*). Defaults to *tensorflow*.\n", "\n", "* `p_val`: p-value used for significance of the permutation test.\n", "\n", @@ -68,7 +68,7 @@ "\n", "* `preprocess_fn`: Function to preprocess the data before computing the data drift metrics. Typically a dimensionality reduction technique.\n", "\n", - "* `kernel`: Kernel used when computing the MMD. Defaults to a Gaussian RBF kernel (`from alibi_detect.utils.pytorch import GaussianRBF`, `from alibi_detect.utils.tensorflow import GaussianRBF` or `from alibi_detect.utils.keops import GaussianRBF` dependent on the backend used).\n", + "* `kernel`: Kernel used when computing the MMD. 
Defaults to a Gaussian RBF kernel (`from alibi_detect.utils.pytorch import GaussianRBF`, `from alibi_detect.utils.tensorflow import GaussianRBF` or `from alibi_detect.utils.keops import GaussianRBF` dependent on the backend used). Note that for the KeOps backend, the diagonal entries of the kernel matrices `kernel(x_ref, x_ref)` and `kernel(x_test, x_test)` should be equal to 1. This is compliant with the default Gaussian RBF kernel.\n", "\n", "* `sigma`: Optional bandwidth for the kernel as a `np.ndarray`. We can also average over a number of different bandwidths, e.g. `np.array([.5, 1., 1.5])`.\n", "\n", From ac5fe64c6c42f06a72c25af2f5d7abf7eaccbf03 Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Tue, 16 Aug 2022 18:28:32 +0100 Subject: [PATCH 48/50] batched version of sigma_mean part 1 --- alibi_detect/cd/keops/tests/test_mmd_keops.py | 2 +- alibi_detect/utils/keops/kernels.py | 29 ++++++++++------ .../utils/keops/tests/test_kernels_keops.py | 34 ++++++++++++++----- 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/alibi_detect/cd/keops/tests/test_mmd_keops.py b/alibi_detect/cd/keops/tests/test_mmd_keops.py index 53b47cc27..a64a78173 100644 --- a/alibi_detect/cd/keops/tests/test_mmd_keops.py +++ b/alibi_detect/cd/keops/tests/test_mmd_keops.py @@ -41,7 +41,7 @@ def preprocess_list(x: List[np.ndarray]) -> np.ndarray: preprocess_at_init = [True, False] n_permutations = [10] batch_size_permutations = [10, 1000000] -configure_kernel_from_x_ref = [True] +configure_kernel_from_x_ref = [True, False] tests_mmddrift = list(product(n_features, n_enc, preprocess, n_permutations, preprocess_at_init, update_x_ref, batch_size_permutations, configure_kernel_from_x_ref)) n_tests = len(tests_mmddrift) diff --git a/alibi_detect/utils/keops/kernels.py b/alibi_detect/utils/keops/kernels.py index e66ec7afc..201003539 100644 --- a/alibi_detect/utils/keops/kernels.py +++ b/alibi_detect/utils/keops/kernels.py @@ -12,11 +12,14 @@ def sigma_mean(x: LazyTensor, y: 
LazyTensor, dist: LazyTensor, n_min: int = None Parameters ---------- x - LazyTensor of instances with dimension [Nx, 1, features]. The singleton dimension is necessary for broadcasting. + LazyTensor of instances with dimension [Nx, 1, features] or [batch_size, Nx, 1, features]. + The singleton dimension is necessary for broadcasting. y - LazyTensor of instances with dimension [1, Ny, features]. The singleton dimension is necessary for broadcasting. + LazyTensor of instances with dimension [1, Ny, features] or [batch_size, 1, Ny, features]. + The singleton dimension is necessary for broadcasting. dist - LazyTensor with dimensions [Nx, Ny] containing the pairwise distances between `x` and `y`. + LazyTensor with dimensions [Nx, Ny] or [batch_size, Nx, Ny] containing the + pairwise distances between `x` and `y`. n_min In order to check whether x equals y after squeezing the singleton dimensions, we check if the diagonal of the distance matrix (which is a lazy tensor from which the diagonal cannot be directly extracted) @@ -30,18 +33,24 @@ def sigma_mean(x: LazyTensor, y: LazyTensor, dist: LazyTensor, n_min: int = None ------- The computed bandwidth, `sigma`. 
""" - nx, ny = dist.shape + batched = len(dist.shape) == 3 + if not batched: + nx, ny = dist.shape + axis = 1 + else: + batch_size, nx, ny = dist.shape + axis = 2 + n_mean = nx * ny if nx == ny: n_min = n_min if isinstance(n_min, int) else nx - d_min, id_min = dist.Kmin_argKmin(n_min, axis=1) + d_min, id_min = dist.Kmin_argKmin(n_min, axis=axis) + if batched: + d_min, id_min = d_min[0], id_min[0] # first instance in permutation test contains the original data rows, cols = torch.where(id_min.cpu() == torch.arange(nx)[:, None]) if (d_min[rows, cols] == 0.).all(): n_mean = nx * (nx - 1) - else: - n_mean = np.prod(dist.shape) - else: - n_mean = np.prod(dist.shape) - sigma = (.5 * dist.sum(1).sum().unsqueeze(-1) / n_mean) ** .5 + dist_sum = dist.sum(1).sum(1)[0] if batched else dist.sum(1).sum().unsqueeze(-1) + sigma = (.5 * dist_sum / n_mean) ** .5 return sigma diff --git a/alibi_detect/utils/keops/tests/test_kernels_keops.py b/alibi_detect/utils/keops/tests/test_kernels_keops.py index 685be989d..a33161c3e 100644 --- a/alibi_detect/utils/keops/tests/test_kernels_keops.py +++ b/alibi_detect/utils/keops/tests/test_kernels_keops.py @@ -10,8 +10,9 @@ sigma = [None, np.array([1.]), np.array([1., 2.])] n_features = [5, 10] n_instances = [(100, 100), (100, 75)] +batch_size = [None, 5] trainable = [True, False] -tests_gk = list(product(sigma, n_features, n_instances, trainable)) +tests_gk = list(product(sigma, n_features, n_instances, batch_size, trainable)) n_tests_gk = len(tests_gk) @@ -23,20 +24,37 @@ def gaussian_kernel_params(request): @pytest.mark.skipif(not has_keops, reason='Skipping since pykeops is not installed.') @pytest.mark.parametrize('gaussian_kernel_params', list(range(n_tests_gk)), indirect=True) def test_gaussian_kernel(gaussian_kernel_params): - sigma, n_features, n_instances, trainable = gaussian_kernel_params + sigma, n_features, n_instances, batch_size, trainable = gaussian_kernel_params + + print(sigma, n_features, n_instances, batch_size, trainable) + 
xshape, yshape = (n_instances[0], n_features), (n_instances[1], n_features) + if batch_size: + xshape = (batch_size, ) + xshape + yshape = (batch_size, ) + yshape sigma = sigma if sigma is None else torch.from_numpy(sigma).float() x = torch.from_numpy(np.random.random(xshape)).float() y = torch.from_numpy(np.random.random(yshape)).float() + if batch_size: + x_lazy, y_lazy = LazyTensor(x[:, :, None, :]), LazyTensor(y[:, None, :, :]) + x_lazy2 = LazyTensor(x[:, None, :, :]) + else: + x_lazy, y_lazy = LazyTensor(x[:, None, :]), LazyTensor(y[None, :, :]) + x_lazy2 = LazyTensor(x[None, :, :]) kernel = GaussianRBF(sigma=sigma, trainable=trainable) infer_sigma = True if sigma is None else False if trainable and infer_sigma: with pytest.raises(ValueError): - kernel(LazyTensor(x[:, None, :]), LazyTensor(y[None, :, :]), infer_sigma=infer_sigma) + kernel(x_lazy, y_lazy, infer_sigma=infer_sigma) else: - k_xy = kernel(LazyTensor(x[:, None, :]), LazyTensor(y[None, :, :]), infer_sigma=infer_sigma) - k_xx = kernel(LazyTensor(x[:, None, :]), LazyTensor(x[None, :, :]), infer_sigma=infer_sigma) - assert k_xy.shape == n_instances and k_xx.shape == (xshape[0], xshape[0]) - assert (torch.arange(xshape[0]) == k_xx.argmax(axis=1).cpu().view(-1)).all() - assert (k_xx.min(axis=1) >= 0.).all() and (k_xy.min(axis=1) >= 0.).all() + k_xy = kernel(x_lazy, y_lazy, infer_sigma=infer_sigma) + k_xx = kernel(x_lazy, x_lazy2, infer_sigma=infer_sigma) + k_xy_shape = n_instances + k_xx_shape = (n_instances[0], n_instances[0]) + if batch_size: + k_xy_shape = (batch_size, ) + k_xy_shape + k_xx_shape = (batch_size, ) + k_xx_shape + assert k_xy.shape == k_xy_shape and k_xx.shape == k_xx_shape + #assert (torch.arange(xshape[0]) == k_xx.argmax(axis=1).cpu().view(-1)).all() + #assert (k_xx.min(axis=1) >= 0.).all() and (k_xy.min(axis=1) >= 0.).all() From 4ce018b898c113b8bf6e14d84966bcd4639d5611 Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Tue, 16 Aug 2022 18:33:46 +0100 Subject: [PATCH 49/50] remove 
unused import --- alibi_detect/utils/keops/kernels.py | 1 - 1 file changed, 1 deletion(-) diff --git a/alibi_detect/utils/keops/kernels.py b/alibi_detect/utils/keops/kernels.py index 201003539..380ddadb9 100644 --- a/alibi_detect/utils/keops/kernels.py +++ b/alibi_detect/utils/keops/kernels.py @@ -1,4 +1,3 @@ -import numpy as np from pykeops.torch import LazyTensor import torch import torch.nn as nn From 95634a1d11922ab72734704705b5817c4e094bab Mon Sep 17 00:00:00 2001 From: Arnaud Van Looveren Date: Tue, 16 Aug 2022 18:41:40 +0100 Subject: [PATCH 50/50] update keops kernels test --- alibi_detect/utils/keops/tests/test_kernels_keops.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/alibi_detect/utils/keops/tests/test_kernels_keops.py b/alibi_detect/utils/keops/tests/test_kernels_keops.py index a33161c3e..0c1489410 100644 --- a/alibi_detect/utils/keops/tests/test_kernels_keops.py +++ b/alibi_detect/utils/keops/tests/test_kernels_keops.py @@ -52,9 +52,15 @@ def test_gaussian_kernel(gaussian_kernel_params): k_xx = kernel(x_lazy, x_lazy2, infer_sigma=infer_sigma) k_xy_shape = n_instances k_xx_shape = (n_instances[0], n_instances[0]) + axis = 1 if batch_size: k_xy_shape = (batch_size, ) + k_xy_shape k_xx_shape = (batch_size, ) + k_xx_shape + axis = 2 assert k_xy.shape == k_xy_shape and k_xx.shape == k_xx_shape - #assert (torch.arange(xshape[0]) == k_xx.argmax(axis=1).cpu().view(-1)).all() - #assert (k_xx.min(axis=1) >= 0.).all() and (k_xy.min(axis=1) >= 0.).all() + k_xx_argmax = k_xx.argmax(axis=axis) + k_xx_min, k_xy_min = k_xx.min(axis=axis), k_xy.min(axis=axis) + if batch_size: + k_xx_argmax, k_xx_min, k_xy_min = k_xx_argmax[0], k_xx_min[0], k_xy_min[0] + assert (torch.arange(n_instances[0]) == k_xx_argmax.cpu().view(-1)).all() + assert (k_xx_min >= 0.).all() and (k_xy_min >= 0.).all()