From 8ad50fa5386e7e105c72764ec11308b038d35dda Mon Sep 17 00:00:00 2001
From: Gregory Lee <grlee77@gmail.com>
Date: Wed, 5 Oct 2022 14:58:41 -0400
Subject: [PATCH] Add cucim.skimage.feature.match_descriptors (#338)

closes #193

This PR adds `cucim.skimage.feature.match_descriptors`. It is a very straightforward adaptation of the scikit-image code, only substituting numpy->cupy. The only differences of note are:
- when a new enough CuPy and pylibraft are not available, it warns and falls back to SciPy's `cdist` on the CPU
- test cases involving BRIEF temporarily involve a round trip to the host until we implement BRIEF here (which does not look too hard)

This PR also removes the deprecated `masked_register_translation` and `register_translation` (these were moved to `cucim.skimage.registration.phase_cross_correlation`) from the `feature` module. Removing those was missed when previously updating the API here to match scikit-image 0.19.

Authors:
  - Gregory Lee (https://github.com/grlee77)

Approvers:
  - Gigon Bae (https://github.com/gigony)

URL: https://github.com/rapidsai/cucim/pull/338
---
 .../src/cucim/skimage/feature/__init__.py     |  34 +---
 .../cucim/src/cucim/skimage/feature/match.py  | 120 +++++++++++
 .../cucim/skimage/feature/tests/test_match.py | 187 ++++++++++++++++++
 3 files changed, 309 insertions(+), 32 deletions(-)
 create mode 100644 python/cucim/src/cucim/skimage/feature/match.py
 create mode 100644 python/cucim/src/cucim/skimage/feature/tests/test_match.py

diff --git a/python/cucim/src/cucim/skimage/feature/__init__.py b/python/cucim/src/cucim/skimage/feature/__init__.py
index 4de8a014b..301816382 100644
--- a/python/cucim/src/cucim/skimage/feature/__init__.py
+++ b/python/cucim/src/cucim/skimage/feature/__init__.py
@@ -6,40 +6,11 @@
                      corner_peaks, corner_shi_tomasi, hessian_matrix,
                      hessian_matrix_det, hessian_matrix_eigvals, shape_index,
                      structure_tensor, structure_tensor_eigenvalues)
+from .match import match_descriptors
 from .peak import peak_local_max
 from .template import match_template
 
 
-@deprecated(
-    alt_func="cucim.skimage.registration.phase_cross_correlation",
-    removed_version="0.19",
-)
-def masked_register_translation(
-    src_image, target_image, src_mask, target_mask=None, overlap_ratio=0.3
-):
-    from ..registration import phase_cross_correlation
-
-    return phase_cross_correlation(
-        src_image,
-        target_image,
-        reference_mask=src_mask,
-        moving_mask=target_mask,
-        overlap_ratio=overlap_ratio,
-    )
-
-
-@deprecated(
-    alt_func="cucim.skimage.registration.phase_cross_correlation",
-    removed_version="0.19",
-)
-def register_translation(
-    src_image, target_image, upsample_factor=1, space="real", return_error=True
-):
-    from ..registration._phase_cross_correlation import \
-        phase_cross_correlation as func
-    return func(src_image, target_image, upsample_factor, space, return_error)
-
-
 __all__ = ['canny',
            'daisy',
            'multiscale_basic_features',
@@ -61,5 +32,4 @@ def register_translation(
            # 'corner_fast',
            # 'corner_orientations',
            'match_template',
-           'register_translation',
-           'masked_register_translation']
+           'match_descriptors']
diff --git a/python/cucim/src/cucim/skimage/feature/match.py b/python/cucim/src/cucim/skimage/feature/match.py
new file mode 100644
index 000000000..0187e369c
--- /dev/null
+++ b/python/cucim/src/cucim/skimage/feature/match.py
@@ -0,0 +1,120 @@
+import warnings
+
+import cupy as cp
+
+try:
+    # CuPy's cdist will only work if pylibraft is available
+    import pylibraft  # noqa
+    from cupyx.scipy.spatial.distance import cdist
+    have_gpu_cdist = True
+except ImportError:
+    from scipy.spatial.distance import cdist
+    have_gpu_cdist = False
+
+
+def match_descriptors(descriptors1, descriptors2, metric=None, p=2,
+                      max_distance=cp.inf, cross_check=True, max_ratio=1.0):
+    """Brute-force matching of descriptors.
+
+    For each descriptor in the first set this matcher finds the closest
+    descriptor in the second set (and vice-versa in the case of enabled
+    cross-checking).
+
+    Parameters
+    ----------
+    descriptors1 : (M, P) array
+        Descriptors of size P about M keypoints in the first image.
+    descriptors2 : (N, P) array
+        Descriptors of size P about N keypoints in the second image.
+    metric : {'euclidean', 'cityblock', 'minkowski', 'hamming', ...}, optional
+        The metric to compute the distance between two descriptors. See
+        `scipy.spatial.distance.cdist` for all possible types. The hamming
+        distance should be used for binary descriptors. By default the L2-norm
+        is used for all descriptors of dtype float or double and the Hamming
+        distance is used for binary descriptors automatically.
+    p : int, optional
+        The p-norm to apply for ``metric='minkowski'``.
+    max_distance : float, optional
+        Maximum allowed distance between descriptors of two keypoints
+        in separate images to be regarded as a match.
+    cross_check : bool, optional
+        If True, the matched keypoints are returned after cross checking i.e. a
+        matched pair (keypoint1, keypoint2) is returned if keypoint2 is the
+        best match for keypoint1 in second image and keypoint1 is the best
+        match for keypoint2 in first image.
+    max_ratio : float, optional
+        Maximum ratio of distances between first and second closest descriptor
+        in the second set of descriptors. This threshold is useful to filter
+        ambiguous matches between the two descriptor sets. The choice of this
+        value depends on the statistics of the chosen descriptor, e.g.,
+        for SIFT descriptors a value of 0.8 is usually chosen, see
+        D.G. Lowe, "Distinctive Image Features from Scale-Invariant Keypoints",
+        International Journal of Computer Vision, 2004.
+
+    Returns
+    -------
+    matches : (Q, 2) array
+        Indices of corresponding matches in first and second set of
+        descriptors, where ``matches[:, 0]`` denote the indices in the first
+        and ``matches[:, 1]`` the indices in the second set of descriptors.
+
+    """
+
+    if descriptors1.shape[1] != descriptors2.shape[1]:
+        raise ValueError("Descriptor length must equal.")
+
+    if metric is None:
+        if cp.issubdtype(descriptors1.dtype, bool):
+            metric = 'hamming'
+        else:
+            metric = 'euclidean'
+
+    kwargs = {}
+    # Scipy raises an error if p is passed as an extra argument when it isn't
+    # necessary for the chosen metric.
+    if metric == 'minkowski':
+        kwargs['p'] = p
+
+    if not have_gpu_cdist:
+        warnings.warn("pylibraft not found, falling back to SciPy "
+                      "implementation of cdist on the CPU")
+        distances = cp.array(
+            cdist(
+                cp.asnumpy(descriptors1),
+                cp.asnumpy(descriptors2),
+                metric=metric,
+                **kwargs
+            )
+        )
+    else:
+        distances = cdist(descriptors1, descriptors2, metric=metric, **kwargs)
+
+    indices1 = cp.arange(descriptors1.shape[0])
+    indices2 = cp.argmin(distances, axis=1)
+
+    if cross_check:
+        matches1 = cp.argmin(distances, axis=0)
+        mask = indices1 == matches1[indices2]
+        indices1 = indices1[mask]
+        indices2 = indices2[mask]
+
+    if max_distance < cp.inf:
+        mask = distances[indices1, indices2] < max_distance
+        indices1 = indices1[mask]
+        indices2 = indices2[mask]
+
+    if max_ratio < 1.0:
+        best_distances = distances[indices1, indices2]
+        distances[indices1, indices2] = cp.inf
+        second_best_indices2 = cp.argmin(distances[indices1], axis=1)
+        second_best_distances = distances[indices1, second_best_indices2]
+        second_best_distances[second_best_distances == 0] \
+            = cp.finfo(cp.float64).eps
+        ratio = best_distances / second_best_distances
+        mask = ratio < max_ratio
+        indices1 = indices1[mask]
+        indices2 = indices2[mask]
+
+    matches = cp.stack((indices1, indices2), axis=-1)
+
+    return matches
diff --git a/python/cucim/src/cucim/skimage/feature/tests/test_match.py b/python/cucim/src/cucim/skimage/feature/tests/test_match.py
new file mode 100644
index 000000000..8bee76b53
--- /dev/null
+++ b/python/cucim/src/cucim/skimage/feature/tests/test_match.py
@@ -0,0 +1,187 @@
+import math
+
+import cupy as cp
+from cupy.testing import assert_array_equal
+from skimage import data
+# TODO: change to cucim.skimage.feature.BRIEF once implemented
+from skimage.feature import BRIEF
+
+from cucim.skimage import transform
+from cucim.skimage._shared import testing
+from cucim.skimage.color import rgb2gray
+from cucim.skimage.feature import corner_harris, corner_peaks, match_descriptors
+
+
+def test_binary_descriptors_unequal_descriptor_sizes_error():
+    """Sizes of descriptors of keypoints to be matched should be equal."""
+    descs1 = cp.array([[True, True, False, True],
+                       [False, True, False, True]])
+    descs2 = cp.array([[True, False, False, True, False],
+                       [False, True, True, True, False]])
+    with testing.raises(ValueError):
+        match_descriptors(descs1, descs2)
+
+
+def test_binary_descriptors():
+    descs1 = cp.array([[True, True, False, True, True],
+                       [False, True, False, True, True]])
+    descs2 = cp.array([[True, False, False, True, False],
+                       [False, False, True, True, True]])
+    matches = match_descriptors(descs1, descs2)
+    assert_array_equal(matches, [[0, 0], [1, 1]])
+
+
+def test_binary_descriptors_rotation_crosscheck_false():
+    """Verify matched keypoints and their corresponding masks results between
+    image and its rotated version with the expected keypoint pairs with
+    cross_check disabled."""
+    img = cp.array(data.astronaut())
+    img = rgb2gray(img)
+    tform = transform.SimilarityTransform(
+        scale=1, rotation=0.15, translation=(0, 0)
+    )
+    rotated_img = transform.warp(img, tform, clip=False)
+
+    extractor = BRIEF(descriptor_size=512)
+
+    keypoints1 = corner_peaks(corner_harris(img), min_distance=5,
+                              threshold_abs=0, threshold_rel=0.1)
+    extractor.extract(cp.asnumpy(img), cp.asnumpy(keypoints1))
+    descriptors1 = cp.array(extractor.descriptors)
+
+    keypoints2 = corner_peaks(corner_harris(rotated_img), min_distance=5,
+                              threshold_abs=0, threshold_rel=0.1)
+    extractor.extract(cp.asnumpy(rotated_img), cp.asnumpy(keypoints2))
+    descriptors2 = cp.array(extractor.descriptors)
+
+    matches = match_descriptors(descriptors1, descriptors2, cross_check=False)
+
+    exp_matches1 = cp.arange(47)
+    exp_matches2 = cp.array([0, 2, 1, 3, 4, 5, 7, 8, 14, 9, 11, 13,
+                             23, 15, 16, 22, 17, 19, 37, 18, 24, 27,
+                             30, 25, 26, 32, 28, 35, 37, 42, 29, 38,
+                             33, 40, 36, 39, 10, 36, 43, 15, 35, 41,
+                             6, 37, 32, 24, 8])
+
+    assert_array_equal(matches[:, 0], exp_matches1)
+    assert_array_equal(matches[:, 1], exp_matches2)
+
+    # minkowski takes a different code path, therefore we test it explicitly
+    matches = match_descriptors(descriptors1, descriptors2,
+                                metric='minkowski', cross_check=False)
+    assert_array_equal(matches[:, 0], exp_matches1)
+    assert_array_equal(matches[:, 1], exp_matches2)
+
+    # it also has an extra parameter
+    matches = match_descriptors(descriptors1, descriptors2,
+                                metric='minkowski', p=4, cross_check=False)
+    assert_array_equal(matches[:, 0], exp_matches1)
+    assert_array_equal(matches[:, 1], exp_matches2)
+
+
+def test_binary_descriptors_rotation_crosscheck_true():
+    """Verify matched keypoints and their corresponding masks results between
+    image and its rotated version with the expected keypoint pairs with
+    cross_check enabled."""
+    img = cp.array(data.astronaut())
+    img = rgb2gray(img)
+    tform = transform.SimilarityTransform(
+        scale=1, rotation=0.15, translation=(0, 0)
+    )
+    rotated_img = transform.warp(img, tform, clip=False)
+
+    extractor = BRIEF(descriptor_size=512)
+
+    keypoints1 = corner_peaks(corner_harris(img), min_distance=5,
+                              threshold_abs=0, threshold_rel=0.1)
+    extractor.extract(cp.asnumpy(img), cp.asnumpy(keypoints1))
+    descriptors1 = cp.array(extractor.descriptors)
+
+    keypoints2 = corner_peaks(corner_harris(rotated_img), min_distance=5,
+                              threshold_abs=0, threshold_rel=0.1)
+    extractor.extract(cp.asnumpy(rotated_img), cp.asnumpy(keypoints2))
+    descriptors2 = cp.array(extractor.descriptors)
+
+    matches = match_descriptors(descriptors1, descriptors2, cross_check=True)
+
+    exp_matches1 = cp.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+                             13, 14, 15, 16, 17, 19, 20, 21, 22, 23,
+                             24, 26, 27, 28, 29, 30, 31, 32, 33,
+                             34, 38, 41, 42])
+    exp_matches2 = cp.array([0, 2, 1, 3, 4, 5, 7, 8, 14, 9, 11, 13,
+                             23, 15, 16, 22, 17, 19, 18, 24, 27, 30,
+                             25, 26, 28, 35, 37, 42, 29, 38, 33,
+                             40, 36, 43, 41, 6])
+    assert_array_equal(matches[:, 0], exp_matches1)
+    assert_array_equal(matches[:, 1], exp_matches2)
+
+
+def test_max_distance():
+    descs1 = cp.zeros((10, 128))
+    descs2 = cp.zeros((15, 128))
+
+    descs1[0, :] = 1
+
+    matches = match_descriptors(descs1, descs2, metric='euclidean',
+                                max_distance=0.1, cross_check=False)
+    assert len(matches) == 9
+
+    matches = match_descriptors(descs1, descs2, metric='euclidean',
+                                max_distance=math.sqrt(128.1),
+                                cross_check=False)
+    assert len(matches) == 10
+
+    matches = match_descriptors(descs1, descs2, metric='euclidean',
+                                max_distance=0.1,
+                                cross_check=True)
+    assert_array_equal(matches, [[1, 0]])
+
+    matches = match_descriptors(descs1, descs2, metric='euclidean',
+                                max_distance=math.sqrt(128.1),
+                                cross_check=True)
+    assert_array_equal(matches, [[1, 0]])
+
+
+def test_max_ratio():
+    descs1 = 10 * cp.arange(10)[:, None].astype(cp.float32)
+    descs2 = 10 * cp.arange(15)[:, None].astype(cp.float32)
+
+    descs2[0] = 5.0
+
+    matches = match_descriptors(descs1, descs2, metric='euclidean',
+                                max_ratio=1.0, cross_check=False)
+    assert_array_equal(len(matches), 10)
+
+    matches = match_descriptors(descs1, descs2, metric='euclidean',
+                                max_ratio=0.6, cross_check=False)
+    assert_array_equal(len(matches), 10)
+
+    matches = match_descriptors(descs1, descs2, metric='euclidean',
+                                max_ratio=0.5, cross_check=False)
+    assert_array_equal(len(matches), 9)
+
+    descs1[0] = 7.5
+
+    matches = match_descriptors(descs1, descs2, metric='euclidean',
+                                max_ratio=0.5, cross_check=False)
+    assert_array_equal(len(matches), 9)
+
+    descs2 = 10 * cp.arange(1)[:, None].astype(cp.float32)
+
+    matches = match_descriptors(descs1, descs2, metric='euclidean',
+                                max_ratio=1.0, cross_check=False)
+    assert_array_equal(len(matches), 10)
+
+    matches = match_descriptors(descs1, descs2, metric='euclidean',
+                                max_ratio=0.5, cross_check=False)
+    assert_array_equal(len(matches), 10)
+
+    descs1 = 10 * cp.arange(1)[:, None].astype(cp.float32)
+
+    matches = match_descriptors(descs1, descs2, metric='euclidean',
+                                max_ratio=1.0, cross_check=False)
+    assert_array_equal(len(matches), 1)
+
+    matches = match_descriptors(descs1, descs2, metric='euclidean',
+                                max_ratio=0.5, cross_check=False)
+    assert_array_equal(len(matches), 1)