From 6e0375c28056aa0f5c517f9bc7ebfe8ee05bc29d Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 28 Oct 2022 08:52:50 -0400 Subject: [PATCH 1/5] use fused kernels to reduce overhead in corner detectors --- .../cucim/src/cucim/skimage/feature/corner.py | 109 +++++++++--------- 1 file changed, 55 insertions(+), 54 deletions(-) diff --git a/python/cucim/src/cucim/skimage/feature/corner.py b/python/cucim/src/cucim/skimage/feature/corner.py index f52f98ad7..97611d852 100644 --- a/python/cucim/src/cucim/skimage/feature/corner.py +++ b/python/cucim/src/cucim/skimage/feature/corner.py @@ -518,6 +518,13 @@ def shape_index(image, sigma=1, mode="constant", cval=0): return (2.0 / np.pi) * np.arctan((l2 + l1) / (l2 - l1)) +@cp.fuse() +def _kitchen_rosenfeld_inner(imx, imy, imxx, imxy, imyy): + numerator = imxx * imy ** 2 + imyy * imx ** 2 - 2 * imxy * imx * imy + denominator = imx ** 2 + imy ** 2 + return numerator, denominator + + def corner_kitchen_rosenfeld(image, mode="constant", cval=0): """Compute Kitchen and Rosenfeld corner measure response image. @@ -556,23 +563,9 @@ def corner_kitchen_rosenfeld(image, mode="constant", cval=0): imy, imx = _compute_derivatives(image, mode=mode, cval=cval) imxy, imxx = _compute_derivatives(imx, mode=mode, cval=cval) - imyy, imyx = _compute_derivatives(imy, mode=mode, cval=cval) - - # numerator = imxx * imy ** 2 + imyy * imx ** 2 - 2 * imxy * imx * imy - numerator = imxx * imy - numerator *= imy - tmp = imyy * imx - tmp *= imx - numerator += tmp - tmp = 2 * imxy - tmp *= imx - tmp *= imy - numerator -= tmp - - # denominator = imx ** 2 + imy ** 2 - denominator = imx * imx - denominator += imy * imy + imyy, _ = _compute_derivatives(imy, mode=mode, cval=cval) + numerator, denominator = _kitchen_rosenfeld_inner(imx, imy, imxx, imxy, imyy) response = cp.zeros_like(image, dtype=float_dtype) mask = denominator != 0 @@ -581,6 +574,24 @@ def corner_kitchen_rosenfeld(image, mode="constant", cval=0): return response +@cp.fuse +def _corner_harris_inner_k(Arr, Acc, Arc, k): + # determinant + detA = Arr * Acc - Arc * Arc + # trace + traceA = Arr + Acc + return detA - k * traceA * traceA + + +@cp.fuse +def _corner_harris_inner(Arr, Acc, Arc, eps): + # determinant + detA = Arr * Acc - Arc * Arc + # trace + traceA = Arr + Acc + return 2 * detA / (traceA + eps) + + def corner_harris(image, method="k", k=0.05, eps=1e-6, sigma=1): """Compute Harris corner measure response image. @@ -645,23 +656,19 @@ def corner_harris(image, method="k", k=0.05, eps=1e-6, sigma=1): [7, 7]]) """ - Arr, Arc, Acc = structure_tensor(image, sigma, order="rc") - - # determinant - detA = Arr * Acc - detA -= Arc * Arc - # trace - traceA = Arr + Acc - if method == "k": - response = detA - k * traceA * traceA + response = _corner_harris_inner_k(Arr, Acc, Arc, k) else: - response = 2 * detA / (traceA + eps) - + response = _corner_harris_inner_k(Arr, Acc, Arc, eps) return response +@cp.fuse() +def _shi_tomasi_fused(Arr, Acc, Arc): + return ((Arr + Acc) - cp.sqrt((Arr - Acc) ** 2 + 4 * Arc ** 2)) / 2 + + def corner_shi_tomasi(image, sigma=1): """Compute Shi-Tomasi (Kanade-Tomasi) corner measure response image. @@ -716,23 +723,26 @@ def corner_shi_tomasi(image, sigma=1): [7, 7]]) """ - Arr, Arc, Acc = structure_tensor(image, sigma, order="rc") - # minimum eigenvalue of A + return _shi_tomasi_fused(Arr, Acc, Arc) - # response = ((Axx + Ayy) - np.sqrt((Axx - Ayy) ** 2 + 4 * Axy ** 2)) / 2 - tmp = Arr - Acc - tmp *= tmp - tmp2 = 4 * Arc - tmp2 *= Arc - tmp += tmp2 - cp.sqrt(tmp, out=tmp) - tmp /= 2 - response = Arr + Acc - response -= tmp - return response +@cp.fuse +def _forstner_inner(Arr, Acc, Arc): + # determinant + detA = Arr * Acc - Arc * Arc + # trace + traceA = Arr + Acc + mask = traceA != 0 + return detA, traceA, mask + + +@cp.fuse +def _forstner_inner2(trace_masked, det_masked): + w_masked = det_masked / trace_masked + q_masked = 4 * det_masked / (trace_masked * trace_masked) + return w_masked, q_masked def corner_foerstner(image, sigma=1): @@ -802,23 +812,14 @@ def corner_foerstner(image, sigma=1): """ Arr, Arc, Acc = structure_tensor(image, sigma, order="rc") - - # determinant - detA = Arr * Acc - detA -= Arc * Arc - # trace - traceA = Arr + Acc + # determinant and trace + detA, traceA, mask = _forstner_inner(Arr, Acc, Arc) w = cp.zeros_like(image, dtype=detA.dtype) q = cp.zeros_like(w) - - mask = traceA != 0 - - w[mask] = detA[mask] / traceA[mask] - tsq = traceA[mask] - tsq *= tsq - q[mask] = 4 * detA[mask] / tsq - + _w, _q = _forstner_inner2(traceA[mask], detA[mask]) + w[mask] = _w + q[mask] = _q return w, q From 8ee7593e840731940612a8b2892e2420eff4595c Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Tue, 1 Nov 2022 13:48:15 -0400 Subject: [PATCH 2/5] flake8 fixes --- python/cucim/src/cucim/skimage/feature/corner.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/cucim/src/cucim/skimage/feature/corner.py b/python/cucim/src/cucim/skimage/feature/corner.py index 97611d852..8589ffed7 100644 --- a/python/cucim/src/cucim/skimage/feature/corner.py +++ b/python/cucim/src/cucim/skimage/feature/corner.py @@ -565,7 +565,9 @@ def corner_kitchen_rosenfeld(image, mode="constant", cval=0): imxy, imxx = _compute_derivatives(imx, mode=mode, cval=cval) imyy, _ = _compute_derivatives(imy, mode=mode, cval=cval) - numerator, denominator = _kitchen_rosenfeld_inner(imx, imy, imxx, imxy, imyy) + numerator, denominator = _kitchen_rosenfeld_inner( + imx, imy, imxx, imxy, imyy + ) response = cp.zeros_like(image, dtype=float_dtype) mask = denominator != 0 From d87e548765e9d46b85051677a3d2a51d16049e57 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Sat, 5 Nov 2022 22:21:42 -0400 Subject: [PATCH 3/5] convert fused kernel to ElementwiseKernel --- .../cucim/src/cucim/skimage/feature/corner.py | 167 ++++++++++++------ 1 file changed, 110 insertions(+), 57 deletions(-) diff --git a/python/cucim/src/cucim/skimage/feature/corner.py b/python/cucim/src/cucim/skimage/feature/corner.py index 8589ffed7..41aa05044 100644 --- a/python/cucim/src/cucim/skimage/feature/corner.py +++ b/python/cucim/src/cucim/skimage/feature/corner.py @@ -525,6 +525,29 @@ def _kitchen_rosenfeld_inner(imx, imy, imxx, imxy, imyy): return numerator, denominator +@cp.memoize() +def _get_kitchen_rosenfeld_kernel(): + + return cp.ElementwiseKernel( + in_params='F imx, F imy, F imxx, F imxy, F imyy', + out_params='F response', + operation=""" + F numerator, denominator, imx_sq, imy_sq; + imx_sq = imx * imx; + imy_sq = imy * imy; + denominator = imx_sq; + denominator += imy_sq; + if (denominator == 0) { + response = 0.0; + } else { + numerator = imxx * imy_sq + imyy * imx_sq - 2 * imxy * imx * imy; + response = numerator / denominator; + } + """, # noqa + name='cucim_feature_kitchen_rosenfeld' + ) + + def corner_kitchen_rosenfeld(image, mode="constant", cval=0): """Compute Kitchen and Rosenfeld corner measure response image. @@ -565,33 +588,45 @@ def corner_kitchen_rosenfeld(image, mode="constant", cval=0): imxy, imxx = _compute_derivatives(imx, mode=mode, cval=cval) imyy, _ = _compute_derivatives(imy, mode=mode, cval=cval) - numerator, denominator = _kitchen_rosenfeld_inner( - imx, imy, imxx, imxy, imyy + kernel = _get_kitchen_rosenfeld_kernel() + response = cp.empty_like(image) + return kernel(imx, imy, imxx, imxy, imyy, response) + + +@cp.memoize() +def _get_corner_harris_k_kernel(): + + return cp.ElementwiseKernel( + in_params='F Arr, F Acc, F Arc, float64 k', + out_params='F response', + operation=""" + F detA, traceA; + // determinant + detA = Arr * Acc - Arc * Arc; + // trace + traceA = Arr + Acc; + response = detA - k * traceA * traceA; + """, + name='cucim_skimage_feature_corner_harris_k' ) - response = cp.zeros_like(image, dtype=float_dtype) - mask = denominator != 0 - response[mask] = numerator[mask] / denominator[mask] - - return response - -@cp.fuse -def _corner_harris_inner_k(Arr, Acc, Arc, k): - # determinant - detA = Arr * Acc - Arc * Arc - # trace - traceA = Arr + Acc - return detA - k * traceA * traceA - - -@cp.fuse -def _corner_harris_inner(Arr, Acc, Arc, eps): - # determinant - detA = Arr * Acc - Arc * Arc - # trace - traceA = Arr + Acc - return 2 * detA / (traceA + eps) +@cp.memoize() +def _get_corner_harris_kernel(): + + return cp.ElementwiseKernel( + in_params='F Arr, F Acc, F Arc, float64 eps', + out_params='F response', + operation=""" + F detA, traceA; + // determinant + detA = Arr * Acc - Arc * Arc; + // trace + traceA = Arr + Acc; + response = 2 * detA / (traceA + eps); + """, + name='cucim_skimage_feature_corner_harris_k' + ) def corner_harris(image, method="k", k=0.05, eps=1e-6, sigma=1): @@ -659,16 +694,30 @@ def corner_harris(image, method="k", k=0.05, eps=1e-6, sigma=1): """ Arr, Arc, Acc = structure_tensor(image, sigma, order="rc") + response = cp.zeros_like(Arr) if method == "k": - response = _corner_harris_inner_k(Arr, Acc, Arc, k) + kernel = _get_corner_harris_k_kernel() + kernel(Arr, Acc, Arc, k, response) else: - response = _corner_harris_inner_k(Arr, Acc, Arc, eps) + kernel = _get_corner_harris_kernel() + kernel(Arr, Acc, Arc, eps, response) return response -@cp.fuse() -def _shi_tomasi_fused(Arr, Acc, Arc): - return ((Arr + Acc) - cp.sqrt((Arr - Acc) ** 2 + 4 * Arc ** 2)) / 2 +@cp.memoize() +def _get_shi_tomasi_kernel(): + + return cp.ElementwiseKernel( + in_params='F Arr, F Acc, F Arc', + out_params='F response', + operation=""" + F tmp; + tmp = (Arr - Acc); + tmp *= tmp; + response = (Arr + Acc - sqrt(tmp + 4 * Arc * Arc)) / 2.0; + """, + name='cucim_skimage_feature_shi_tomasi' + ) def corner_shi_tomasi(image, sigma=1): @@ -727,24 +776,34 @@ def corner_shi_tomasi(image, sigma=1): """ Arr, Arc, Acc = structure_tensor(image, sigma, order="rc") # minimum eigenvalue of A - return _shi_tomasi_fused(Arr, Acc, Arc) - - -@cp.fuse -def _forstner_inner(Arr, Acc, Arc): - # determinant - detA = Arr * Acc - Arc * Arc - # trace - traceA = Arr + Acc - mask = traceA != 0 - return detA, traceA, mask - - -@cp.fuse -def _forstner_inner2(trace_masked, det_masked): - w_masked = det_masked / trace_masked - q_masked = 4 * det_masked / (trace_masked * trace_masked) - return w_masked, q_masked + response = cp.zeros_like(Arr) + kernel = _get_shi_tomasi_kernel() + return kernel(Arr, Acc, Arc, response) + + +@cp.memoize() +def _get_forstner_kernel(): + + return cp.ElementwiseKernel( + in_params='F Arr, F Acc, F Arc', + out_params='F w, F q', + operation=""" + F detA, traceA; + + // determinant + detA = Arr * Acc - Arc * Arc; + // trace + traceA = Arr + Acc; + if (traceA == 0) { + w = 0; + q = 0; + } else { + w = detA / traceA; + q = 4 * detA / (traceA * traceA); + } + """, + name='cucim_skimage_feature_forstner' + ) def corner_foerstner(image, sigma=1): @@ -812,17 +871,11 @@ def corner_foerstner(image, sigma=1): [7, 7]]) """ - Arr, Arc, Acc = structure_tensor(image, sigma, order="rc") - # determinant and trace - detA, traceA, mask = _forstner_inner(Arr, Acc, Arc) - - w = cp.zeros_like(image, dtype=detA.dtype) - q = cp.zeros_like(w) - _w, _q = _forstner_inner2(traceA[mask], detA[mask]) - w[mask] = _w - q[mask] = _q - return w, q + w = cp.empty_like(Arr) + q = cp.empty_like(Arr) + kernel = _get_forstner_kernel() + return kernel(Arr, Acc, Arc, w, q) def corner_peaks( From d56d9a037602b3e49e9a5db8c26d41ab7c562d44 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 18 Nov 2022 12:40:07 -0500 Subject: [PATCH 4/5] minor typo/style fixes --- python/cucim/src/cucim/skimage/feature/corner.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/python/cucim/src/cucim/skimage/feature/corner.py b/python/cucim/src/cucim/skimage/feature/corner.py index 72fe668a0..03532e6dc 100644 --- a/python/cucim/src/cucim/skimage/feature/corner.py +++ b/python/cucim/src/cucim/skimage/feature/corner.py @@ -5,11 +5,10 @@ import cupy as cp import numpy as np import cucim.skimage._vendored.ndimage as ndi -from scipy import spatial # TODO: use RAPIDS cuSpatial? +from scipy import spatial # TODO: use cuSpatial if cKDTree becomes available from cucim.skimage.util import img_as_float -# from ..transform import integral_image from .._shared._gradient import gradient from .._shared.utils import _supported_float_type, warn from ..transform import integral_image @@ -1107,7 +1106,7 @@ def corner_shi_tomasi(image, sigma=1): @cp.memoize() -def _get_forstner_kernel(): +def _get_foerstner_kernel(): return cp.ElementwiseKernel( in_params='F Arr, F Acc, F Arc', @@ -1198,7 +1197,7 @@ def corner_foerstner(image, sigma=1): Arr, Arc, Acc = structure_tensor(image, sigma, order="rc") w = cp.empty_like(Arr) q = cp.empty_like(Arr) - kernel = _get_forstner_kernel() + kernel = _get_foerstner_kernel() return kernel(Arr, Acc, Arc, w, q) From 380d1ce4dacab56401bdeb6f4b0bdae66c2f82ce Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 18 Nov 2022 14:44:26 -0500 Subject: [PATCH 5/5] remove unused fused kernel make sure all inputs to kernel are C-contiguous --- python/cucim/src/cucim/skimage/feature/corner.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/python/cucim/src/cucim/skimage/feature/corner.py b/python/cucim/src/cucim/skimage/feature/corner.py index 03532e6dc..33fccd631 100644 --- a/python/cucim/src/cucim/skimage/feature/corner.py +++ b/python/cucim/src/cucim/skimage/feature/corner.py @@ -842,13 +842,6 @@ def shape_index(image, sigma=1, mode="constant", cval=0): return (2.0 / np.pi) * np.arctan((l2 + l1) / (l2 - l1)) -@cp.fuse() -def _kitchen_rosenfeld_inner(imx, imy, imxx, imxy, imyy): - numerator = imxx * imy ** 2 + imyy * imx ** 2 - 2 * imxy * imx * imy - denominator = imx ** 2 + imy ** 2 - return numerator, denominator - - @cp.memoize() def _get_kitchen_rosenfeld_kernel(): @@ -913,7 +906,7 @@ def corner_kitchen_rosenfeld(image, mode="constant", cval=0): imyy, _ = _compute_derivatives(imy, mode=mode, cval=cval) kernel = _get_kitchen_rosenfeld_kernel() - response = cp.empty_like(image) + response = cp.empty_like(image, order='C') return kernel(imx, imy, imxx, imxy, imyy, response)