From 969c99f3ff07551d3886bc1c1cfd2cde9b0fc5d3 Mon Sep 17 00:00:00 2001
From: Youness Dkhissi <83643173+younessdkhissi@users.noreply.github.com>
Date: Fri, 26 Sep 2025 09:57:11 +0200
Subject: [PATCH 1/2] Optimize divisor search limit in get_greatest_divisor_below_bound

Start the search at n instead of bound when 0 < n < bound: no k in
(n, bound] can divide a positive n, so those iterations are wasted.

The limit must not be n/2. n is a divisor of itself and is the expected
result whenever 1 < n <= bound (n = 4, bound = 32 has to yield 4, not 2;
a prime n = 7 has to yield 7, not 1). For n <= 0 or n >= bound the full
range [2, bound] is still scanned, so every input returns the same value
as the previous loop.

A plain conditional is used instead of std::min so the change does not
depend on <algorithm> being included.

---
 torchvision/csrc/ops/cuda/deform_conv2d_kernel.cu | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/torchvision/csrc/ops/cuda/deform_conv2d_kernel.cu b/torchvision/csrc/ops/cuda/deform_conv2d_kernel.cu
index ae496b37d48..24bf57724f3 100644
--- a/torchvision/csrc/ops/cuda/deform_conv2d_kernel.cu
+++ b/torchvision/csrc/ops/cuda/deform_conv2d_kernel.cu
@@ -305,7 +305,8 @@ void deformable_im2col(
 }
 
 int get_greatest_divisor_below_bound(int n, int bound) {
-  for (int k = bound; k > 1; --k) {
+  int limit = (n > 0 && n < bound) ? n : bound;
+  for (int k = limit; k > 1; --k) {
     if (n % k == 0) {
       return k;
     }

From 0f030ff7c5878f747f73b6609b2b5f6e112077a4 Mon Sep 17 00:00:00 2001
From: Youness Dkhissi <83643173+younessdkhissi@users.noreply.github.com>
Date: Fri, 26 Sep 2025 10:00:40 +0200
Subject: [PATCH 2/2] Optimize the calculation of Number of parallel imgs in cpu

Apply the same search-limit change to the CPU kernel: start at n when
0 < n < bound, otherwise at bound. n itself stays a candidate, so the
returned value is unchanged for every input; only iterations over
k in (n, bound], which can never divide a positive n, are skipped.

The function body is not re-indented, keeping the diff limited to the
two lines that change behaviour.

---
 torchvision/csrc/ops/cpu/deform_conv2d_kernel.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/torchvision/csrc/ops/cpu/deform_conv2d_kernel.cpp b/torchvision/csrc/ops/cpu/deform_conv2d_kernel.cpp
index f89e6cc3030..b6c90522d0a 100644
--- a/torchvision/csrc/ops/cpu/deform_conv2d_kernel.cpp
+++ b/torchvision/csrc/ops/cpu/deform_conv2d_kernel.cpp
@@ -244,7 +244,8 @@ void deformable_im2col(
 }
 
 int get_greatest_divisor_below_bound(int n, int bound) {
-  for (int k = bound; k > 1; --k) {
+  int limit = (n > 0 && n < bound) ? n : bound;
+  for (int k = limit; k > 1; --k) {
     if (n % k == 0) {
       return k;
     }