Skip to content

Commit

Permalink
Merge branch 4.x
Browse files Browse the repository at this point in the history
  • Loading branch information
asmorkalov committed Jul 16, 2024
2 parents 496968c + b2c0ce0 commit 4e5b46b
Show file tree
Hide file tree
Showing 5 changed files with 144 additions and 18 deletions.
6 changes: 5 additions & 1 deletion modules/cudaarithm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-d
set(extra_dependencies "")
set(optional_dependencies "")
if(ENABLE_CUDA_FIRST_CLASS_LANGUAGE)
if(UNIX AND NOT BUILD_SHARED_LIBS AND CUDA_VERSION_STRING VERSION_GREATER_EQUAL 9.2 AND CMAKE_VERSION VERSION_GREATER_EQUAL 3.23)
set(CUDA_FFT_LIB_EXT "_static_nocallback")
endif()
list(APPEND extra_dependencies CUDA::cudart_static CUDA::nppial${CUDA_LIB_EXT} CUDA::nppc${CUDA_LIB_EXT} CUDA::nppitc${CUDA_LIB_EXT} CUDA::nppig${CUDA_LIB_EXT} CUDA::nppist${CUDA_LIB_EXT} CUDA::nppidei${CUDA_LIB_EXT})
if(HAVE_CUBLAS)
list(APPEND optional_dependencies CUDA::cublas${CUDA_LIB_EXT})
Expand All @@ -18,7 +21,8 @@ if(ENABLE_CUDA_FIRST_CLASS_LANGUAGE)
endif()
if(HAVE_CUFFT)
# static version requires separable compilation which is incompatible with opencv's current library structure
list(APPEND optional_dependencies CUDA::cufft)
# the cufft_static_nocallback variant does not require separable compilation. Callbacks are currently not used.
list(APPEND optional_dependencies CUDA::cufft${CUDA_FFT_LIB_EXT})
endif()
else()
if(HAVE_CUBLAS)
Expand Down
4 changes: 3 additions & 1 deletion modules/cudafilters/include/opencv2/cudafilters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,12 +142,14 @@ CV_EXPORTS_W Ptr<Filter> createLaplacianFilter(int srcType, int dstType, int ksi
////////////////////////////////////////////////////////////////////////////////////////////////////
// Separable Linear Filter

/** @brief Creates a separable linear filter.
/** @brief Creates a separable linear filter. In-place processing is supported.
@param srcType Source array type.
@param dstType Destination array type.
@param rowKernel Horizontal filter coefficients. Supports kernels with size \<= 32.
Pass noArray() to skip the row filtering.
@param columnKernel Vertical filter coefficients. Supports kernels with size \<= 32.
Pass noArray() to skip the column filtering.
@param anchor Anchor position within the kernel. Negative values mean that anchor is positioned at
the aperture center.
@param rowBorderMode Pixel extrapolation method in the vertical direction. For details, see
Expand Down
70 changes: 55 additions & 15 deletions modules/cudafilters/src/filtering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,28 +386,38 @@ namespace
const int cn = CV_MAT_CN(srcType);
const int ddepth = CV_MAT_DEPTH(dstType);

Mat rowKernel = _rowKernel.getMat();
Mat columnKernel = _columnKernel.getMat();
CV_Assert( _rowKernel.empty() || _rowKernel.isMat() );
CV_Assert( _columnKernel.empty() || _columnKernel.isMat() );
Mat rowKernel = _rowKernel.empty() ? cv::Mat() : _rowKernel.getMat();
Mat columnKernel = _columnKernel.empty() ? cv::Mat() : _columnKernel.getMat();

CV_Assert( sdepth <= CV_64F && cn <= 4 );
CV_Assert( rowKernel.channels() == 1 );
CV_Assert( columnKernel.channels() == 1 );
CV_Assert( rowKernel.empty() || rowKernel.channels() == 1 );
CV_Assert( columnKernel.empty() || columnKernel.channels() == 1 );
CV_Assert( rowBorderMode == BORDER_REFLECT101 || rowBorderMode == BORDER_REPLICATE || rowBorderMode == BORDER_CONSTANT || rowBorderMode == BORDER_REFLECT || rowBorderMode == BORDER_WRAP );
CV_Assert( columnBorderMode == BORDER_REFLECT101 || columnBorderMode == BORDER_REPLICATE || columnBorderMode == BORDER_CONSTANT || columnBorderMode == BORDER_REFLECT || columnBorderMode == BORDER_WRAP );

Mat kernel32F;

rowKernel.convertTo(kernel32F, CV_32F);
rowKernel_.upload(kernel32F.reshape(1, 1));
if (!rowKernel.empty())
{
rowKernel.convertTo(kernel32F, CV_32F);
rowKernel_.upload(kernel32F.reshape(1, 1));
}

columnKernel.convertTo(kernel32F, CV_32F);
columnKernel_.upload(kernel32F.reshape(1, 1));
if (!columnKernel.empty())
{
columnKernel.convertTo(kernel32F, CV_32F);
columnKernel_.upload(kernel32F.reshape(1, 1));
}

CV_Assert( rowKernel_.cols > 0 && rowKernel_.cols <= 32 );
CV_Assert( columnKernel_.cols > 0 && columnKernel_.cols <= 32 );
CV_Assert( rowKernel_.empty() || (rowKernel_.cols > 0 && rowKernel_.cols <= 32 ));
CV_Assert( columnKernel_.empty() || (columnKernel_.cols > 0 && columnKernel_.cols <= 32 ));

normalizeAnchor(anchor_.x, rowKernel_.cols);
normalizeAnchor(anchor_.y, columnKernel_.cols);
if (!rowKernel_.empty())
normalizeAnchor(anchor_.x, rowKernel_.cols);
if (!columnKernel_.empty())
normalizeAnchor(anchor_.y, columnKernel_.cols);

bufType_ = CV_MAKE_TYPE(CV_32F, cn);

Expand All @@ -426,15 +436,45 @@ namespace
_dst.create(src.size(), dstType_);
GpuMat dst = _dst.getGpuMat();

ensureSizeIsEnough(src.size(), bufType_, buf_);
const bool isInPlace = (src.data == dst.data);
const bool hasRowKernel = !rowKernel_.empty();
const bool hasColKernel = !columnKernel_.empty();
const bool hasSingleKernel = (hasRowKernel ^ hasColKernel);
const bool needsSrcAdaptation = !hasRowKernel && hasColKernel && (srcType_ != bufType_);
const bool needsDstAdaptation = hasRowKernel && !hasColKernel && (dstType_ != bufType_);
const bool needsBufForIntermediateStorage = (hasRowKernel && hasColKernel) || (hasSingleKernel && isInPlace);
const bool needsBuf = needsSrcAdaptation || needsDstAdaptation || needsBufForIntermediateStorage;
if (needsBuf)
ensureSizeIsEnough(src.size(), bufType_, buf_);

if (needsSrcAdaptation)
src.convertTo(buf_, bufType_, _stream);
GpuMat& srcAdapted = needsSrcAdaptation ? buf_ : src;

DeviceInfo devInfo;
const int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();

cudaStream_t stream = StreamAccessor::getStream(_stream);

rowFilter_(src, buf_, rowKernel_.ptr<float>(), rowKernel_.cols, anchor_.x, rowBorderMode_, cc, stream);
columnFilter_(buf_, dst, columnKernel_.ptr<float>(), columnKernel_.cols, anchor_.y, columnBorderMode_, cc, stream);
if (!hasRowKernel && !hasColKernel && !isInPlace)
srcAdapted.convertTo(dst, dstType_, _stream);
else if (hasRowKernel || hasColKernel)
{
GpuMat& rowFilterSrc = srcAdapted;
GpuMat& rowFilterDst = !hasRowKernel ? srcAdapted : needsBuf ? buf_ : dst;
GpuMat& colFilterSrc = hasColKernel && needsBuf ? buf_ : srcAdapted;
GpuMat& colFilterTo = dst;

if (hasRowKernel)
rowFilter_(rowFilterSrc, rowFilterDst, rowKernel_.ptr<float>(), rowKernel_.cols, anchor_.x, rowBorderMode_, cc, stream);
else if (hasColKernel && (needsBufForIntermediateStorage && !needsSrcAdaptation))
rowFilterSrc.convertTo(buf_, bufType_, _stream);

if (hasColKernel)
columnFilter_(colFilterSrc, colFilterTo, columnKernel_.ptr<float>(), columnKernel_.cols, anchor_.y, columnBorderMode_, cc, stream);
else if (needsBuf)
buf_.convertTo(dst, dstType_, _stream);
}
}
}

Expand Down
80 changes: 80 additions & 0 deletions modules/cudafilters/test/test_filters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,86 @@ INSTANTIATE_TEST_CASE_P(CUDA_Filters, SeparableLinearFilter, testing::Combine(
BorderType(cv::BORDER_REFLECT)),
WHOLE_SUBMAT));

// Parameterized fixture for the separable linear filter tests with empty kernels.
// Test parameters, in order: CUDA device, source depth, channel count, destination depth,
// in-place flag, use-row-kernel flag, use-column-kernel flag.
PARAM_TEST_CASE(SeparableLinearFilterWithEmptyKernels, cv::cuda::DeviceInfo, MatDepth, Channels, MatDepth, bool, bool, bool)
{
    // Values read from the test parameters.
    cv::cuda::DeviceInfo devInfo;
    int srcDepth;
    int cn;
    int dstDepth;
    bool inPlace;
    bool useRowKernel;
    bool useColKernel;

    // Fixed configuration shared by every parameter combination.
    cv::Size size;
    cv::Size ksize;
    cv::Point anchor;
    int borderType;

    // Full matrix types assembled from the depths and the channel count.
    int srcType;
    int dstType;

    virtual void SetUp()
    {
        // Select the device under test.
        devInfo = GET_PARAM(0);
        cv::cuda::setDevice(devInfo.deviceID());

        // Element layout of the source and destination images.
        srcDepth = GET_PARAM(1);
        cn = GET_PARAM(2);
        dstDepth = GET_PARAM(3);
        srcType = CV_MAKE_TYPE(srcDepth, cn);
        dstType = CV_MAKE_TYPE(dstDepth, cn);

        // Which code path of the filter is exercised.
        inPlace = GET_PARAM(4);
        useRowKernel = GET_PARAM(5);
        useColKernel = GET_PARAM(6);

        // Constant image/kernel geometry and border handling.
        size = cv::Size(640, 480);
        ksize = cv::Size(3, 1);
        anchor = cv::Point(-1, -1);
        borderType = cv::BORDER_REPLICATE;
    }
};

// Runs the separable filter twice on the same random image: once with a 1x1 kernel of ones
// standing in for every unused pass, and once with an empty kernel for every unused pass.
// Both results are expected to match within a depth-dependent tolerance.
CUDA_TEST_P(SeparableLinearFilterWithEmptyKernels, Accuracy)
{
    const cv::Mat src = randomMat(size, srcType);

    const cv::Mat rowKernel = (cv::Mat_<float>(ksize) << -1, 0, 1);
    const cv::Mat colKernel = rowKernel.t();

    // Builds a filter that uses `unusedKernel` for each disabled pass, uploads a fresh copy
    // of the source image and returns the filtered result (aliasing the input when in-place).
    auto filterWith = [&](const cv::Mat& unusedKernel) -> cv::cuda::GpuMat
    {
        cv::Ptr<cv::cuda::Filter> sepFilter =
            cv::cuda::createSeparableLinearFilter(srcType, dstType,
                                                  useRowKernel ? rowKernel : unusedKernel,
                                                  useColKernel ? colKernel : unusedKernel,
                                                  anchor, borderType, borderType);

        cv::cuda::GpuMat gpuSrc = loadMat(src);
        cv::cuda::GpuMat gpuDst = inPlace ? gpuSrc : cv::cuda::GpuMat();
        sepFilter->apply(gpuSrc, gpuDst);
        return gpuDst;
    };

    const cv::Mat oneKernel = cv::Mat::ones(cv::Size(1, 1), CV_32FC1);
    const cv::Mat noKernel = cv::Mat();

    cv::cuda::GpuMat dst_sep_dummyK = filterWith(oneKernel);
    cv::cuda::GpuMat dst_sep_emptyK = filterWith(noKernel);

    EXPECT_MAT_NEAR(dst_sep_dummyK, dst_sep_emptyK, src.depth() < CV_32F ? 1.0 : 1e-2);
}

// Instantiates the empty-kernel tests over every combination of device, source depth,
// channel count, destination depth and the three boolean switches of the fixture.
INSTANTIATE_TEST_CASE_P(CUDA_Filters, SeparableLinearFilterWithEmptyKernels, testing::Combine(
    ALL_DEVICES,
    // source depth
    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)),
    IMAGE_CHANNELS,
    // destination depth
    testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)),
    testing::Bool(), // in-place
    testing::Bool(), // use row kernel
    testing::Bool()  // use col kernel
    ));

/////////////////////////////////////////////////////////////////////////////////////////////////
// Sobel

Expand Down
2 changes: 1 addition & 1 deletion modules/wechat_qrcode/samples/qrcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
except:
print("---------------------------------------------------------------")
print("Failed to initialize WeChatQRCode.")
print("Please, download 'detector.*' and 'sr.*' from")
print("Please, download 'detect.*' and 'sr.*' from")
print("https://github.com/WeChatCV/opencv_3rdparty/tree/wechat_qrcode")
print("and put them into the current directory.")
print("---------------------------------------------------------------")
Expand Down

0 comments on commit 4e5b46b

Please sign in to comment.