Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add scaling and cropping options to cudacodec::VideoReader #3355

Merged
merged 3 commits into from
Sep 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion modules/cudacodec/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ set(the_description "CUDA-accelerated Video Encoding/Decoding")

ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wshadow)

ocv_add_module(cudacodec opencv_core opencv_videoio OPTIONAL opencv_cudev WRAP python)
ocv_add_module(cudacodec opencv_core opencv_videoio opencv_cudaarithm opencv_cudawarping OPTIONAL opencv_cudev WRAP python)

ocv_module_include_directories()
ocv_glob_module_sources()
Expand Down
10 changes: 10 additions & 0 deletions modules/cudacodec/include/opencv2/cudacodec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,9 @@ struct CV_EXPORTS_W_SIMPLE FormatInfo
CV_PROP_RW double fps;
CV_PROP_RW int ulNumDecodeSurfaces;//!< Maximum number of internal decode surfaces.
CV_PROP_RW DeinterlaceMode deinterlaceMode;
CV_PROP_RW cv::Size targetSz;//!< Post-processed size of the output frame.
CV_PROP_RW cv::Rect srcRoi;//!< Region of interest decoded from video source.
CV_PROP_RW cv::Rect targetRoi;//!< Region of interest in the output frame containing the decoded frame.
};

/** @brief cv::cudacodec::VideoReader generic properties identifier.
Expand Down Expand Up @@ -516,13 +519,20 @@ surfaces it requires for correct functionality and optimal video memory usage bu
overall application. The optimal number of decode surfaces (in terms of performance and memory utilization) should be decided by experimentation for each application,
but it cannot go below the number determined by NVDEC.
@param rawMode Allow the raw encoded data which has been read up until the last call to grab() to be retrieved by calling retrieve(rawData,RAW_DATA_IDX).
@param targetSz Post-processed size (width/height should be multiples of 2) of the output frame, defaults to the size of the encoded video source.
@param srcRoi Region of interest (x/width should be multiples of 4 and y/height multiples of 2) decoded from video source, defaults to the full frame.
@param targetRoi Region of interest (x/width should be multiples of 4 and y/height multiples of 2) within the output frame to copy and resize the decoded frame to,
defaults to the full frame.
*/
struct CV_EXPORTS_W_SIMPLE VideoReaderInitParams {
    // All flags start out false and minNumDecodeSurfaces at 0; targetSz, srcRoi and targetRoi are
    // default-constructed, which stands for "size of the encoded source" / "full frame".
    CV_WRAP VideoReaderInitParams() : udpSource(false), allowFrameDrop(false), minNumDecodeSurfaces(0), rawMode(false) {};
    CV_PROP_RW bool udpSource;
    CV_PROP_RW bool allowFrameDrop;
    CV_PROP_RW int minNumDecodeSurfaces;
    CV_PROP_RW bool rawMode;//!< Allow the raw encoded data to be retrieved with retrieve(rawData,RAW_DATA_IDX).
    CV_PROP_RW cv::Size targetSz;//!< Post-processed size of the output frame (width/height should be multiples of 2).
    CV_PROP_RW cv::Rect srcRoi;//!< Region of interest decoded from the video source (x/width multiples of 4, y/height multiples of 2).
    CV_PROP_RW cv::Rect targetRoi;//!< Region of the output frame the decoded frame is copied and resized to (x/width multiples of 4, y/height multiples of 2).
};

/** @brief Creates video reader.
Expand Down
8 changes: 8 additions & 0 deletions modules/cudacodec/src/video_decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,14 @@ void cv::cudacodec::detail::VideoDecoder::create(const FormatInfo& videoFormat)
createInfo_.ulTargetHeight = videoFormat.height;
createInfo_.ulMaxWidth = videoFormat.ulMaxWidth;
createInfo_.ulMaxHeight = videoFormat.ulMaxHeight;
createInfo_.display_area.left = videoFormat.displayArea.x;
createInfo_.display_area.right = videoFormat.displayArea.x + videoFormat.displayArea.width;
createInfo_.display_area.top = videoFormat.displayArea.y;
createInfo_.display_area.bottom = videoFormat.displayArea.y + videoFormat.displayArea.height;
createInfo_.target_rect.left = videoFormat.targetRoi.x;
createInfo_.target_rect.right = videoFormat.targetRoi.x + videoFormat.targetRoi.width;
createInfo_.target_rect.top = videoFormat.targetRoi.y;
createInfo_.target_rect.bottom = videoFormat.targetRoi.y + videoFormat.targetRoi.height;
createInfo_.ulNumOutputSurfaces = 2;
createInfo_.ulCreationFlags = videoCreateFlags;
createInfo_.vidLock = lock_;
Expand Down
14 changes: 12 additions & 2 deletions modules/cudacodec/src/video_decoder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,17 @@ namespace cv { namespace cudacodec { namespace detail {
class VideoDecoder
{
public:
VideoDecoder(const Codec& codec, const int minNumDecodeSurfaces, CUcontext ctx, CUvideoctxlock lock) : ctx_(ctx), lock_(lock), decoder_(0)
VideoDecoder(const Codec& codec, const int minNumDecodeSurfaces, cv::Size targetSz, cv::Rect srcRoi, cv::Rect targetRoi, CUcontext ctx, CUvideoctxlock lock) :
    ctx_(ctx), lock_(lock), decoder_(0)
{
    // Drops the remainder so that the result is a multiple of `step`.
    const auto alignTo = [](const int value, const int step) { return value - value % step; };

    videoFormat_.codec = codec;
    videoFormat_.ulNumDecodeSurfaces = minNumDecodeSurfaces;

    // alignment enforced by nvcuvid, likely due to chroma subsampling:
    // output size is even, ROI x/width are multiples of 4 and ROI y/height multiples of 2
    videoFormat_.targetSz = cv::Size(alignTo(targetSz.width, 2), alignTo(targetSz.height, 2));
    videoFormat_.srcRoi = cv::Rect(alignTo(srcRoi.x, 4), alignTo(srcRoi.y, 2),
                                   alignTo(srcRoi.width, 4), alignTo(srcRoi.height, 2));
    videoFormat_.targetRoi = cv::Rect(alignTo(targetRoi.x, 4), alignTo(targetRoi.y, 2),
                                      alignTo(targetRoi.width, 4), alignTo(targetRoi.height, 2));
}

~VideoDecoder()
Expand All @@ -66,6 +73,9 @@ class VideoDecoder
// Get the code-type currently used.
cudaVideoCodec codec() const { return static_cast<cudaVideoCodec>(videoFormat_.codec); }
// Number of decode surfaces stored in the current video format.
int nDecodeSurfaces() const { return videoFormat_.ulNumDecodeSurfaces; }
// Output frame size stored in the current video format.
cv::Size getTargetSz() const { return videoFormat_.targetSz; }
// Region of the source video stored in the current video format as the area to decode.
cv::Rect getSrcRoi() const { return videoFormat_.srcRoi; }
// Region of the output frame stored in the current video format as the destination of the decoded frame.
cv::Rect getTargetRoi() const { return videoFormat_.targetRoi; }

// ulWidth / ulHeight of the current video format.
unsigned long frameWidth() const { return videoFormat_.ulWidth; }
unsigned long frameHeight() const { return videoFormat_.ulHeight; }
Expand All @@ -89,7 +99,7 @@ class VideoDecoder

cuSafeCall( cuvidMapVideoFrame(decoder_, picIdx, &ptr, &pitch, &videoProcParams) );

return cuda::GpuMat(frameHeight() * 3 / 2, frameWidth(), CV_8UC1, (void*) ptr, pitch);
return cuda::GpuMat(targetHeight() * 3 / 2, targetWidth(), CV_8UC1, (void*) ptr, pitch);
}

void unmapFrame(cuda::GpuMat& frame)
Expand Down
15 changes: 12 additions & 3 deletions modules/cudacodec/src/video_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,19 @@ int CUDAAPI cv::cudacodec::detail::VideoParser::HandleVideoSequence(void* userDa
newFormat.nBitDepthMinus8 = format->bit_depth_luma_minus8;
newFormat.ulWidth = format->coded_width;
newFormat.ulHeight = format->coded_height;
newFormat.width = format->coded_width;
newFormat.height = format->coded_height;
newFormat.displayArea = Rect(Point(format->display_area.left, format->display_area.top), Point(format->display_area.right, format->display_area.bottom));
newFormat.fps = format->frame_rate.numerator / static_cast<float>(format->frame_rate.denominator);
newFormat.targetSz = thiz->videoDecoder_->getTargetSz();
newFormat.width = newFormat.targetSz.width ? newFormat.targetSz.width : format->coded_width;
newFormat.height = newFormat.targetSz.height ? newFormat.targetSz.height : format->coded_height;
newFormat.srcRoi = thiz->videoDecoder_->getSrcRoi();
if (newFormat.srcRoi.empty()) {
format->display_area.right = format->coded_width;
format->display_area.bottom = format->coded_height;
newFormat.displayArea = Rect(Point(format->display_area.left, format->display_area.top), Point(format->display_area.right, format->display_area.bottom));
}
else
newFormat.displayArea = newFormat.srcRoi;
newFormat.targetRoi = thiz->videoDecoder_->getTargetRoi();
newFormat.ulNumDecodeSurfaces = min(!thiz->allowFrameDrop_ ? max(thiz->videoDecoder_->nDecodeSurfaces(), static_cast<int>(format->min_num_decode_surfaces)) :
format->min_num_decode_surfaces * 2, 32);
if (format->progressive_sequence)
Expand Down
14 changes: 9 additions & 5 deletions modules/cudacodec/src/video_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ namespace
class VideoReaderImpl : public VideoReader
{
public:
explicit VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop = false , const bool udpSource = false);
explicit VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop = false , const bool udpSource = false,
const Size targetSz = Size(), const Rect srcRoi = Rect(), const Rect targetRoi = Rect());
~VideoReaderImpl();

bool nextFrame(GpuMat& frame, Stream& stream) CV_OVERRIDE;
Expand Down Expand Up @@ -131,7 +132,8 @@ namespace
return videoSource_->format();
}

VideoReaderImpl::VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop, const bool udpSource) :
VideoReaderImpl::VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop, const bool udpSource,
const Size targetSz, const Rect srcRoi, const Rect targetRoi) :
videoSource_(source),
lock_(0)
{
Expand All @@ -143,7 +145,7 @@ namespace
cuSafeCall( cuCtxGetCurrent(&ctx) );
cuSafeCall( cuvidCtxLockCreate(&lock_, ctx) );
frameQueue_.reset(new FrameQueue());
videoDecoder_.reset(new VideoDecoder(videoSource_->format().codec, minNumDecodeSurfaces, ctx, lock_));
videoDecoder_.reset(new VideoDecoder(videoSource_->format().codec, minNumDecodeSurfaces, targetSz, srcRoi, targetRoi, ctx, lock_));
videoParser_.reset(new VideoParser(videoDecoder_, frameQueue_, allowFrameDrop, udpSource));
videoSource_->setVideoParser(videoParser_);
videoSource_->start();
Expand Down Expand Up @@ -357,13 +359,15 @@ Ptr<VideoReader> cv::cudacodec::createVideoReader(const String& filename, const
videoSource.reset(new CuvidVideoSource(filename));
}

return makePtr<VideoReaderImpl>(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource);
return makePtr<VideoReaderImpl>(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource, params.targetSz,
params.srcRoi, params.targetRoi);
}

// Creates a video reader fed by a user-supplied RawVideoSource.
// The source is wrapped in a RawVideoSourceWrapper (honouring params.rawMode) and every remaining
// initialisation parameter - decode surfaces, frame dropping, UDP flag, target size, source ROI and
// target ROI - is forwarded to the reader implementation.
// A single return is kept: a preceding return that forwarded only minNumDecodeSurfaces made this
// one unreachable and silently discarded the other parameters.
Ptr<VideoReader> cv::cudacodec::createVideoReader(const Ptr<RawVideoSource>& source, const VideoReaderInitParams params)
{
    Ptr<VideoSource> videoSource(new RawVideoSourceWrapper(source, params.rawMode));
    return makePtr<VideoReaderImpl>(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource, params.targetSz,
        params.srcRoi, params.targetRoi);
}

#endif // HAVE_NVCUVID
2 changes: 2 additions & 0 deletions modules/cudacodec/test/test_precomp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
#include "opencv2/ts/cuda_test.hpp"

#include "opencv2/cudacodec.hpp"
#include "opencv2/cudawarping.hpp"
#include "opencv2/cudaarithm.hpp"

#include "cvconfig.h"

Expand Down
54 changes: 53 additions & 1 deletion modules/cudacodec/test/test_video.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ PARAM_TEST_CASE(CheckExtraData, cv::cuda::DeviceInfo, check_extra_data_params_t)
{
};

// Fixture for the scaling/cropping reader test. Parameters, in order:
//   0: CUDA device, 1: video path relative to the test data directory,
//   2: output size as a multiple of the source frame size,
//   3: source ROI as fractions of the source frame, 4: target ROI as fractions of the output size.
PARAM_TEST_CASE(Scaling, cv::cuda::DeviceInfo, std::string, Size2f, Rect2f, Rect2f)
{
};

PARAM_TEST_CASE(Video, cv::cuda::DeviceInfo, std::string)
{
};
Expand Down Expand Up @@ -177,6 +181,47 @@ CUDA_TEST_P(CheckKeyFrame, Reader)
}
}

// Decodes the first frame with the requested output size, source ROI and target ROI and checks
// (a) that the reader reports the aligned values it actually used and
// (b) that the decoded pixels are close to a reference made by cropping and resizing the
//     natively decoded frame on the GPU.
CUDA_TEST_P(Scaling, Reader)
{
    cv::cuda::setDevice(GET_PARAM(0).deviceID());
    std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../" + GET_PARAM(1);
    const Size2f targetSzIn = GET_PARAM(2);
    const Rect2f srcRoiIn = GET_PARAM(3);
    const Rect2f targetRoiIn = GET_PARAM(4);

    // Reference frame: first frame decoded with default parameters, as grayscale. The reader is
    // scoped so that it is released before the reader under test is created.
    GpuMat frameOr;
    {
        cv::Ptr<cv::cudacodec::VideoReader> readerGs = cv::cudacodec::createVideoReader(inputFile);
        readerGs->set(cudacodec::ColorFormat::GRAY);
        ASSERT_TRUE(readerGs->nextFrame(frameOr));
    }

    // Convert the relative test parameters into pixel values: the output size and source ROI are
    // relative to the source frame, the target ROI is relative to the requested output size.
    cudacodec::VideoReaderInitParams params;
    params.targetSz = Size(frameOr.cols * targetSzIn.width, frameOr.rows * targetSzIn.height);
    params.srcRoi = Rect(frameOr.cols * srcRoiIn.x, frameOr.rows * srcRoiIn.y, frameOr.cols * srcRoiIn.width, frameOr.rows * srcRoiIn.height);
    params.targetRoi = Rect(params.targetSz.width * targetRoiIn.x, params.targetSz.height * targetRoiIn.y, params.targetSz.width * targetRoiIn.width,
        params.targetSz.height * targetRoiIn.height);

    cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile, {}, params);
    reader->set(cudacodec::ColorFormat::GRAY);
    GpuMat frame;
    ASSERT_TRUE(reader->nextFrame(frame));
    const cudacodec::FormatInfo format = reader->format();

    // Expected values: the requested ones with the remainder dropped, i.e. even output size,
    // ROI x/width multiples of 4 and ROI y/height multiples of 2.
    const auto alignDown = [](const int value, const int step) { return value - value % step; };
    const Size targetSzOut(alignDown(params.targetSz.width, 2), alignDown(params.targetSz.height, 2));
    const Rect srcRoiOut(alignDown(params.srcRoi.x, 4), alignDown(params.srcRoi.y, 2),
        alignDown(params.srcRoi.width, 4), alignDown(params.srcRoi.height, 2));
    const Rect targetRoiOut(alignDown(params.targetRoi.x, 4), alignDown(params.targetRoi.y, 2),
        alignDown(params.targetRoi.width, 4), alignDown(params.targetRoi.height, 2));

    // One assertion per condition so that a failure names the value that is wrong.
    ASSERT_TRUE(format.valid);
    ASSERT_TRUE(format.targetSz == targetSzOut);
    ASSERT_TRUE(format.srcRoi == srcRoiOut);
    ASSERT_TRUE(format.targetRoi == targetRoiOut);
    ASSERT_TRUE(frame.size() == targetSzOut);

    GpuMat frameGs;
    cv::cuda::resize(frameOr(srcRoiOut), frameGs, targetRoiOut.size(), 0, 0, INTER_AREA);
    // assert on mean absolute error due to different resize algorithms
    const double mae = cv::cuda::norm(frameGs, frame(targetRoiOut), NORM_L1)/frameGs.size().area();
    ASSERT_LT(mae, 2.35);
}

CUDA_TEST_P(Video, Reader)
{
cv::cuda::setDevice(GET_PARAM(0).deviceID());
Expand Down Expand Up @@ -431,7 +476,14 @@ INSTANTIATE_TEST_CASE_P(CUDA_Codec, CheckSet, testing::Combine(
ALL_DEVICES,
testing::Values("highgui/video/big_buck_bunny.mp4")));

#define VIDEO_SRC_R "highgui/video/big_buck_bunny.mp4", "cv/video/768x576.avi", "cv/video/1920x1080.avi", "highgui/video/big_buck_bunny.avi", \
#define VIDEO_SRC_SCALING "highgui/video/big_buck_bunny.mp4"
// Output sizes as (width, height) multiples of the source frame size.
#define TARGET_SZ Size2f(1,1), Size2f(0.8,0.9), Size2f(2.3,1.8)
// Source ROIs as (x, y, width, height) fractions of the source frame.
#define SRC_ROI Rect2f(0,0,1,1), Rect2f(0.25,0.25,0.5,0.5)
// Target ROIs as (x, y, width, height) fractions of the requested output size.
#define TARGET_ROI Rect2f(0,0,1,1), Rect2f(0.2,0.3,0.6,0.7)
// Runs the Scaling test for every combination of device, output size, source ROI and target ROI.
INSTANTIATE_TEST_CASE_P(CUDA_Codec, Scaling, testing::Combine(
ALL_DEVICES, testing::Values(VIDEO_SRC_SCALING), testing::Values(TARGET_SZ), testing::Values(SRC_ROI), testing::Values(TARGET_ROI)));

#define VIDEO_SRC_R "highgui/video/big_buck_bunny.mp4", "cv/video/768x576.avi", "cv/video/1920x1080.avi", "highgui/video/big_buck_bunny.avi", \
"highgui/video/big_buck_bunny.h264", "highgui/video/big_buck_bunny.h265", "highgui/video/big_buck_bunny.mpg", \
"highgui/video/sample_322x242_15frames.yuv420p.libvpx-vp9.mp4", "highgui/video/sample_322x242_15frames.yuv420p.libaom-av1.mp4", \
"cv/tracking/faceocc2/data/faceocc2.webm"
Expand Down