Skip to content

Commit

Permalink
Merge pull request #3542 from cudawarped:cudacodec_videoreader_seek
Browse files Browse the repository at this point in the history
cudacodec::VideoReader: allow frame seek on initialization #3542

Allow seeking of the video source on initialization of `cudacodec::VideoReader` when the new parameter `VideoReaderInitParams::firstFrameIdx` is non-zero.

Dependent on opencv/opencv#24012

Fixes #3541.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
  • Loading branch information
cudawarped authored Nov 16, 2023
1 parent d51add3 commit 7b97851
Show file tree
Hide file tree
Showing 7 changed files with 120 additions and 46 deletions.
12 changes: 11 additions & 1 deletion modules/cudacodec/include/opencv2/cudacodec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,14 @@ class CV_EXPORTS_W RawVideoSource
@return `true` unless the property is not set or not supported.
*/
virtual bool get(const int propertyId, double& propertyVal) const = 0;

/** @brief Retrieve the index of the first frame that will be returned after construction.
@return Index of the first frame that will be returned after construction.
@note To reduce the decoding overhead when initializing VideoReader to start its decoding from frame N, RawVideoSource should seek to the first valid key frame less than or equal to N and return that index here.
*/
virtual int getFirstFrameIdx() const = 0;
};

/** @brief VideoReader initialization parameters
Expand All @@ -561,9 +569,10 @@ but it cannot go below the number determined by NVDEC.
@param targetRoi Region of interest (x/width should be multiples of 4 and y/height multiples of 2) within the output frame to copy and resize the decoded frame to,
defaults to the full frame.
@param enableHistogram Request output of decoded luma histogram \a hist from VideoReader::nextFrame(GpuMat& frame, GpuMat& hist, Stream& stream), if hardware supported.
@param firstFrameIdx Index of the first frame to seek to on initialization of the VideoReader.
*/
struct CV_EXPORTS_W_SIMPLE VideoReaderInitParams {
CV_WRAP VideoReaderInitParams() : udpSource(false), allowFrameDrop(false), minNumDecodeSurfaces(0), rawMode(0), enableHistogram(false){};
CV_WRAP VideoReaderInitParams() : udpSource(false), allowFrameDrop(false), minNumDecodeSurfaces(0), rawMode(0), enableHistogram(false), firstFrameIdx(0){};
CV_PROP_RW bool udpSource;
CV_PROP_RW bool allowFrameDrop;
CV_PROP_RW int minNumDecodeSurfaces;
Expand All @@ -572,6 +581,7 @@ struct CV_EXPORTS_W_SIMPLE VideoReaderInitParams {
CV_PROP_RW cv::Rect srcRoi;
CV_PROP_RW cv::Rect targetRoi;
CV_PROP_RW bool enableHistogram;
CV_PROP_RW int firstFrameIdx;
};

/** @brief Creates video reader.
Expand Down
14 changes: 8 additions & 6 deletions modules/cudacodec/src/ffmpeg_video_source.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,19 +169,21 @@ bool ParamSetsExist(unsigned char* parameterSets, const int szParameterSets, uns
return paramSetStartCodeLen != 0 && packetStartCodeLen != 0 && parameterSets[paramSetStartCodeLen] == data[packetStartCodeLen];
}

cv::cudacodec::detail::FFmpegVideoSource::FFmpegVideoSource(const String& fname, const std::vector<int>& _videoCaptureParams)
cv::cudacodec::detail::FFmpegVideoSource::FFmpegVideoSource(const String& fname, const std::vector<int>& _videoCaptureParams, const int iMaxStartFrame)
: videoCaptureParams(_videoCaptureParams)
{
if (!videoio_registry::hasBackend(CAP_FFMPEG))
CV_Error(Error::StsNotImplemented, "FFmpeg backend not found");

cap.open(fname, CAP_FFMPEG, videoCaptureParams);
if (!cap.isOpened())
videoCaptureParams.push_back(CAP_PROP_FORMAT);
videoCaptureParams.push_back(-1);
if (!cap.open(fname, CAP_FFMPEG, videoCaptureParams))
CV_Error(Error::StsUnsupportedFormat, "Unsupported video source");

if (!cap.set(CAP_PROP_FORMAT, -1)) // turn off video decoder (extract stream)
CV_Error(Error::StsUnsupportedFormat, "Fetching of RAW video streams is not supported");
CV_Assert(cap.get(CAP_PROP_FORMAT) == -1);
if (iMaxStartFrame) {
CV_Assert(cap.set(CAP_PROP_POS_FRAMES, iMaxStartFrame));
firstFrameIdx = static_cast<int>(cap.get(CAP_PROP_POS_FRAMES));
}

const int codecExtradataIndex = static_cast<int>(cap.get(CAP_PROP_CODEC_EXTRADATA_INDEX));
Mat tmpExtraData;
Expand Down
5 changes: 4 additions & 1 deletion modules/cudacodec/src/ffmpeg_video_source.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ namespace cv { namespace cudacodec { namespace detail {
class FFmpegVideoSource : public RawVideoSource
{
public:
FFmpegVideoSource(const String& fname, const std::vector<int>& params);
FFmpegVideoSource(const String& fname, const std::vector<int>& params, const int iMaxStartFrame);
~FFmpegVideoSource();

bool getNextPacket(unsigned char** data, size_t* size) CV_OVERRIDE;
Expand All @@ -66,12 +66,15 @@ class FFmpegVideoSource : public RawVideoSource

bool get(const int propertyId, double& propertyVal) const;

int getFirstFrameIdx() const { return firstFrameIdx; }

private:
FormatInfo format_;
VideoCapture cap;
Mat rawFrame, extraData, dataWithHeader;
int iFrame = 0;
std::vector<int> videoCaptureParams;
int firstFrameIdx = 0;
};

}}}
Expand Down
70 changes: 50 additions & 20 deletions modules/cudacodec/src/video_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ namespace
{
public:
explicit VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop = false , const bool udpSource = false,
const Size targetSz = Size(), const Rect srcRoi = Rect(), const Rect targetRoi = Rect(), const bool enableHistogram = false);
const Size targetSz = Size(), const Rect srcRoi = Rect(), const Rect targetRoi = Rect(), const bool enableHistogram = false, const int firstFrameIdx = 0);
~VideoReaderImpl();

bool nextFrame(GpuMat& frame, Stream& stream) CV_OVERRIDE;
Expand All @@ -135,6 +135,9 @@ namespace
bool get(const int propertyId, double& propertyVal) const CV_OVERRIDE;

private:
bool skipFrame();
bool aquireFrameInfo(std::pair<CUVIDPARSERDISPINFO, CUVIDPROCPARAMS>& frameInfo, Stream& stream = Stream::Null());
void releaseFrameInfo(const std::pair<CUVIDPARSERDISPINFO, CUVIDPROCPARAMS>& frameInfo);
bool internalGrab(GpuMat & frame, GpuMat & histogram, Stream & stream);
void waitForDecoderInit();

Expand All @@ -154,6 +157,7 @@ namespace
static const int rawPacketsBaseIdx = 2;
ColorFormat colorFormat = ColorFormat::BGRA;
static const String errorMsg;
int iFrame = 0;
};

const String VideoReaderImpl::errorMsg = "Parsing/Decoding video source failed, check GPU memory is available and GPU supports requested functionality.";
Expand All @@ -173,7 +177,7 @@ namespace
}

VideoReaderImpl::VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop, const bool udpSource,
const Size targetSz, const Rect srcRoi, const Rect targetRoi, const bool enableHistogram) :
const Size targetSz, const Rect srcRoi, const Rect targetRoi, const bool enableHistogram, const int firstFrameIdx) :
videoSource_(source),
lock_(0)
{
Expand All @@ -190,6 +194,8 @@ namespace
videoSource_->setVideoParser(videoParser_);
videoSource_->start();
waitForDecoderInit();
for(iFrame = videoSource_->getFirstFrameIdx(); iFrame < firstFrameIdx; iFrame++)
CV_Assert(skipFrame());
videoSource_->updateFormat(videoDecoder_->format());
}

Expand All @@ -209,10 +215,7 @@ namespace
CUvideoctxlock m_lock;
};

bool VideoReaderImpl::internalGrab(GpuMat& frame, GpuMat& histogram, Stream& stream) {
if (videoParser_->hasError())
CV_Error(Error::StsError, errorMsg);
cudacodec::FormatInfo fmt;
bool VideoReaderImpl::aquireFrameInfo(std::pair<CUVIDPARSERDISPINFO, CUVIDPROCPARAMS>& frameInfo, Stream& stream) {
if (frames_.empty())
{
CUVIDPARSERDISPINFO displayInfo;
Expand All @@ -234,34 +237,53 @@ namespace

bool isProgressive = displayInfo.progressive_frame != 0;
const int num_fields = isProgressive ? 1 : 2 + displayInfo.repeat_first_field;
fmt = videoDecoder_->format();
videoSource_->updateFormat(fmt);

for (int active_field = 0; active_field < num_fields; ++active_field)
{
CUVIDPROCPARAMS videoProcParams;
std::memset(&videoProcParams, 0, sizeof(CUVIDPROCPARAMS));

videoProcParams.progressive_frame = displayInfo.progressive_frame;
videoProcParams.second_field = active_field;
videoProcParams.top_field_first = displayInfo.top_field_first;
videoProcParams.unpaired_field = (num_fields == 1);
videoProcParams.second_field = active_field;
videoProcParams.top_field_first = displayInfo.top_field_first;
videoProcParams.unpaired_field = (num_fields == 1);
videoProcParams.output_stream = StreamAccessor::getStream(stream);

frames_.push_back(std::make_pair(displayInfo, videoProcParams));
}
}
else {
for (auto& frame : frames_)
frame.second.output_stream = StreamAccessor::getStream(stream);
}

if (frames_.empty())
return false;

std::pair<CUVIDPARSERDISPINFO, CUVIDPROCPARAMS> frameInfo = frames_.front();
frameInfo = frames_.front();
frames_.pop_front();
return true;
}

/** Hands the decoded picture referenced by @p frameInfo back to the frame queue so the decoder can re-use it.

The picture is only released once frames_ has been drained. aquireFrameInfo() queues one entry per
field with the same display info, so entries still waiting in frames_ presumably refer to the same
picture and it must stay alive until the last one has been consumed.

@param frameInfo Display info / processing parameters pair previously obtained from aquireFrameInfo().
*/
void VideoReaderImpl::releaseFrameInfo(const std::pair<CUVIDPARSERDISPINFO, CUVIDPROCPARAMS>& frameInfo) {
    // Pending entries remain: keep the picture for now.
    if (!frames_.empty())
        return;

    frameQueue_->releaseFrame(frameInfo.first);
}

bool VideoReaderImpl::internalGrab(GpuMat& frame, GpuMat& histogram, Stream& stream) {
if (videoParser_->hasError())
CV_Error(Error::StsError, errorMsg);

std::pair<CUVIDPARSERDISPINFO, CUVIDPROCPARAMS> frameInfo;
if (!aquireFrameInfo(frameInfo, stream))
return false;

{
VideoCtxAutoLock autoLock(lock_);

unsigned long long cuHistogramPtr = 0;
const cudacodec::FormatInfo fmt = videoDecoder_->format();
if (fmt.enableHistogram)
frameInfo.second.histogram_dptr = &cuHistogramPtr;

Expand All @@ -281,10 +303,16 @@ namespace
videoDecoder_->unmapFrame(decodedFrame);
}

// release the frame, so it can be re-used in decoder
if (frames_.empty())
frameQueue_->releaseFrame(frameInfo.first);
releaseFrameInfo(frameInfo);
iFrame++;
return true;
}

/** Consumes the next queued frame without mapping or converting it.

The frame info is acquired (using the default stream argument of aquireFrameInfo()) and released
again straight away. iFrame is not modified here; the caller is responsible for tracking the position.

@return `true` if a frame was acquired and released, `false` if aquireFrameInfo() reported that none is available.
*/
bool VideoReaderImpl::skipFrame() {
    std::pair<CUVIDPARSERDISPINFO, CUVIDPROCPARAMS> discarded;
    const bool acquired = aquireFrameInfo(discarded);
    if (acquired)
        releaseFrameInfo(discarded);
    return acquired;
}

Expand Down Expand Up @@ -399,6 +427,10 @@ namespace
}

/** Queries a capture property.

CAP_PROP_POS_FRAMES is answered locally from the reader's own frame counter (iFrame); every other
property is forwarded to the underlying video source.

@param propertyId  Property identifier.
@param propertyVal Receives the property value.
@return `true` for CAP_PROP_POS_FRAMES, otherwise whatever the video source returns.
*/
bool VideoReaderImpl::get(const int propertyId, double& propertyVal) const {
    const bool isFramePosition = (propertyId == cv::VideoCaptureProperties::CAP_PROP_POS_FRAMES);
    if (!isFramePosition)
        return videoSource_->get(propertyId, propertyVal);

    propertyVal = static_cast<double>(iFrame);
    return true;
}

Expand All @@ -421,28 +453,26 @@ Ptr<VideoReader> cv::cudacodec::createVideoReader(const String& filename, const
CV_Assert(!filename.empty());

Ptr<VideoSource> videoSource;

try
{
// prefer ffmpeg to cuvidGetSourceVideoFormat() which doesn't always return the correct raw pixel format
Ptr<RawVideoSource> source(new FFmpegVideoSource(filename, sourceParams));
Ptr<RawVideoSource> source(new FFmpegVideoSource(filename, sourceParams, params.firstFrameIdx));
videoSource.reset(new RawVideoSourceWrapper(source, params.rawMode));
}
catch (...)
{
if (sourceParams.size()) throw;
videoSource.reset(new CuvidVideoSource(filename));
}

return makePtr<VideoReaderImpl>(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource, params.targetSz,
params.srcRoi, params.targetRoi, params.enableHistogram);
params.srcRoi, params.targetRoi, params.enableHistogram, params.firstFrameIdx);
}

// Creates a VideoReader from a caller-supplied RawVideoSource.
// The source is wrapped in a RawVideoSourceWrapper (configured with params.rawMode) and the
// remaining initialization parameters are forwarded to the VideoReaderImpl constructor.
Ptr<VideoReader> cv::cudacodec::createVideoReader(const Ptr<RawVideoSource>& source, const VideoReaderInitParams params)
{
Ptr<VideoSource> videoSource(new RawVideoSourceWrapper(source, params.rawMode));
return makePtr<VideoReaderImpl>(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource, params.targetSz,
params.srcRoi, params.targetRoi, params.enableHistogram);
params.srcRoi, params.targetRoi, params.enableHistogram, params.firstFrameIdx);
}

void cv::cudacodec::MapHist(const GpuMat& hist, Mat& histFull) {
Expand Down
4 changes: 4 additions & 0 deletions modules/cudacodec/src/video_source.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ bool cv::cudacodec::detail::RawVideoSourceWrapper::get(const int propertyId, dou
return source_->get(propertyId, propertyVal);
}

// Forwards to the wrapped RawVideoSource and returns the first-frame index it reports.
int cv::cudacodec::detail::RawVideoSourceWrapper::getFirstFrameIdx() const {
    const int firstIdx = source_->getFirstFrameIdx();
    return firstIdx;
}

void cv::cudacodec::detail::RawVideoSourceWrapper::start()
{
stop_ = false;
Expand Down
2 changes: 2 additions & 0 deletions modules/cudacodec/src/video_source.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class VideoSource
virtual FormatInfo format() const = 0;
virtual void updateFormat(const FormatInfo& videoFormat) = 0;
virtual bool get(const int propertyId, double& propertyVal) const { return false; }
virtual int getFirstFrameIdx() const { return 0; }
virtual void start() = 0;
virtual void stop() = 0;
virtual bool isStarted() const = 0;
Expand Down Expand Up @@ -91,6 +92,7 @@ class RawVideoSourceWrapper : public VideoSource
FormatInfo format() const CV_OVERRIDE;
void updateFormat(const FormatInfo& videoFormat) CV_OVERRIDE;
bool get(const int propertyId, double& propertyVal) const CV_OVERRIDE;
int getFirstFrameIdx() const CV_OVERRIDE;
void start() CV_OVERRIDE;
void stop() CV_OVERRIDE;
bool isStarted() const CV_OVERRIDE;
Expand Down
59 changes: 41 additions & 18 deletions modules/cudacodec/test/test_video.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ struct CheckParams : SetDevice
{
};

// Test fixture for the VideoReader seek-on-initialization test, CUDA_TEST_P(Seek, Reader).
struct Seek : SetDevice
{
};

#if defined(HAVE_NVCUVID)
//////////////////////////////////////////////////////
// VideoReader
Expand Down Expand Up @@ -542,36 +546,22 @@ CUDA_TEST_P(CheckParams, Reader)
ASSERT_TRUE(reader->get(cv::VideoCaptureProperties::CAP_PROP_OPEN_TIMEOUT_MSEC, msActual));
ASSERT_EQ(msActual, msReference);
}

{
std::vector<bool> exceptionsThrown = { false,true };
std::vector<int> capPropFormats = { -1,0 };
for (int i = 0; i < capPropFormats.size(); i++) {
bool exceptionThrown = false;
try {
cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile, {
cv::VideoCaptureProperties::CAP_PROP_FORMAT, capPropFormats.at(i) });
}
catch (cv::Exception &ex) {
if (ex.code == Error::StsUnsupportedFormat)
exceptionThrown = true;
}
ASSERT_EQ(exceptionThrown, exceptionsThrown.at(i));
}
}
}

// Checks the capture properties VideoReader::get() reports for big_buck_bunny.mp4:
// frame width, height and fps, and the frame position after a single grab().
CUDA_TEST_P(CheckParams, CaptureProps)
{
std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../highgui/video/big_buck_bunny.mp4";
cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile);
double width, height, fps;
double width, height, fps, iFrame;
ASSERT_TRUE(reader->get(cv::VideoCaptureProperties::CAP_PROP_FRAME_WIDTH, width));
ASSERT_EQ(672, width);
ASSERT_TRUE(reader->get(cv::VideoCaptureProperties::CAP_PROP_FRAME_HEIGHT, height));
ASSERT_EQ(384, height);
ASSERT_TRUE(reader->get(cv::VideoCaptureProperties::CAP_PROP_FPS, fps));
ASSERT_EQ(24, fps);
// After grabbing exactly one frame the reported position is expected to be 1.
ASSERT_TRUE(reader->grab());
ASSERT_TRUE(reader->get(cv::VideoCaptureProperties::CAP_PROP_POS_FRAMES, iFrame));
ASSERT_EQ(iFrame, 1.);
}

CUDA_TEST_P(CheckDecodeSurfaces, Reader)
Expand Down Expand Up @@ -619,6 +609,37 @@ CUDA_TEST_P(CheckInitParams, Reader)
ASSERT_TRUE(reader->get(cv::cudacodec::VideoReaderProps::PROP_RAW_MODE, rawMode) && static_cast<bool>(rawMode) == params.rawMode);
}

// Verifies seeking on initialization: a reader created with VideoReaderInitParams::firstFrameIdx = N
// must report position N, return the same picture as the (N+1)-th frame of a reader decoding from
// the start of the file, and report position N+1 afterwards.
CUDA_TEST_P(Seek, Reader)
{
    // _WIN32 is the macro the compiler itself predefines for Windows targets; WIN32 is only present
    // when a build flag or header defines it, so both are checked to make the skip reliable.
#if defined(_WIN32) || defined(WIN32)
    throw SkipTestException("Test disabled on Windows until the FFMpeg wrapper is updated to include PR24012.");
#endif
    std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../highgui/video/big_buck_bunny.mp4";
    // seek to a non key frame
    const int firstFrameIdx = 18;

    // Reference: decode sequentially from the beginning; after the loop frameGs holds frame firstFrameIdx.
    GpuMat frameGs;
    {
        cv::Ptr<cv::cudacodec::VideoReader> readerGs = cv::cudacodec::createVideoReader(inputFile);
        ASSERT_TRUE(readerGs->set(cudacodec::ColorFormat::GRAY));
        for (int i = 0; i <= firstFrameIdx; i++)
            ASSERT_TRUE(readerGs->nextFrame(frameGs));
    }

    // Reader under test: ask for firstFrameIdx at construction time.
    cudacodec::VideoReaderInitParams params;
    params.firstFrameIdx = firstFrameIdx;
    cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile, {}, params);

    // Before any frame is read the position must already be the requested index.
    double iFrame = 0.;
    ASSERT_TRUE(reader->get(cv::VideoCaptureProperties::CAP_PROP_POS_FRAMES, iFrame));
    ASSERT_EQ(iFrame, static_cast<double>(firstFrameIdx));

    // The first frame returned must match the reference exactly.
    ASSERT_TRUE(reader->set(cudacodec::ColorFormat::GRAY));
    GpuMat frame;
    ASSERT_TRUE(reader->nextFrame(frame));
    ASSERT_EQ(cuda::norm(frameGs, frame, NORM_INF), 0.0);

    // Reading one frame advances the reported position by one.
    ASSERT_TRUE(reader->get(cv::VideoCaptureProperties::CAP_PROP_POS_FRAMES, iFrame));
    ASSERT_EQ(iFrame, static_cast<double>(firstFrameIdx+1));
}

#endif // HAVE_NVCUVID

#if defined(HAVE_NVCUVID) && defined(HAVE_NVCUVENC)
Expand Down Expand Up @@ -958,5 +979,7 @@ INSTANTIATE_TEST_CASE_P(CUDA_Codec, CheckInitParams, testing::Combine(
testing::Values("highgui/video/big_buck_bunny.mp4"),
testing::Values(true,false), testing::Values(true,false), testing::Values(true,false)));

INSTANTIATE_TEST_CASE_P(CUDA_Codec, Seek, ALL_DEVICES);

#endif // HAVE_NVCUVID || HAVE_NVCUVENC
}} // namespace

0 comments on commit 7b97851

Please sign in to comment.