Merge pull request #3247 from cudawarped:videoreader_add_rtsp_feature

Add RTSP features to cudacodec::VideoReader * Add live video source enhancements, e.g. RTSP from IP cameras. Add error logs. * Fix type. * Change badly named flag. * Alter live source flag everywhere to indicate what it does, not what it is for, which should be left up to the documentation. * Prevent the frame queue object from being reinitialized, which could be unsafe if another thread and/or object is using it.
opencv · Jun 2, 2022 · b2904b9 · b2904b9
1 parent 84f8ea8
commit b2904b9
Show file tree

Hide file tree

Showing 8 changed files with 155 additions and 63 deletions.
diff --git a/modules/cudacodec/include/opencv2/cudacodec.hpp b/modules/cudacodec/include/opencv2/cudacodec.hpp
@@ -321,6 +321,8 @@ enum class VideoReaderProps {
     PROP_RAW_MODE = 4, //!< Status of raw mode.
     PROP_LRF_HAS_KEY_FRAME = 5, //!< FFmpeg source only - Indicates whether the Last Raw Frame (LRF), output from VideoReader::retrieve() when VideoReader is initialized in raw mode, contains encoded data for a key frame.
     PROP_COLOR_FORMAT = 6, //!< Set the ColorFormat of the decoded frame.  This can be changed before every call to nextFrame() and retrieve().
+    PROP_UDP_SOURCE = 7, //!< Status of VideoReaderInitParams::udpSource initialization.
+    PROP_ALLOW_FRAME_DROP = 8, //!< Status of VideoReaderInitParams::allowFrameDrop initialization.
 #ifndef CV_DOXYGEN
     PROP_NOT_SUPPORTED
 #endif
@@ -468,32 +470,43 @@ class CV_EXPORTS_W RawVideoSource
     virtual bool get(const int propertyId, double& propertyVal) const = 0;
 };
 
+/** @brief VideoReader initialization parameters
+@param udpSource Remove validation which can cause VideoReader() to throw exceptions when reading from a UDP source.
+@param allowFrameDrop Allow frames to be dropped when ingesting from a live capture source to prevent delay and eventual disconnection
+when calls to nextFrame()/grab() cannot keep up with the source's fps.  Only use if delay and disconnection are a problem, i.e. not when decoding from
+video files where setting this flag will cause frames to be unnecessarily discarded.
+@param minNumDecodeSurfaces Minimum number of internal decode surfaces used by the hardware decoder.  NVDEC will automatically determine the minimum number of
+surfaces it requires for correct functionality and optimal video memory usage but not necessarily for best performance, which depends on the design of the
+overall application. The optimal number of decode surfaces (in terms of performance and memory utilization) should be decided by experimentation for each application,
+but it cannot go below the number determined by NVDEC.
+@param rawMode Allow the raw encoded data which has been read up until the last call to grab() to be retrieved by calling retrieve(rawData,RAW_DATA_IDX).
+*/
+struct CV_EXPORTS_W_SIMPLE VideoReaderInitParams {
+    CV_WRAP VideoReaderInitParams() : udpSource(false), allowFrameDrop(false), minNumDecodeSurfaces(0), rawMode(0) {};
+    CV_PROP_RW bool udpSource;
+    CV_PROP_RW bool allowFrameDrop;
+    CV_PROP_RW int minNumDecodeSurfaces;
+    CV_PROP_RW bool rawMode;
+};
+
 /** @brief Creates video reader.
 
 @param filename Name of the input video file.
-@param params Pass through parameters for VideoCapure.  VideoCapture with the FFMpeg back end (CAP_FFMPEG) is used to parse the video input.
-The `params` parameter allows to specify extra parameters encoded as pairs `(paramId_1, paramValue_1, paramId_2, paramValue_2, ...)`.
+@param sourceParams Pass through parameters for VideoCapture.  VideoCapture with the FFMpeg back end (CAP_FFMPEG) is used to parse the video input.
+The `sourceParams` parameter allows to specify extra parameters encoded as pairs `(paramId_1, paramValue_1, paramId_2, paramValue_2, ...)`.
     See cv::VideoCaptureProperties
 e.g. when streaming from an RTSP source CAP_PROP_OPEN_TIMEOUT_MSEC may need to be set.
-@param rawMode Allow the raw encoded data which has been read up until the last call to grab() to be retrieved by calling retrieve(rawData,RAW_DATA_IDX).
-@param minNumDecodeSurfaces Minimum number of internal decode surfaces used by the hardware decoder.  NVDEC will automatically determine the minimum number of
-surfaces it requires for correct functionality and optimal video memory usage but not necessarily for best performance, which depends on the design of the
-overall application. The optimal number of decode surfaces (in terms of performance and memory utilization) should be decided by experimentation for each application,
-but it cannot go below the number determined by NVDEC.
+@param params Initialization parameters. See cv::cudacodec::VideoReaderInitParams.
 
 FFMPEG is used to read videos. User can implement own demultiplexing with cudacodec::RawVideoSource
  */
-CV_EXPORTS_W Ptr<VideoReader> createVideoReader(const String& filename, const std::vector<int>& params = {}, const bool rawMode = false, const int minNumDecodeSurfaces = 0);
+CV_EXPORTS_W Ptr<VideoReader> createVideoReader(const String& filename, const std::vector<int>& sourceParams = {}, const VideoReaderInitParams params = VideoReaderInitParams());
 
 /** @overload
 @param source RAW video source implemented by user.
-@param rawMode Allow the raw encoded data which has been read up until the last call to grab() to be retrieved by calling retrieve(rawData,RAW_DATA_IDX).
-@param minNumDecodeSurfaces Minimum number of internal decode surfaces used by the hardware decoder.  NVDEC will automatically determine the minimum number of
-surfaces it requires for correct functionality and optimal video memory usage but not necessarily for best performance, which depends on the design of the
-overall application. The optimal number of decode surfaces (in terms of performance and memory utilization) should be decided by experimentation for each application,
-but it cannot go below the number determined by NVDEC.
+@param params Initialization parameters. See cv::cudacodec::VideoReaderInitParams.
 */
-CV_EXPORTS_W Ptr<VideoReader> createVideoReader(const Ptr<RawVideoSource>& source, const bool rawMode = false, const int minNumDecodeSurfaces = 0);
+CV_EXPORTS_W Ptr<VideoReader> createVideoReader(const Ptr<RawVideoSource>& source, const VideoReaderInitParams params = VideoReaderInitParams());
 
 //! @}
 

diff --git a/modules/cudacodec/src/frame_queue.cpp b/modules/cudacodec/src/frame_queue.cpp
@@ -57,16 +57,20 @@ cv::cudacodec::detail::FrameQueue::~FrameQueue() {
 
 void cv::cudacodec::detail::FrameQueue::init(const int _maxSz) {
     AutoLock autoLock(mtx_);
+    if (isFrameInUse_)
+        return;
     maxSz = _maxSz;
     displayQueue_ = std::vector<CUVIDPARSERDISPINFO>(maxSz, CUVIDPARSERDISPINFO());
     isFrameInUse_ = new volatile int[maxSz];
     std::memset((void*)isFrameInUse_, 0, sizeof(*isFrameInUse_) * maxSz);
 }
 
-bool cv::cudacodec::detail::FrameQueue::waitUntilFrameAvailable(int pictureIndex)
+bool cv::cudacodec::detail::FrameQueue::waitUntilFrameAvailable(int pictureIndex, const bool allowFrameDrop)
 {
     while (isInUse(pictureIndex))
     {
+        if (allowFrameDrop && dequeueUntil(pictureIndex))
+            break;
         // Decoder is getting too far ahead from display
         Thread::sleep(1);
 
@@ -110,6 +114,20 @@ void cv::cudacodec::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picPa
     } while (!isEndOfDecode());
 }
 
+bool cv::cudacodec::detail::FrameQueue::dequeueUntil(const int pictureIndex) {
+    AutoLock autoLock(mtx_);
+    if (isFrameInUse_[pictureIndex] != 1)
+        return false;
+    for (int i = 0; i < framesInQueue_; i++) {
+        const bool found = displayQueue_.at(readPosition_).picture_index == pictureIndex;
+        isFrameInUse_[displayQueue_.at(readPosition_).picture_index] = 0;
+        framesInQueue_--;
+        readPosition_ = (readPosition_ + 1) % maxSz;
+        if (found) return true;
+    }
+    return false;
+}
+
 bool cv::cudacodec::detail::FrameQueue::dequeue(CUVIDPARSERDISPINFO& displayInfo, std::vector<RawPacket>& rawPackets)
 {
     AutoLock autoLock(mtx_);
@@ -124,6 +142,7 @@ bool cv::cudacodec::detail::FrameQueue::dequeue(CUVIDPARSERDISPINFO& displayInfo
         }
         readPosition_ = (entry + 1) % maxSz;
         framesInQueue_--;
+        isFrameInUse_[displayInfo.picture_index] = 2;
         return true;
     }
 

diff --git a/modules/cudacodec/src/frame_queue.hpp b/modules/cudacodec/src/frame_queue.hpp
@@ -72,7 +72,9 @@ class FrameQueue
     // If the requested frame is available the method returns true.
     // If decoding was interrupted before the requested frame becomes
     // available, the method returns false.
-    bool waitUntilFrameAvailable(int pictureIndex);
+    // If allowFrameDrop == true, spin is disabled and n > 0 frames are discarded
+    // to ensure a frame is available.
+    bool waitUntilFrameAvailable(int pictureIndex, const bool allowFrameDrop = false);
 
     void enqueue(const CUVIDPARSERDISPINFO* picParams, const std::vector<RawPacket> rawPackets);
 
@@ -84,8 +86,16 @@ class FrameQueue
     //      false, if the queue was empty and no new frame could be returned.
     bool dequeue(CUVIDPARSERDISPINFO& displayInfo, std::vector<RawPacket>& rawPackets);
 
-    void releaseFrame(const CUVIDPARSERDISPINFO& picParams) { isFrameInUse_[picParams.picture_index] = false; }
+    // Dequeue all frames up to and including the frame with index pictureIndex - must only
+    // be called in the same thread as enqueue.
+    // Parameters:
+    //      pictureIndex - Display index of the frame.
+    // Returns:
+    //      true, if successful,
+    //      false, if no frames are dequeued.
+    bool dequeueUntil(const int pictureIndex);
 
+    void releaseFrame(const CUVIDPARSERDISPINFO& picParams) { isFrameInUse_[picParams.picture_index] = 0; }
 private:
     bool isInUse(int pictureIndex) const { return isFrameInUse_[pictureIndex] != 0; }
 

diff --git a/modules/cudacodec/src/video_decoder.cpp b/modules/cudacodec/src/video_decoder.cpp
@@ -64,7 +64,10 @@ static const char* GetVideoChromaFormatString(cudaVideoChromaFormat eChromaForma
 
 void cv::cudacodec::detail::VideoDecoder::create(const FormatInfo& videoFormat)
 {
-    videoFormat_ = videoFormat;
+    {
+        AutoLock autoLock(mtx_);
+        videoFormat_ = videoFormat;
+    }
     const cudaVideoCodec _codec = static_cast<cudaVideoCodec>(videoFormat.codec);
     const cudaVideoChromaFormat _chromaFormat = static_cast<cudaVideoChromaFormat>(videoFormat.chromaFormat);
     if (videoFormat.nBitDepthMinus8 > 0) {
@@ -120,9 +123,10 @@ void cv::cudacodec::detail::VideoDecoder::create(const FormatInfo& videoFormat)
     cuSafeCall(cuCtxPushCurrent(ctx_));
     cuSafeCall(cuvidGetDecoderCaps(&decodeCaps));
     cuSafeCall(cuCtxPopCurrent(NULL));
-    if (!(decodeCaps.bIsSupported && (decodeCaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_NV12))))
+    if (!(decodeCaps.bIsSupported && (decodeCaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_NV12)))){
         CV_Error(Error::StsUnsupportedFormat, "Video source is not supported by hardware video decoder");
-
+        CV_LOG_ERROR(NULL, "Video source is not supported by hardware video decoder.");
+    }
     CV_Assert(videoFormat.ulWidth >= decodeCaps.nMinWidth &&
         videoFormat.ulHeight >= decodeCaps.nMinHeight &&
         videoFormat.ulWidth <= decodeCaps.nMaxWidth &&

diff --git a/modules/cudacodec/src/video_parser.cpp b/modules/cudacodec/src/video_parser.cpp
@@ -45,9 +45,10 @@
 
 #ifdef HAVE_NVCUVID
 
-cv::cudacodec::detail::VideoParser::VideoParser(VideoDecoder* videoDecoder, FrameQueue* frameQueue) :
-    videoDecoder_(videoDecoder), frameQueue_(frameQueue), unparsedPackets_(0), hasError_(false)
+cv::cudacodec::detail::VideoParser::VideoParser(VideoDecoder* videoDecoder, FrameQueue* frameQueue, const bool allowFrameDrop, const bool udpSource) :
+    videoDecoder_(videoDecoder), frameQueue_(frameQueue), allowFrameDrop_(allowFrameDrop)
 {
+    if (udpSource) maxUnparsedPackets_ = 0;
     CUVIDPARSERPARAMS params;
     std::memset(&params, 0, sizeof(CUVIDPARSERPARAMS));
 
@@ -78,16 +79,17 @@ bool cv::cudacodec::detail::VideoParser::parseVideoData(const unsigned char* dat
 
     if (cuvidParseVideoData(parser_, &packet) != CUDA_SUCCESS)
     {
+        CV_LOG_ERROR(NULL, "Call to cuvidParseVideoData failed!");
         hasError_ = true;
         frameQueue_->endDecode();
         return false;
     }
 
-    constexpr int maxUnparsedPackets = 20;
-
     ++unparsedPackets_;
-    if (unparsedPackets_ > maxUnparsedPackets)
+    if (maxUnparsedPackets_ && unparsedPackets_ > maxUnparsedPackets_)
     {
+        CV_LOG_ERROR(NULL, "Maxium number of packets (" << maxUnparsedPackets_ << ") parsed without decoding a frame or reconfiguring the decoder, if reading from \
+            a live source consider initializing with VideoReaderInitParams::udpSource == true.");
         hasError_ = true;
         frameQueue_->endDecode();
         return false;
@@ -122,7 +124,8 @@ int CUDAAPI cv::cudacodec::detail::VideoParser::HandleVideoSequence(void* userDa
         newFormat.height = format->coded_height;
         newFormat.displayArea = Rect(Point(format->display_area.left, format->display_area.top), Point(format->display_area.right, format->display_area.bottom));
         newFormat.fps = format->frame_rate.numerator / static_cast<float>(format->frame_rate.denominator);
-        newFormat.ulNumDecodeSurfaces = max(thiz->videoDecoder_->nDecodeSurfaces(), static_cast<int>(format->min_num_decode_surfaces));
+        newFormat.ulNumDecodeSurfaces = min(!thiz->allowFrameDrop_ ? max(thiz->videoDecoder_->nDecodeSurfaces(), static_cast<int>(format->min_num_decode_surfaces)) :
+            format->min_num_decode_surfaces * 2, 32);
         if (format->progressive_sequence)
             newFormat.deinterlaceMode = Weave;
         else
@@ -149,6 +152,7 @@ int CUDAAPI cv::cudacodec::detail::VideoParser::HandleVideoSequence(void* userDa
         }
         catch (const cv::Exception&)
         {
+            CV_LOG_ERROR(NULL, "Attempt to reconfigure Nvidia decoder failed!");
             thiz->hasError_ = true;
             return false;
         }
@@ -163,13 +167,13 @@ int CUDAAPI cv::cudacodec::detail::VideoParser::HandlePictureDecode(void* userDa
 
     thiz->unparsedPackets_ = 0;
 
-    bool isFrameAvailable = thiz->frameQueue_->waitUntilFrameAvailable(picParams->CurrPicIdx);
-
+    bool isFrameAvailable = thiz->frameQueue_->waitUntilFrameAvailable(picParams->CurrPicIdx, thiz->allowFrameDrop_);
     if (!isFrameAvailable)
         return false;
 
     if (!thiz->videoDecoder_->decodePicture(picParams))
     {
+        CV_LOG_ERROR(NULL, "Decoding failed!");
         thiz->hasError_ = true;
         return false;
     }

diff --git a/modules/cudacodec/src/video_parser.hpp b/modules/cudacodec/src/video_parser.hpp
@@ -52,7 +52,7 @@ namespace cv { namespace cudacodec { namespace detail {
 class VideoParser
 {
 public:
-    VideoParser(VideoDecoder* videoDecoder, FrameQueue* frameQueue);
+    VideoParser(VideoDecoder* videoDecoder, FrameQueue* frameQueue, const bool allowFrameDrop = false, const bool udpSource = false);
 
     ~VideoParser()
     {
@@ -63,13 +63,19 @@ class VideoParser
 
     bool hasError() const { return hasError_; }
 
+    bool udpSource() const { return  maxUnparsedPackets_ == 0; }
+
+    bool allowFrameDrops() const { return allowFrameDrop_; }
+
 private:
-    VideoDecoder* videoDecoder_;
-    FrameQueue* frameQueue_;
+    VideoDecoder* videoDecoder_ = 0;
+    FrameQueue* frameQueue_ = 0;
     CUvideoparser parser_;
-    int unparsedPackets_;
+    int unparsedPackets_ = 0;
+    int maxUnparsedPackets_ = 20;
     std::vector<RawPacket> currentFramePackets;
-    volatile bool hasError_;
+    volatile bool hasError_ = false;
+    bool allowFrameDrop_ = false;
 
     // Called when the decoder encounters a video format change (or initial sequence header)
     // This particular implementation of the callback returns 0 in case the video format changes

diff --git a/modules/cudacodec/src/video_reader.cpp b/modules/cudacodec/src/video_reader.cpp
@@ -48,8 +48,8 @@ using namespace cv::cudacodec;
 
 #ifndef HAVE_NVCUVID
 
-Ptr<VideoReader> cv::cudacodec::createVideoReader(const String&, const std::vector<int>&, const bool, const int) { throw_no_cuda(); return Ptr<VideoReader>(); }
-Ptr<VideoReader> cv::cudacodec::createVideoReader(const Ptr<RawVideoSource>&, const bool, const int) { throw_no_cuda(); return Ptr<VideoReader>(); }
+Ptr<VideoReader> cv::cudacodec::createVideoReader(const String&, const std::vector<int>&, const VideoReaderInitParams) { throw_no_cuda(); return Ptr<VideoReader>(); }
+Ptr<VideoReader> cv::cudacodec::createVideoReader(const Ptr<RawVideoSource>&, const VideoReaderInitParams) { throw_no_cuda(); return Ptr<VideoReader>(); }
 
 #else // HAVE_NVCUVID
 
@@ -86,7 +86,7 @@ namespace
     class VideoReaderImpl : public VideoReader
     {
     public:
-        explicit VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces);
+        explicit VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop = false , const bool udpSource = false);
         ~VideoReaderImpl();
 
         bool nextFrame(GpuMat& frame, Stream& stream) CV_OVERRIDE;
@@ -130,7 +130,7 @@ namespace
         return videoSource_->format();
     }
 
-    VideoReaderImpl::VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces) :
+    VideoReaderImpl::VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop, const bool udpSource) :
         videoSource_(source),
         lock_(0)
     {
@@ -143,7 +143,7 @@ namespace
         cuSafeCall( cuvidCtxLockCreate(&lock_, ctx) );
         frameQueue_.reset(new FrameQueue());
         videoDecoder_.reset(new VideoDecoder(videoSource_->format().codec, minNumDecodeSurfaces, ctx, lock_));
-        videoParser_.reset(new VideoParser(videoDecoder_, frameQueue_));
+        videoParser_.reset(new VideoParser(videoDecoder_, frameQueue_, allowFrameDrop, udpSource));
         videoSource_->setVideoParser(videoParser_);
         videoSource_->start();
     }
@@ -291,10 +291,10 @@ namespace
         case VideoReaderProps::PROP_NUMBER_OF_RAW_PACKAGES_SINCE_LAST_GRAB:
             propertyVal = rawPackets.size();
             return true;
-        case::VideoReaderProps::PROP_RAW_MODE:
+        case VideoReaderProps::PROP_RAW_MODE:
             propertyVal = videoSource_->RawModeEnabled();
             return true;
-        case::VideoReaderProps::PROP_LRF_HAS_KEY_FRAME: {
+        case VideoReaderProps::PROP_LRF_HAS_KEY_FRAME: {
             const int iPacket = propertyVal - rawPacketsBaseIdx;
             if (videoSource_->RawModeEnabled() && iPacket >= 0 && iPacket < rawPackets.size()) {
                 propertyVal = rawPackets.at(iPacket).containsKeyFrame;
@@ -303,6 +303,14 @@ namespace
             else
                 break;
         }
+        case VideoReaderProps::PROP_ALLOW_FRAME_DROP: {
+            propertyVal = videoParser_->allowFrameDrops();
+            return true;
+        }
+        case VideoReaderProps::PROP_UDP_SOURCE: {
+            propertyVal = videoParser_->udpSource();
+            return true;
+        }
         default:
             break;
         }
@@ -321,7 +329,7 @@ namespace
     }
 }
 
-Ptr<VideoReader> cv::cudacodec::createVideoReader(const String& filename, const std::vector<int>& params, const bool rawMode, const int minNumDecodeSurfaces)
+Ptr<VideoReader> cv::cudacodec::createVideoReader(const String& filename, const std::vector<int>& sourceParams, const VideoReaderInitParams params)
 {
     CV_Assert(!filename.empty());
 
@@ -330,22 +338,22 @@ Ptr<VideoReader> cv::cudacodec::createVideoReader(const String& filename, const
     try
     {
         // prefer ffmpeg to cuvidGetSourceVideoFormat() which doesn't always return the corrct raw pixel format
-        Ptr<RawVideoSource> source(new FFmpegVideoSource(filename, params));
-        videoSource.reset(new RawVideoSourceWrapper(source, rawMode));
+        Ptr<RawVideoSource> source(new FFmpegVideoSource(filename, sourceParams));
+        videoSource.reset(new RawVideoSourceWrapper(source, params.rawMode));
     }
     catch (...)
     {
-        if (params.size()) throw;
+        if (sourceParams.size()) throw;
         videoSource.reset(new CuvidVideoSource(filename));
     }
 
-    return makePtr<VideoReaderImpl>(videoSource, minNumDecodeSurfaces);
+    return makePtr<VideoReaderImpl>(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource);
 }
 
-Ptr<VideoReader> cv::cudacodec::createVideoReader(const Ptr<RawVideoSource>& source, const bool rawMode, const int minNumDecodeSurfaces)
+Ptr<VideoReader> cv::cudacodec::createVideoReader(const Ptr<RawVideoSource>& source, const VideoReaderInitParams params)
 {
-    Ptr<VideoSource> videoSource(new RawVideoSourceWrapper(source, rawMode));
-    return makePtr<VideoReaderImpl>(videoSource, minNumDecodeSurfaces);
+    Ptr<VideoSource> videoSource(new RawVideoSourceWrapper(source, params.rawMode));
+    return makePtr<VideoReaderImpl>(videoSource, params.minNumDecodeSurfaces);
 }
 
 #endif // HAVE_NVCUVID