Skip to content
This repository has been archived by the owner on Jun 10, 2024. It is now read-only.

makefromDevicePtrUint8 returns inaccurate data when running in multiple processes #546

Closed
niujiabenbeng opened this issue Oct 24, 2023 · 2 comments

Comments

@niujiabenbeng
Copy link

I am using PyNvDecoder to decode video and makefromDevicePtrUint8() to wrap the decoded frames as torch tensors. This works fine in a single process but produces some strange frames when running in multiple processes. After some investigation, I found that makefromDevicePtrUint8 sometimes returns inaccurate data when running in multiple processes.
Specifically, when downloading a surface from GPU to CPU, makefromDevicePtrUint8().cpu().numpy() and PySurfaceDownloader produce different results, and the result from makefromDevicePtrUint8 is corrupted.

example.jpg

environment:

  • GPU: NVIDIA GeForce RTX 4090
  • system: Ubuntu 20.04.6 LTS
  • vpf commit: 82b51e7
  • pytorch version: 2.0.1+cu118

reproduce code:

#! /usr/bin/env python
# coding: utf-8

# yapf: disable

import os
import multiprocessing

import cv2
import torch
import numpy as np
import PyNvCodec as nvc
import PytorchNvCodec as pnvc


def to_opencv_image(image, width, height):
    """Convert a flat planar 3-channel buffer into an OpenCV-style image.

    Args:
        image: numpy array holding 3 * height * width elements, laid out
            plane by plane (channel-first).
        width: frame width in pixels.
        height: frame height in pixels.

    Returns:
        A C-contiguous array of shape (height, width, 3) whose channel
        order is reversed relative to the input planes (e.g. RGB -> BGR).
    """
    planes = image.reshape((3, height, width))
    # Reverse the channel order while the channels are still the leading
    # axis, then move that axis to the end to get an interleaved layout.
    reversed_planes = planes[::-1]
    interleaved = np.moveaxis(reversed_planes, 0, -1)
    return np.ascontiguousarray(interleaved)


class NvColorConverter:
    """Color converter using PySurfaceConverter.

    Chains three PySurfaceConverter stages (NV12 -> YUV420 -> RGB ->
    RGB_PLANAR) and offers two ways to copy the resulting planar RGB
    surface to host memory: through a torch tensor or through
    PySurfaceDownloader.
    """

    def __init__(self, width, height, gpuid=0):
        """Create the conversion stages and the downloader.

        Args:
            width: frame width passed to every converter and the downloader.
            height: frame height passed to every converter and the downloader.
            gpuid: GPU id passed to every converter and the downloader.
        """
        # BT.601 color space with MPEG color range is used for every stage.
        self.context = nvc.ColorspaceConversionContext(nvc.ColorSpace.BT_601, nvc.ColorRange.MPEG)
        self.to_yuv = nvc.PySurfaceConverter(width, height, nvc.PixelFormat.NV12, nvc.PixelFormat.YUV420, gpuid)
        self.to_rgb = nvc.PySurfaceConverter(width, height, nvc.PixelFormat.YUV420, nvc.PixelFormat.RGB, gpuid)
        self.to_planar = nvc.PySurfaceConverter(width, height, nvc.PixelFormat.RGB, nvc.PixelFormat.RGB_PLANAR, gpuid)
        self.downloader = nvc.PySurfaceDownloader(width, height, nvc.PixelFormat.RGB_PLANAR, gpuid)

    def convert_color(self, surface):
        """Run `surface` through all conversion stages.

        Args:
            surface: decoded surface to convert (NV12 is what the first
                stage is configured for).

        Returns:
            The surface produced by the last stage (RGB_PLANAR), or None as
            soon as any stage yields an empty surface.
        """
        for stage in (self.to_yuv, self.to_rgb, self.to_planar):
            surface = stage.Execute(surface, self.context)
            if surface.Empty():
                return None
        return surface

    def get_frame_from_torch(self, surface):
        """Copy `surface` to host memory via a torch tensor.

        The surface's plane is wrapped with pnvc.makefromDevicePtrUint8,
        moved to the CPU and returned as a flattened numpy array.

        NOTE(review): this is the code path reported as returning corrupted
        data when several processes run concurrently; no synchronization
        is performed here before the tensor is read -- TODO confirm whether
        the producing converter must be synchronized first.
        """
        surface_plane = surface.PlanePtr()
        surface_tensor = pnvc.makefromDevicePtrUint8(
            surface_plane.GpuMem(),
            surface_plane.Width(),
            surface_plane.Height(),
            surface_plane.Pitch(),
            surface_plane.ElemSize())
        return surface_tensor.cpu().numpy().flatten()

    def get_frame_from_downloader(self, surface):
        """Copy `surface` to host memory via PySurfaceDownloader.

        Returns:
            The uint8 numpy array handed to DownloadSingleSurface
            (presumably resized and filled in place by the downloader).

        Raises:
            RuntimeError: if DownloadSingleSurface reports failure.
        """
        frame = np.ndarray(shape=(0,), dtype=np.uint8)
        # The download must not live inside an `assert`: with `python -O`
        # asserts are stripped, the call would never run and an empty
        # frame would be returned silently.
        if not self.downloader.DownloadSingleSurface(surface, frame):
            raise RuntimeError("failed to download surface")
        return frame


def decode_video(testid, path, gpuid=0):
    """Decode a video and record frames whose two host copies differ.

    Every decoded surface is converted to planar RGB and copied to host
    memory twice: once through a torch tensor and once through
    PySurfaceDownloader. When the two copies differ, both are written
    stacked vertically (torch copy on top) to
    ``images/{testid:02d}_{frame:04d}.jpg``.

    Args:
        testid: integer id used in the output file name.
        path: path of the video file to decode.
        gpuid: GPU id used for the decoder and the color converter.
    """
    dec = nvc.PyNvDecoder(path, gpuid)
    width, height = dec.Width(), dec.Height()
    # Create the converter on the same GPU as the decoder; without the
    # explicit argument it would silently fall back to GPU 0.
    cvt = NvColorConverter(width, height, gpuid)

    for i in range(dec.Numframes()):
        surface = dec.DecodeSingleSurface()
        if surface.Empty():
            break
        surface = cvt.convert_color(surface)
        if surface is None:
            break
        # download same surface in two different ways
        frame1 = cvt.get_frame_from_torch(surface)
        frame2 = cvt.get_frame_from_downloader(surface)
        # Direct equality test; avoids relying on uint8 wrap-around
        # arithmetic to detect a difference.
        if np.array_equal(frame1, frame2):
            continue
        # if two frames are not equal, write them to file
        frame1 = to_opencv_image(frame1, width, height)
        frame2 = to_opencv_image(frame2, width, height)
        image = np.concatenate((frame1, frame2), axis=0)
        # Use a separate name so the `path` argument is not overwritten.
        out_path = f"images/{testid:02d}_{i:04d}.jpg"
        print("write image to: ", out_path)
        cv2.imwrite(out_path, image)

# replace this path
path = "samplevideo.mp4"

# Guard the driver code: start methods that re-import the main module in
# each worker (e.g. "spawn") would otherwise re-run the pools recursively.
if __name__ == "__main__":
    # Ten jobs on the same video, each tagged with its own test id.
    samples = list(enumerate([path] * 10))
    os.makedirs("./images", exist_ok=True)

    # if we use single process, everything works fine.
    print("run in single process:")
    with multiprocessing.Pool(processes=1) as pool:
        pool.starmap(decode_video, samples)

    # if we use 4 processes, some error images are recorded.
    print("run in multiple processes:")
    with multiprocessing.Pool(processes=4) as pool:
        pool.starmap(decode_video, samples)

sample video:
samplevideo

@RomanArzumanyan
Copy link
Contributor

Hi @niujiabenbeng

This looks like a duplicate of #506; please check it out.
Please LMK if that doesn't help.

@niujiabenbeng
Copy link
Author

Hi @RomanArzumanyan
Thank you for such a quick reply.
It works!!!

Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants