makefromDevicePtrUint8 returns inaccurate data when running in multiple processes #546

niujiabenbeng · 2023-10-24T08:11:15Z

I am using PyNvDecoder to decode video and makefromDevicePtrUint8() to wrap the decoded frames as torch tensors. This works fine in a single process, but produces some strange frames when running in multiple processes. After some investigation, we found that makefromDevicePtrUint8 sometimes returns inaccurate data when running in multiple processes.
Specifically, when the same surface is downloaded from GPU to CPU, makefromDevicePtrUint8().cpu().numpy() and PySurfaceDownloader produce different results, and the result from makefromDevicePtrUint8 is the corrupted one.

environment:

GPU: NVIDIA GeForce RTX 4090
system: Ubuntu 20.04.6 LTS
vpf commit: 82b51e7
pytorch version: 2.0.1+cu118

reproduce code:

#! /usr/bin/env python
# coding: utf-8

# yapf: disable

import os
import multiprocessing

import cv2
import torch
import numpy as np
import PyNvCodec as nvc
import PytorchNvCodec as pnvc


def to_opencv_image(image, width, height):
    """Turn a flat planar 3-channel buffer into an interleaved image array.

    The input is viewed as ``(3, height, width)``, rearranged to
    ``(height, width, 3)`` and its channel order is reversed, so planar RGB
    data comes out in BGR order.

    Args:
        image: NumPy array holding ``3 * height * width`` elements.
        width: Frame width in pixels.
        height: Frame height in pixels.

    Returns:
        A C-contiguous ``(height, width, 3)`` array with reversed channels.
    """
    planes = image.reshape((3, height, width))
    # Channels-first -> channels-last.
    interleaved = planes.transpose(1, 2, 0)
    # Reverse the channel axis.
    flipped = interleaved[..., ::-1]
    return np.ascontiguousarray(flipped)


class NvColorConverter:
    """Color converter using PySurfaceConverter.

    Chains three ``nvc.PySurfaceConverter`` stages
    (NV12 -> YUV420 -> RGB -> RGB_PLANAR) and offers two ways of copying the
    resulting surface to host memory, so that their outputs can be compared.
    """

    def __init__(self, width, height, gpuid=0):
        """Create the converter stages and the downloader.

        Args:
            width: Frame width in pixels.
            height: Frame height in pixels.
            gpuid: GPU ordinal handed to every converter and the downloader.
        """
        # All conversions share one BT.601 / MPEG-range context.
        self.context = nvc.ColorspaceConversionContext(
            nvc.ColorSpace.BT_601, nvc.ColorRange.MPEG)
        self.to_yuv = nvc.PySurfaceConverter(
            width, height, nvc.PixelFormat.NV12, nvc.PixelFormat.YUV420, gpuid)
        self.to_rgb = nvc.PySurfaceConverter(
            width, height, nvc.PixelFormat.YUV420, nvc.PixelFormat.RGB, gpuid)
        self.to_planar = nvc.PySurfaceConverter(
            width, height, nvc.PixelFormat.RGB, nvc.PixelFormat.RGB_PLANAR, gpuid)
        self.downloader = nvc.PySurfaceDownloader(
            width, height, nvc.PixelFormat.RGB_PLANAR, gpuid)

    def convert_color(self, surface):
        """Run ``surface`` through the NV12 -> RGB_PLANAR conversion chain.

        Args:
            surface: Input surface (the first stage is configured for NV12).

        Returns:
            The surface produced by the last stage, or ``None`` as soon as
            any stage returns an empty surface.
        """
        for converter in (self.to_yuv, self.to_rgb, self.to_planar):
            surface = converter.Execute(surface, self.context)
            if surface.Empty():
                return None
        return surface

    def get_frame_from_torch(self, surface):
        """Copy ``surface`` to the host through a torch tensor.

        The plane returned by ``surface.PlanePtr()`` is wrapped with
        ``pnvc.makefromDevicePtrUint8`` and then moved to the CPU.

        Returns:
            A flattened NumPy array with the tensor contents.
        """
        surface_plane = surface.PlanePtr()
        surface_tensor = pnvc.makefromDevicePtrUint8(
            surface_plane.GpuMem(),
            surface_plane.Width(),
            surface_plane.Height(),
            surface_plane.Pitch(),
            surface_plane.ElemSize())
        return surface_tensor.cpu().numpy().flatten()

    def get_frame_from_downloader(self, surface):
        """Copy ``surface`` to the host through ``PySurfaceDownloader``.

        Returns:
            The uint8 NumPy array that was passed to the downloader.

        Raises:
            RuntimeError: If ``DownloadSingleSurface`` reports failure.
        """
        # Destination array handed to the downloader; presumably resized and
        # filled by it -- confirm against the VPF API.
        frame = np.ndarray(shape=(0,), dtype=np.uint8)
        # Call the downloader outside of ``assert``: asserts are stripped
        # under ``python -O``, which would silently skip the download.
        if not self.downloader.DownloadSingleSurface(surface, frame):
            raise RuntimeError("failed to download surface to host memory")
        return frame


def decode_video(testid, path, gpuid=0):
    """Decode a video and cross-check two GPU-to-host download paths.

    Every decoded frame is converted to planar RGB and copied to the host
    twice: once through ``makefromDevicePtrUint8`` (torch) and once through
    ``PySurfaceDownloader``. When the two copies differ, both are written
    stacked vertically (torch result on top) to
    ``images/<testid>_<frame>.jpg``.

    Args:
        testid: Integer used as prefix of the output file names.
        path: Path of the video file to decode.
        gpuid: GPU ordinal used for the decoder and the color converter.
    """
    dec = nvc.PyNvDecoder(path, gpuid)
    width, height = dec.Width(), dec.Height()
    # Build the converter on the same GPU as the decoder; without passing
    # ``gpuid`` it would always fall back to GPU 0.
    cvt = NvColorConverter(width, height, gpuid=gpuid)

    for i in range(dec.Numframes()):
        surface = dec.DecodeSingleSurface()
        if surface.Empty():
            break
        surface = cvt.convert_color(surface)
        if surface is None:
            break
        # download same surface in two different ways
        frame1 = cvt.get_frame_from_torch(surface)
        frame2 = cvt.get_frame_from_downloader(surface)
        # Direct equality check; avoids relying on uint8 wrap-around in
        # ``abs(frame1 - frame2)`` and tolerates differing shapes.
        if np.array_equal(frame1, frame2):
            continue
        # if two frames are not equal, write them to file
        image = np.concatenate(
            (to_opencv_image(frame1, width, height),
             to_opencv_image(frame2, width, height)),
            axis=0)
        # Separate name so the ``path`` argument is not overwritten.
        out_path = f"images/{testid:02d}_{i:04d}.jpg"
        print("write image to: ", out_path)
        cv2.imwrite(out_path, image)

# replace this path
path = "samplevideo.mp4"
# Ten (testid, path) jobs, all decoding the same video.
samples = list(enumerate([path] * 10))

# Guard the driver: with the "spawn" start method every worker re-imports
# this module, and unguarded pool creation would run again in each worker.
if __name__ == "__main__":
    os.makedirs("./images", exist_ok=True)

    # if we use single process, everything works fine.
    print("run in single process:")
    with multiprocessing.Pool(processes=1) as pool:
        pool.starmap(decode_video, samples)

    # if we use 4 processes, some error images are recorded.
    print("run in multiple processes:")
    with multiprocessing.Pool(processes=4) as pool:
        pool.starmap(decode_video, samples)

sample video:
samplevideo

The text was updated successfully, but these errors were encountered:

RomanArzumanyan · 2023-10-24T08:14:22Z

Hi @niujiabenbeng

This looks like a duplicate of #506; please check it out.
Please let me know if that doesn't help.

niujiabenbeng · 2023-10-24T08:34:32Z

Hi @RomanArzumanyan
Thank you for such a quick reply.
It works!!!

niujiabenbeng closed this as completed Oct 24, 2023

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

makefromDevicePtrUint8 returns inaccurate data when running in multiple processes #546

makefromDevicePtrUint8 returns inaccurate data when running in multiple processes #546

niujiabenbeng commented Oct 24, 2023

RomanArzumanyan commented Oct 24, 2023

niujiabenbeng commented Oct 24, 2023

makefromDevicePtrUint8 returns inaccurate data when running in multiple processes #546

makefromDevicePtrUint8 returns inaccurate data when running in multiple processes #546

Comments

niujiabenbeng commented Oct 24, 2023

RomanArzumanyan commented Oct 24, 2023

niujiabenbeng commented Oct 24, 2023