Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Add motion vector decoder. #291

Merged
merged 12 commits into from
Nov 30, 2020
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
- Support GYM99 data preparation ([#331](https://github.com/open-mmlab/mmaction2/pull/331))
- Add GradCAM utils for recognizer ([#324](https://github.com/open-mmlab/mmaction2/pull/324))
- Add print config script ([#345](https://github.com/open-mmlab/mmaction2/pull/345))
- Add online motion vector decoder ([#291](https://github.com/open-mmlab/mmaction2/pull/291))

**Improvements**
- Support PyTorch 1.7 in CI ([#312](https://github.com/open-mmlab/mmaction2/pull/312))
Expand Down
7 changes: 4 additions & 3 deletions mmaction/datasets/pipelines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@
GenerateLocalizationLabels, ImageDecode,
LoadAudioFeature, LoadHVULabel, LoadLocalizationFeature,
LoadProposals, OpenCVDecode, OpenCVInit, PyAVDecode,
PyAVInit, RawFrameDecode, SampleAVAFrames, SampleFrames,
SampleProposalFrames, UntrimmedSampleFrames)
PyAVDecodeMotionVector, PyAVInit, RawFrameDecode,
SampleAVAFrames, SampleFrames, SampleProposalFrames,
UntrimmedSampleFrames)

__all__ = [
'SampleFrames', 'PyAVDecode', 'DecordDecode', 'DenseSampleFrames',
Expand All @@ -31,5 +32,5 @@
'FormatAudioShape', 'LoadAudioFeature', 'AudioFeatureSelector',
'AudioDecodeInit', 'EntityBoxPad', 'EntityBoxFlip', 'EntityBoxCrop',
'EntityBoxRescale', 'EntityBoxClip', 'RandomScale', 'ImageDecode',
'BuildPseudoClip', 'RandomRescale'
'BuildPseudoClip', 'RandomRescale', 'PyAVDecodeMotionVector'
]
81 changes: 81 additions & 0 deletions mmaction/datasets/pipelines/loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,87 @@ def __repr__(self):
return repr_str
SuX97 marked this conversation as resolved.
Show resolved Hide resolved


@PIPELINES.register_module()
class PyAVDecodeMotionVector(PyAVDecode):
    """Using pyav to decode the motion vectors from video.

    Reference: https://github.com/PyAV-Org/PyAV/
        blob/main/tests/test_decode.py

    Required keys are "video_reader" and "frame_inds",
    added or modified keys are "motion_vectors", "frame_inds" and
    "video_reader" (reset to None once decoding has finished).

    Args:
        multi_thread (bool): If set to True, it will apply multi
            thread processing. Default: False.
    """

    def _parse_vectors(self, mv, vectors, height, width):
        """Paint per-block motion vectors onto a dense per-pixel map.

        Every entry of ``vectors`` describes one block of size ``w`` x ``h``
        that is taken to be centred on ``(dst_x, dst_y)``. All pixels of a
        block receive the displacement ``(dst_x - src_x, dst_y - src_y)``.
        Blocks that do not lie completely inside the frame are skipped.

        Args:
            mv (np.ndarray): Map indexed as ``mv[y, x]`` whose last axis has
                size 2. It is modified in place.
            vectors (np.ndarray): Structured array providing the fields
                "w", "h", "src_x", "src_y", "dst_x" and "dst_y".
            height (int): Frame height in pixels.
            width (int): Frame width in pixels.

        Returns:
            np.ndarray: ``mv``, i.e. the same object that was passed in.
        """
        # Widen every field to a signed 64-bit integer first. The block
        # sizes may arrive as narrow unsigned integers, and negating those
        # either wraps around or raises OverflowError, depending on the
        # NumPy version.
        w, h, src_x, src_y, dst_x, dst_y = (
            np.asarray(vectors[key], dtype=np.int64)
            for key in ('w', 'h', 'src_x', 'src_y', 'dst_x', 'dst_y'))

        val_x = dst_x - src_x
        val_y = dst_y - src_y
        if np.issubdtype(mv.dtype, np.integer):
            # Saturate instead of letting large displacements wrap around
            # (and flip sign) when they are stored into a narrow map.
            limits = np.iinfo(mv.dtype)
            val_x = np.clip(val_x, limits.min, limits.max)
            val_y = np.clip(val_y, limits.min, limits.max)

        start_x = -w // 2 + dst_x
        start_y = -h // 2 + dst_y
        end_x = start_x + w
        end_y = start_y + h

        for sx, ex, sy, ey, vx, vy in zip(start_x.tolist(), end_x.tolist(),
                                          start_y.tolist(), end_y.tolist(),
                                          val_x.tolist(), val_y.tolist()):
            # Slice ends are exclusive, so a block whose end equals the
            # frame size still lies fully inside the frame.
            if sx >= 0 and ex <= width and sy >= 0 and ey <= height:
                mv[sy:ey, sx:ex] = (vx, vy)

        return mv

    def __call__(self, results):
        """Perform the PyAV motion vector decoding.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.

        Returns:
            dict: ``results`` with "motion_vectors" added (one
                ``(height, width, 2)`` int8 map per entry of "frame_inds",
                stacked along a new first axis), "frame_inds" flattened
                to 1-dim and "video_reader" set to None.
        """
        container = results['video_reader']
        imgs = list()

        if self.multi_thread:
            container.streams.video[0].thread_type = 'AUTO'
        if results['frame_inds'].ndim != 1:
            # ravel (unlike squeeze) always yields a 1-dim array, also for
            # inputs such as shape (1, 1).
            results['frame_inds'] = np.ravel(results['frame_inds'])
        frame_inds = results['frame_inds']

        # Frames are decoded sequentially from the start of the stream, so
        # decoding can stop shortly after the largest requested index.
        num_needed = int(np.max(frame_inds)) + 2
        stream = container.streams.video[0]
        codec_context = stream.codec_context
        # ask the decoder to attach motion vectors to the decoded frames
        codec_context.options = {'flags2': '+export_mvs'}
        for packet in container.demux(stream):
            # Checked per packet as well: leaving only the inner loop would
            # still demux and decode the rest of the video.
            if len(imgs) >= num_needed:
                break
            for frame in packet.decode():
                if len(imgs) >= num_needed:
                    break
                height = frame.height
                width = frame.width
                mv = np.zeros((height, width, 2), dtype=np.int8)
                vectors = frame.side_data.get('MOTION_VECTORS')
                if frame.key_frame:
                    # Key frames do not have motion vectors
                    assert vectors is None
                if vectors is not None and len(vectors) > 0:
                    mv = self._parse_vectors(mv, vectors.to_ndarray(),
                                             height, width)
                imgs.append(mv)

        results['video_reader'] = None
        del container

        # the available frame in pyav may be less than its length,
        # which may raise error; wrap the indices around instead
        results['motion_vectors'] = np.array(
            [imgs[i % len(imgs)] for i in frame_inds])
        return results


@PIPELINES.register_module()
class DecordInit:
"""Using decord to initialize the video_reader.
Expand Down
29 changes: 28 additions & 1 deletion tests/test_data/test_loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
LoadAudioFeature, LoadHVULabel,
LoadLocalizationFeature,
LoadProposals, OpenCVDecode,
OpenCVInit, PyAVDecode, PyAVInit,
OpenCVInit, PyAVDecode,
PyAVDecodeMotionVector, PyAVInit,
RawFrameDecode, SampleAVAFrames,
SampleFrames, SampleProposalFrames,
UntrimmedSampleFrames)
Expand Down Expand Up @@ -1466,3 +1467,29 @@ def test_audio_feature_selector(self):
assert repr(audio_feature_selector) == (
f'{audio_feature_selector.__class__.__name__}('
f'fix_length={128})')

def test_pyav_decode_motion_vector(self):
    """Check that decoding adds "motion_vectors" for 2-dim and 1-dim inds."""
    expected_keys = ['motion_vectors']
    index_variants = (
        # test pyav with 2-dim input
        np.arange(0, 32, 1)[:, np.newaxis],
        # test pyav with 1 dim input
        np.arange(0, 32, 1),
    )

    for frame_inds in index_variants:
        # fresh transforms and a fresh results dict for every variant
        opened = PyAVInit()({
            'filename': self.video_path,
            'frame_inds': frame_inds
        })
        decoded = PyAVDecodeMotionVector()(opened)
        assert self.check_keys_contain(decoded.keys(), expected_keys)