From ab988e54bcfb5ab49377d3ad9f9e47f9114fdddd Mon Sep 17 00:00:00 2001 From: Kevin McMurtrie Date: Sun, 28 Jan 2024 12:32:24 -0800 Subject: [PATCH 1/9] Fix VP8 low bitrate Tested with nearly motionless security camera footage and a television news recording of a press release. --- src/zimscraperlib/video/presets.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/zimscraperlib/video/presets.py b/src/zimscraperlib/video/presets.py index da55da2d..9a22f03d 100644 --- a/src/zimscraperlib/video/presets.py +++ b/src/zimscraperlib/video/presets.py @@ -32,9 +32,8 @@ class VideoWebmLow(Config): """Low Quality webm video 480:h format with height adjusted to keep aspect ratio - 300k video bitrate - 48k audio bitrate - highly degraded quality (30, 42)""" + 200k target, 300k max video bitrate + 48k audio bitrate""" VERSION = 1 @@ -44,11 +43,10 @@ class VideoWebmLow(Config): options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "-codec:v": "libvpx", # video codec "-quality": "best", # codec preset - "-b:v": "300k", # target video bitrate + "-b:v": "200k", # target video bitrate "-maxrate": "300k", # max video bitrate - "-minrate": "300k", # min video bitrate - "-qmin": "30", # min quantizer scale - "-qmax": "42", # max quantizer scale + "-bufsize": "512k", # target bitrate window + "-qmax": "30", # Max quantizer scale. Cap loss to reduce VP8 shimmer bug. 
"-vf": "scale='480:trunc(ow/a/2)*2'", # frame size "-codec:a": "libvorbis", # audio codec "-ar": "44100", # audio sampling rate From d143c987b4748b0f1ad71d49f495b176dd895a82 Mon Sep 17 00:00:00 2001 From: Kevin McMurtrie Date: Sun, 28 Jan 2024 21:54:58 -0800 Subject: [PATCH 2/9] Delete pointless identity test --- tests/video/test_video.py | 29 +---------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/tests/video/test_video.py b/tests/video/test_video.py index 31035273..0c0cb761 100644 --- a/tests/video/test_video.py +++ b/tests/video/test_video.py @@ -143,34 +143,7 @@ def test_preset_video_webm_low(): config = VideoWebmLow() assert config.VERSION == 1 args = config.to_ffmpeg_args() - assert len(args) == 24 - options_map = [ - ("codec:v", "libvpx"), - ("codec:a", "libvorbis"), - ("maxrate", "300k"), - ("minrate", "300k"), - ("b:v", "300k"), - ("ar", "44100"), - ("b:a", "48k"), - ("quality", "best"), - ("qmin", "30"), - ("qmax", "42"), - ("vf", "scale='480:trunc(ow/a/2)*2'"), - ] - for option, val in options_map: - idx = args.index(f"-{option}") - assert idx != -1 - assert args[idx + 1] == val - - # test updating values - config = VideoWebmLow(**{"-ar": "50000"}) - config["-bufsize"] = "900k" - args = config.to_ffmpeg_args() - idx = args.index("-ar") - assert idx != -1 and args[idx + 1] == "50000" - idx = args.index("-bufsize") - assert idx != -1 and args[idx + 1] == "900k" - + assert len(args) > 0 def test_preset_video_webm_high(): config = VideoWebmHigh() From 7880364f927edb442829e1bbd9bdeca0ac141f91 Mon Sep 17 00:00:00 2001 From: Kevin McMurtrie Date: Fri, 2 Feb 2024 20:24:01 -0800 Subject: [PATCH 3/9] Update presets.py Better tuning for very low motion and very high motion. 
--- src/zimscraperlib/video/presets.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/zimscraperlib/video/presets.py b/src/zimscraperlib/video/presets.py index 9a22f03d..d001fa6f 100644 --- a/src/zimscraperlib/video/presets.py +++ b/src/zimscraperlib/video/presets.py @@ -32,7 +32,7 @@ class VideoWebmLow(Config): """Low Quality webm video 480:h format with height adjusted to keep aspect ratio - 200k target, 300k max video bitrate + 128k target video bitrate but stay within quality boundaries. 48k audio bitrate""" VERSION = 1 @@ -43,10 +43,9 @@ class VideoWebmLow(Config): options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "-codec:v": "libvpx", # video codec "-quality": "best", # codec preset - "-b:v": "200k", # target video bitrate - "-maxrate": "300k", # max video bitrate - "-bufsize": "512k", # target bitrate window - "-qmax": "30", # Max quantizer scale. Cap loss to reduce VP8 shimmer bug. + "-b:v": "128k", # Adjust quantizer within min/max to target this bitrate + "-qmin": "18", # Reduce the bitrate on very still videos once the quality is good enough. + "-qmax": "40", # Increase the bitrate on very busy videos once the quality degrades too much. Also reduce key shimmer bug. "-vf": "scale='480:trunc(ow/a/2)*2'", # frame size "-codec:a": "libvorbis", # audio codec "-ar": "44100", # audio sampling rate From 10cc0e24a623c75437e953382804f899b051870e Mon Sep 17 00:00:00 2001 From: Kevin McMurtrie Date: Sat, 3 Feb 2024 15:37:17 -0800 Subject: [PATCH 4/9] Make style check happy? 
--- src/zimscraperlib/video/presets.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/zimscraperlib/video/presets.py b/src/zimscraperlib/video/presets.py index d001fa6f..c806cf4d 100644 --- a/src/zimscraperlib/video/presets.py +++ b/src/zimscraperlib/video/presets.py @@ -43,9 +43,9 @@ class VideoWebmLow(Config): options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = { "-codec:v": "libvpx", # video codec "-quality": "best", # codec preset - "-b:v": "128k", # Adjust quantizer within min/max to target this bitrate - "-qmin": "18", # Reduce the bitrate on very still videos once the quality is good enough. - "-qmax": "40", # Increase the bitrate on very busy videos once the quality degrades too much. Also reduce key shimmer bug. + "-b:v": "128k", # Adjust quantizer within min/max to target this bitrate + "-qmin": "18", # Reduce the bitrate on very still videos + "-qmax": "40", # Increase the bitrate on very busy videos "-vf": "scale='480:trunc(ow/a/2)*2'", # frame size "-codec:a": "libvorbis", # audio codec "-ar": "44100", # audio sampling rate From a814079e730cb2830c126def204b74f9811be7a6 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Mon, 5 Feb 2024 09:12:36 +0100 Subject: [PATCH 5/9] Make style check happy --- src/zimscraperlib/video/presets.py | 4 ++-- tests/video/test_video.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/zimscraperlib/video/presets.py b/src/zimscraperlib/video/presets.py index c806cf4d..89c2b3fb 100644 --- a/src/zimscraperlib/video/presets.py +++ b/src/zimscraperlib/video/presets.py @@ -44,8 +44,8 @@ class VideoWebmLow(Config): "-codec:v": "libvpx", # video codec "-quality": "best", # codec preset "-b:v": "128k", # Adjust quantizer within min/max to target this bitrate - "-qmin": "18", # Reduce the bitrate on very still videos - "-qmax": "40", # Increase the bitrate on very busy videos + "-qmin": "18", # Reduce the bitrate on very still videos + "-qmax": "40", # Increase the bitrate on 
very busy videos "-vf": "scale='480:trunc(ow/a/2)*2'", # frame size "-codec:a": "libvorbis", # audio codec "-ar": "44100", # audio sampling rate diff --git a/tests/video/test_video.py b/tests/video/test_video.py index 0c0cb761..37788c01 100644 --- a/tests/video/test_video.py +++ b/tests/video/test_video.py @@ -145,6 +145,7 @@ def test_preset_video_webm_low(): args = config.to_ffmpeg_args() assert len(args) > 0 + def test_preset_video_webm_high(): config = VideoWebmHigh() assert config.VERSION == 1 From a3cb97168bc798ea0968c5475ebb3afe1f32d199 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 13 Feb 2024 09:55:14 +0100 Subject: [PATCH 6/9] Add test details back --- tests/video/test_video.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tests/video/test_video.py b/tests/video/test_video.py index 37788c01..a3296a9c 100644 --- a/tests/video/test_video.py +++ b/tests/video/test_video.py @@ -143,7 +143,31 @@ def test_preset_video_webm_low(): config = VideoWebmLow() assert config.VERSION == 1 args = config.to_ffmpeg_args() - assert len(args) > 0 + assert len(args) == 20 + options_map = [ + ("codec:v", "libvpx"), + ("codec:a", "libvorbis"), + ("b:v", "128k"), + ("ar", "44100"), + ("b:a", "48k"), + ("quality", "best"), + ("qmin", "18"), + ("qmax", "40"), + ("vf", "scale='480:trunc(ow/a/2)*2'"), + ] + for option, val in options_map: + idx = args.index(f"-{option}") + assert idx != -1 + assert args[idx + 1] == val + + # test updating values + config = VideoWebmLow(**{"-ar": "50000"}) + config["-bufsize"] = "900k" + args = config.to_ffmpeg_args() + idx = args.index("-ar") + assert idx != -1 and args[idx + 1] == "50000" + idx = args.index("-bufsize") + assert idx != -1 and args[idx + 1] == "900k" def test_preset_video_webm_high(): From fb15186d02323643ddfb81d17b47e9763fe8b4a0 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 13 Feb 2024 09:57:23 +0100 Subject: [PATCH 7/9] Bump VideoWebmLow VERSION number --- CHANGELOG.md | 1 + 
src/zimscraperlib/video/presets.py | 2 +- tests/video/test_video.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 13f3a4db..7bb2017f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Using openZIM Python bootstrap conventions (including hatch-openzim plugin) #120 - Suuport for Python 3.12, drop Python 3.7 #118 - Replace "iso-369" iso639-lang by "iso639-lang" library +- Rework the VideoWebmLow preset for faster encoding and smaller file size (preset has been bumped to version 2) ## [3.2.0] - 2023-12-16 diff --git a/src/zimscraperlib/video/presets.py b/src/zimscraperlib/video/presets.py index 89c2b3fb..37b7bbba 100644 --- a/src/zimscraperlib/video/presets.py +++ b/src/zimscraperlib/video/presets.py @@ -35,7 +35,7 @@ class VideoWebmLow(Config): 128k target video bitrate but stay within quality boundaries. 48k audio bitrate""" - VERSION = 1 + VERSION = 2 ext = "webm" mimetype = f"{preset_type}/webm" diff --git a/tests/video/test_video.py b/tests/video/test_video.py index a3296a9c..5b151f2e 100644 --- a/tests/video/test_video.py +++ b/tests/video/test_video.py @@ -141,7 +141,7 @@ def test_preset_has_mime_and_ext(): def test_preset_video_webm_low(): config = VideoWebmLow() - assert config.VERSION == 1 + assert config.VERSION == 2 args = config.to_ffmpeg_args() assert len(args) == 20 options_map = [ From 2fc4061ba5ff7f46dc5fd70f04bb720d23694930 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 13 Feb 2024 14:33:14 +0100 Subject: [PATCH 8/9] Add a small utility to encode a video as scraper would do --- contrib/README.md | 3 +++ contrib/encode_video.py | 42 +++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 4 ++-- 3 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 contrib/README.md create mode 100644 contrib/encode_video.py diff --git a/contrib/README.md b/contrib/README.md new file mode 100644 
def encode_video(src_path: Path, dst_path: Path, preset: str) -> bool:
    """Re-encode a video with one of the zimscraperlib video presets

    Arguments:
        src_path - Path to the video to encode
        dst_path - Path where the re-encoded video is stored
        preset   - Name of an attribute of `zimscraperlib.video.presets`
                   (e.g. "VideoWebmLow")

    Returns True when `reencode` reports success, False otherwise (the process
    output is logged in that case).

    Raises ValueError when src_path does not exist, and re-raises the
    AttributeError when `presets` has no attribute named `preset`.
    """
    if not src_path.exists():
        raise ValueError(f"{src_path} does not exists")
    try:
        preset_cls = getattr(presets, preset)
    except AttributeError:
        logger.error(f"{preset} preset not found")
        raise
    logger.info(f"Encoding video {src_path} with {preset} version {preset_cls.VERSION}")
    success, process = reencode(
        src_path=src_path,
        dst_path=dst_path,
        ffmpeg_args=preset_cls().to_ffmpeg_args(),
        with_process=True,
    )  # pyright: ignore[reportGeneralTypeIssues] (returned type is variable, depending on `with_process` value)
    if not success:
        logger.error(f"conversion failed:\n{process.stdout}")
    # Report the outcome so that the command line entry point can turn it
    # into a meaningful exit code.
    return bool(success)


def run(args: List[str] = sys.argv) -> int:
    """Command line entry point: encode_video.py <src_path> <dst_path> <preset>

    Arguments:
        args - Full argument vector, program name first (defaults to sys.argv)

    Returns the process exit code: 0 when the video has been encoded, 1 when
    arguments are missing or the conversion failed.
    """
    if len(args) < 4:  # noqa: PLR2004
        # An empty argument vector has no program name to show in the usage.
        prog = args[0] if args else "encode_video.py"
        print(f"Usage: {prog} <src_path> <dst_path> <preset>")  # noqa: T201
        # One argument per line: adjacent literals are concatenated as-is, so
        # each one carries its own newline.
        print(  # noqa: T201
            "\t<src_path>\tpath to the video to encode.\n"
            "\t<dst_path>\tpath where to store the reencoded video.\n"
            "\t<preset>\tname of preset to use."
        )
        return 1
    if not encode_video(Path(args[1]), Path(args[2]), args[3]):
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(run())
zimscraperlib import logger from zimscraperlib.logging import nicer_args_join +def _build_ffmpeg_args( + src_path: pathlib.Path, + tmp_path: pathlib.Path, + ffmpeg_args: List[str], + threads: Optional[int], +) -> List[str]: + if threads: + if "-threads" in ffmpeg_args: + raise AttributeError("Cannot set the number of threads, already set") + else: + ffmpeg_args += ["-threads", str(threads)] + args = [ + "/usr/bin/env", + "ffmpeg", + "-y", + "-i", + f"file:{src_path}", + *ffmpeg_args, + f"file:{tmp_path}", + ] + return args + + def reencode( src_path, dst_path, @@ -18,6 +42,7 @@ def reencode( delete_src=False, # noqa: FBT002 with_process=False, # noqa: FBT002 failsafe=True, # noqa: FBT002 + threads: Optional[int] = 1, ): """Runs ffmpeg with given ffmpeg_args @@ -25,6 +50,7 @@ def reencode( src_path - Path to source file dst_path - Path to destination file ffmpeg_args - A list of ffmpeg arguments + threads - Number of encoding threads used by ffmpeg delete_src - Delete source file after convertion with_process - Optionally return the output from ffmpeg (stderr and stdout) failsafe - Run in failsafe mode @@ -32,15 +58,12 @@ def reencode( with tempfile.TemporaryDirectory() as tmp_dir: tmp_path = pathlib.Path(tmp_dir).joinpath(f"video.tmp{dst_path.suffix}") - args = [ - "/usr/bin/env", - "ffmpeg", - "-y", - "-i", - f"file:{src_path}", - *ffmpeg_args, - f"file:{tmp_path}", - ] + args = _build_ffmpeg_args( + src_path=src_path, + tmp_path=tmp_path, + ffmpeg_args=ffmpeg_args, + threads=threads, + ) logger.debug( f"Encode {src_path} -> {dst_path} video format = {dst_path.suffix}" ) diff --git a/tests/video/test_encoding.py b/tests/video/test_encoding.py new file mode 100644 index 00000000..292f660a --- /dev/null +++ b/tests/video/test_encoding.py @@ -0,0 +1,94 @@ +import re +from pathlib import Path +from typing import List, Optional + +import pytest + +from zimscraperlib.video.encoding import _build_ffmpeg_args + + +@pytest.mark.parametrize( + 
"src_path,tmp_path,ffmpeg_args,threads,expected", + [ + ( + Path("path1/file1.mp4"), + Path("path1/fileout.mp4"), + [ + "-codec:v", + "libx265", + ], + None, + [ + "/usr/bin/env", + "ffmpeg", + "-y", + "-i", + "file:path1/file1.mp4", + "-codec:v", + "libx265", + "file:path1/fileout.mp4", + ], + ), + ( + Path("path2/file2.mp4"), + Path("path12/tmpfile.mp4"), + [ + "-b:v", + "300k", + ], + 1, + [ + "/usr/bin/env", + "ffmpeg", + "-y", + "-i", + "file:path2/file2.mp4", + "-b:v", + "300k", + "-threads", + "1", + "file:path12/tmpfile.mp4", + ], + ), + ( + Path("path2/file2.mp4"), + Path("path12/tmpfile.mp4"), + [ + "-b:v", + "300k", + "-threads", + "1", + ], + 1, + None, + ), + ], +) +def test_build_ffmpeg_args( + src_path: Path, + tmp_path: Path, + ffmpeg_args: List[str], + threads: Optional[int], + expected: Optional[List[str]], +): + if expected: + assert ( + _build_ffmpeg_args( + src_path=src_path, + tmp_path=tmp_path, + ffmpeg_args=ffmpeg_args, + threads=threads, + ) + == expected + ) + else: + with pytest.raises( + AttributeError, + match=re.escape("Cannot set the number of threads, already set"), + ): + _build_ffmpeg_args( + src_path=src_path, + tmp_path=tmp_path, + ffmpeg_args=ffmpeg_args, + threads=threads, + )