Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix VP8 low bitrate #122

Merged
merged 9 commits into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Using openZIM Python bootstrap conventions (including hatch-openzim plugin) #120
- Support for Python 3.12, drop Python 3.7 #118
- Replace "iso-639" library by "iso639-lang" library
- Rework the VideoWebmLow preset for faster encoding and smaller file size (preset has been bumped to version 2)
- When reencoding a video, ffmpeg now uses only 1 CPU thread by default (a new `threads` argument to `reencode` allows overriding this default value)

## [3.2.0] - 2023-12-16

Expand Down
3 changes: 3 additions & 0 deletions contrib/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@

This folder contains some tooling around zimscraperlib:
- `encode_video.py`: a small utility to encode a video with an existing video preset, just like a scraper would do
42 changes: 42 additions & 0 deletions contrib/encode_video.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import sys
from pathlib import Path
from typing import List

from zimscraperlib import logger
from zimscraperlib.video import presets, reencode


def encode_video(src_path: Path, dst_path: Path, preset: str):
    """Re-encode a video with one of the presets found in the `presets` module

    Arguments -
        src_path - Path to the video to encode (must exist)
        dst_path - Path handed to `reencode` as destination of the encoded video
        preset - Name of the preset class to look up in the `presets` module

    Raises -
        ValueError - src_path does not exist
        AttributeError - no preset with this name (logged, then re-raised)

    A failed conversion is only logged, together with the output of the
    process returned by `reencode`; no exception is raised for it.
    """
    if not src_path.exists():
        raise ValueError(f"{src_path} does not exist")
    try:
        preset_cls = getattr(presets, preset)
    except AttributeError:
        logger.error(f"{preset} preset not found")
        raise
    logger.info(f"Encoding video {src_path} with {preset} version {preset_cls.VERSION}")
    # with_process=True makes reencode return the process next to the success
    # flag, so that its output can be logged on failure
    success, process = reencode(
        src_path=src_path,
        dst_path=dst_path,
        ffmpeg_args=preset_cls().to_ffmpeg_args(),
        with_process=True,
    )  # pyright: ignore[reportGeneralTypeIssues] (returned type is variable, depending on `with_process` value)
    if not success:
        logger.error(f"conversion failed:\n{process.stdout}")


def run(args: List[str] = sys.argv):
    """Command-line entry point: encode a video using a named preset

    Arguments -
        args - argv-like list: program name, source path, destination path
               and preset name (defaults to sys.argv)

    Returns the exit code: 1 after printing the usage when less than three
    positional arguments are given, 0 once `encode_video` has returned.
    """
    if len(args) < 4:  # noqa: PLR2004
        # args may be empty when called programmatically; fall back to script name
        prog = args[0] if args else "encode_video.py"
        print(f"Usage: {prog} <src_path> <dst_path> <preset>")  # noqa: T201
        # adjacent literals are concatenated as-is: each one needs its own
        # newline so that every argument is described on a separate line
        print(  # noqa: T201
            "\t<src_path>\tpath to the video to encode.\n"
            "\t<dst_path>\tpath to store the reencoded video.\n"
            "\t<preset>\tname of preset to use."
        )
        return 1
    encode_video(Path(args[1]), Path(args[2]), args[3])
    return 0


if __name__ == "__main__":
    # use the value returned by run() as the process exit status
    raise SystemExit(run())
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ target-version = ['py38']
[tool.ruff]
target-version = "py38"
line-length = 88
src = ["src"]
src = ["src", "contrib"]

[tool.ruff.lint]
select = [
Expand Down Expand Up @@ -235,7 +235,7 @@ exclude_lines = [
]

[tool.pyright]
include = ["src", "tests", "tasks.py"]
include = ["contrib", "src", "tests", "tasks.py"]
exclude = [".env/**", ".venv/**"]
extraPaths = ["src"]
pythonVersion = "3.8"
Expand Down
41 changes: 32 additions & 9 deletions src/zimscraperlib/video/encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,41 +6,64 @@
import shutil
import subprocess
import tempfile
from typing import List, Optional

from zimscraperlib import logger
from zimscraperlib.logging import nicer_args_join


def _build_ffmpeg_args(
src_path: pathlib.Path,
tmp_path: pathlib.Path,
ffmpeg_args: List[str],
threads: Optional[int],
) -> List[str]:
if threads:
if "-threads" in ffmpeg_args:
raise AttributeError("Cannot set the number of threads, already set")
else:
ffmpeg_args += ["-threads", str(threads)]
args = [
"/usr/bin/env",
"ffmpeg",
"-y",
"-i",
f"file:{src_path}",
*ffmpeg_args,
f"file:{tmp_path}",
]
return args


def reencode(
src_path,
dst_path,
ffmpeg_args,
delete_src=False, # noqa: FBT002
with_process=False, # noqa: FBT002
failsafe=True, # noqa: FBT002
threads: Optional[int] = 1,
):
"""Runs ffmpeg with given ffmpeg_args

Arguments -
src_path - Path to source file
dst_path - Path to destination file
ffmpeg_args - A list of ffmpeg arguments
threads - Number of encoding threads used by ffmpeg
delete_src - Delete source file after convertion
with_process - Optionally return the output from ffmpeg (stderr and stdout)
failsafe - Run in failsafe mode
"""

with tempfile.TemporaryDirectory() as tmp_dir:
tmp_path = pathlib.Path(tmp_dir).joinpath(f"video.tmp{dst_path.suffix}")
args = [
"/usr/bin/env",
"ffmpeg",
"-y",
"-i",
f"file:{src_path}",
*ffmpeg_args,
f"file:{tmp_path}",
]
args = _build_ffmpeg_args(
src_path=src_path,
tmp_path=tmp_path,
ffmpeg_args=ffmpeg_args,
threads=threads,
)
logger.debug(
f"Encode {src_path} -> {dst_path} video format = {dst_path.suffix}"
)
Expand Down
15 changes: 6 additions & 9 deletions src/zimscraperlib/video/presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,23 +32,20 @@ class VideoWebmLow(Config):
"""Low Quality webm video

480:h format with height adjusted to keep aspect ratio
300k video bitrate
48k audio bitrate
highly degraded quality (30, 42)"""
128k target video bitrate but stay within quality boundaries.
48k audio bitrate"""

VERSION = 1
VERSION = 2

ext = "webm"
mimetype = f"{preset_type}/webm"

options: ClassVar[Dict[str, Optional[Union[str, bool, int]]]] = {
"-codec:v": "libvpx", # video codec
"-quality": "best", # codec preset
"-b:v": "300k", # target video bitrate
"-maxrate": "300k", # max video bitrate
"-minrate": "300k", # min video bitrate
"-qmin": "30", # min quantizer scale
"-qmax": "42", # max quantizer scale
"-b:v": "128k", # Adjust quantizer within min/max to target this bitrate
"-qmin": "18", # Reduce the bitrate on very still videos
"-qmax": "40", # Increase the bitrate on very busy videos
"-vf": "scale='480:trunc(ow/a/2)*2'", # frame size
"-codec:a": "libvorbis", # audio codec
"-ar": "44100", # audio sampling rate
Expand Down
94 changes: 94 additions & 0 deletions tests/video/test_encoding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import re
from pathlib import Path
from typing import List, Optional

import pytest

from zimscraperlib.video.encoding import _build_ffmpeg_args


@pytest.mark.parametrize(
    "src_path,tmp_path,ffmpeg_args,threads,expected",
    [
        # no thread count requested: only the passed arguments are inserted
        (
            Path("path1/file1.mp4"),
            Path("path1/fileout.mp4"),
            ["-codec:v", "libx265"],
            None,
            [
                "/usr/bin/env",
                "ffmpeg",
                "-y",
                "-i",
                "file:path1/file1.mp4",
                "-codec:v",
                "libx265",
                "file:path1/fileout.mp4",
            ],
        ),
        # thread count requested: `-threads 1` comes right before the output
        (
            Path("path2/file2.mp4"),
            Path("path12/tmpfile.mp4"),
            ["-b:v", "300k"],
            1,
            [
                "/usr/bin/env",
                "ffmpeg",
                "-y",
                "-i",
                "file:path2/file2.mp4",
                "-b:v",
                "300k",
                "-threads",
                "1",
                "file:path12/tmpfile.mp4",
            ],
        ),
        # `-threads` already present in the arguments: expected is None,
        # meaning that the call must raise
        (
            Path("path2/file2.mp4"),
            Path("path12/tmpfile.mp4"),
            ["-b:v", "300k", "-threads", "1"],
            1,
            None,
        ),
    ],
)
def test_build_ffmpeg_args(
    src_path: Path,
    tmp_path: Path,
    ffmpeg_args: List[str],
    threads: Optional[int],
    expected: Optional[List[str]],
):
    """Check the command built by _build_ffmpeg_args, or the error it raises

    A falsy `expected` marks a case where AttributeError must be raised.
    """

    def build() -> List[str]:
        return _build_ffmpeg_args(
            src_path=src_path,
            tmp_path=tmp_path,
            ffmpeg_args=ffmpeg_args,
            threads=threads,
        )

    if not expected:
        error_message = re.escape("Cannot set the number of threads, already set")
        with pytest.raises(AttributeError, match=error_message):
            build()
        return

    assert build() == expected
12 changes: 5 additions & 7 deletions tests/video/test_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,20 +141,18 @@ def test_preset_has_mime_and_ext():

def test_preset_video_webm_low():
config = VideoWebmLow()
assert config.VERSION == 1
assert config.VERSION == 2
args = config.to_ffmpeg_args()
assert len(args) == 24
assert len(args) == 20
options_map = [
("codec:v", "libvpx"),
("codec:a", "libvorbis"),
("maxrate", "300k"),
("minrate", "300k"),
("b:v", "300k"),
("b:v", "128k"),
("ar", "44100"),
("b:a", "48k"),
("quality", "best"),
("qmin", "30"),
("qmax", "42"),
("qmin", "18"),
("qmax", "40"),
("vf", "scale='480:trunc(ow/a/2)*2'"),
]
for option, val in options_map:
Expand Down
Loading