diff --git a/CHANGELOG.md b/CHANGELOG.md index d2fb1a8b..fa3bbf95 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,8 +10,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Using openZIM Python bootstrap conventions (including hatch-openzim plugin) #120 -- Suuport for Python 3.12, drop Python 3.7 #118 -- Replace "iso-369" iso639-lang by "iso639-lang" library +- Add support for Python 3.12, drop Python 3.7 support #118 +- Replace "iso-639" by "iso639-lang" library +- Replace "file-magic" by "python-magic" library for Alpine Linux support and better maintenance - Rework the VideoWebmLow preset for faster encoding and smaller file size (preset has been bumped to version 2) - When reencoding a video, ffmpeg now uses only 1 CPU thread by default (new arg to `reencode` allows to override this default value) diff --git a/README.md b/README.md index 1d8870e7..e909352f 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,13 @@ sudo apt install libmagic1 wget ffmpeg \ libharfbuzz-dev libfribidi-dev libxcb1-dev gifsicle ``` +## Alpine +``` +apk add ffmpeg gifsicle libmagic wget libjpeg +``` + +**Note:** i18n features do not work on Alpine, see https://github.com/openzim/python-scraperlib/issues/134 ; there is one corresponding test which is failing. 
+ # Contribution This project adheres to openZIM's [Contribution Guidelines](https://github.com/openzim/overview/wiki/Contributing) diff --git a/pyproject.toml b/pyproject.toml index 24d64bf0..53ad3b1a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ dependencies = [ "colorthief==0.2.1", "python-resize-image>=1.1.19,<1.2", "Babel>=2.9,<3.0", - "file-magic>=0.4.0,<0.5", + "python-magic>=0.4.3,<0.5", "libzim>=3.4.0,<4.0", "beautifulsoup4>=4.9.3,<4.10", # upgrade to 4.10 and later to be done "lxml>=4.6.3,<4.10", # upgrade to 4.10 and later to be done diff --git a/src/zimscraperlib/filesystem.py b/src/zimscraperlib/filesystem.py index 509675eb..7b22dfe3 100644 --- a/src/zimscraperlib/filesystem.py +++ b/src/zimscraperlib/filesystem.py @@ -31,7 +31,11 @@ def get_content_mimetype(content: bytes) -> str: """MIME Type of content retrieved from magic headers""" try: - detected_mime = magic.detect_from_content(content).mime_type + detected_mime = magic.from_buffer(content, mime=True) + if isinstance( + detected_mime, bytes + ): # pragma: no cover (old python-magic versions were returning bytes) + detected_mime = detected_mime.decode() except UnicodeDecodeError: return "application/octet-stream" return MIME_OVERRIDES.get(detected_mime, detected_mime) diff --git a/tests/filesystem/test_filesystem.py b/tests/filesystem/test_filesystem.py index 71daba63..08579c16 100644 --- a/tests/filesystem/test_filesystem.py +++ b/tests/filesystem/test_filesystem.py @@ -28,10 +28,10 @@ def test_content_mimetype_fallback(monkeypatch, undecodable_byte_stream): assert get_content_mimetype(undecodable_byte_stream) == "application/octet-stream" # mock then so we keep coverage on systems where magic works - def raising_magic(*args): # noqa: ARG001 + def raising_magic(*args, **kwargs): # noqa: ARG001 raise UnicodeDecodeError("nocodec", b"", 0, 1, "noreason") - monkeypatch.setattr(magic, "detect_from_content", raising_magic) + monkeypatch.setattr(magic, "from_buffer", 
raising_magic) assert get_content_mimetype(undecodable_byte_stream) == "application/octet-stream" diff --git a/tests/video/test_video.py b/tests/video/test_video.py index 5b151f2e..ef541112 100644 --- a/tests/video/test_video.py +++ b/tests/video/test_video.py @@ -130,7 +130,17 @@ def test_get_media_info(media_format, media, expected, test_files): with tempfile.TemporaryDirectory() as t: src = pathlib.Path(t).joinpath(media) shutil.copy2(test_files[media_format], src) - assert get_media_info(src) == expected + result = get_media_info(src) + assert result.keys() == expected.keys() + assert result["codecs"] == expected["codecs"] + assert result["duration"] == expected["duration"] + # for bitrate, we need to allow some variability, not all ffmpeg versions are + # reporting the same values (e.g. Alpine Linux is reporting 3837275 instead of + # 3818365 for video.mp4) ; we allow 1% variability with the following assertion + assert ( + abs(100.0 * (result["bitrate"] - expected["bitrate"]) / expected["bitrate"]) + < 1 + ) def test_preset_has_mime_and_ext():