From 161e8c56a48d886ee3717bf2f9f0a157dc22cadf Mon Sep 17 00:00:00 2001
From: Dan Niles <56271899+dan-niles@users.noreply.github.com>
Date: Mon, 15 Jul 2024 16:36:37 +0530
Subject: [PATCH 1/3] Add e2e tests for zim metadata, channel.json, videos, and playlists

---
 scraper/tests-integration/integration.py | 68 ++++++++++++++++++++++--
 1 file changed, 63 insertions(+), 5 deletions(-)

diff --git a/scraper/tests-integration/integration.py b/scraper/tests-integration/integration.py
index 85602f4..6a2c2ed 100644
--- a/scraper/tests-integration/integration.py
+++ b/scraper/tests-integration/integration.py
@@ -1,3 +1,4 @@
+import json
 import os
 
 from zimscraperlib.zim import Archive
@@ -18,12 +19,69 @@ def test_zim_main_page():
     assert main_entry.get_redirect_entry().path == "index.html"
 
 
-def test_zim_scraper():
+def test_zim_metadata():
     """Ensure scraper and zim title are present in metadata"""
 
     zim_fh = Archive(ZIM_FILE_PATH)
-    scraper = zim_fh.get_text_metadata("scraper")
-    zim_title = zim_fh.get_text_metadata("Title")
 
-    assert "youtube2zim " in scraper
-    assert "openZIM_testing" in zim_title
+    assert "youtube2zim " in zim_fh.get_text_metadata("scraper")
+    assert "openZIM_testing" in zim_fh.get_text_metadata("Title")
+    assert "-" in zim_fh.get_text_metadata("Description")
+    assert "en" in zim_fh.get_text_metadata("Language")
+    assert "openZIM" in zim_fh.get_text_metadata("Publisher")
+    assert "openZIM_testing" in zim_fh.get_text_metadata("Creator")
+
+    assert zim_fh.get_item("profile.jpg").mimetype == "image/jpeg"
+    assert zim_fh.get_item("favicon.png").mimetype == "image/png"
+
+
+def test_zim_channel_json():
+    """Ensure channel.json exists and is valid"""
+
+    zim_fh = Archive(ZIM_FILE_PATH)
+    assert zim_fh.get_item("channel.json").mimetype == "application/json"
+    channel_json = zim_fh.get_content("channel.json")
+    channel_json = json.loads(channel_json)
+
+    assert channel_json["id"] == "UC8elThf5TGMpQfQc_VE917Q"
+    assert channel_json["channelName"] == "openZIM_testing"
+    assert channel_json["collectionType"] == "channel"
+    assert channel_json["mainPlaylist"] == "uploads_from_openzim_testing-917Q"
+
+
+def test_zim_videos():
+    """Ensure videos and video thumbnails are present in ZIM file"""
+
+    zim_fh = Archive(ZIM_FILE_PATH)
+    videos_json_list = [
+        "cloudy_sky_time_lapse_4k_free_footage_video_gopro_11-k02q.json",
+        "coffee_machine-DYvY.json",
+        "marvel_studios_avengers_endgame_official_trailer-TcMB.json",
+        "timelapse-9Tgo.json",
+    ]
+
+    for video_json_file in videos_json_list:
+        json_path = "videos/" + video_json_file
+        assert zim_fh.get_item(json_path).mimetype == "application/json"
+
+        video_json = zim_fh.get_content(json_path)
+        video_json = json.loads(video_json)
+
+        assert zim_fh.get_item(video_json["videoPath"]).mimetype == "video/webm"
+        assert zim_fh.get_item(video_json["thumbnailPath"]).mimetype == "image/webp"
+
+
+def test_zim_playlists():
+    """Ensure playlists json files are present in ZIM file"""
+
+    zim_fh = Archive(ZIM_FILE_PATH)
+    playlists_json_list = [
+        "coffee-O2wS.json",
+        "timelapses-QgGI.json",
+        "trailers-5Gph.json",
+        "uploads_from_openzim_testing-917Q.json",
+    ]
+
+    for playlist_json_file in playlists_json_list:
+        json_path = "playlists/" + playlist_json_file
+        assert zim_fh.get_item(json_path).mimetype == "application/json"

From 2f296a414105fd7233cf27d95e8d7a8081ca41db Mon Sep 17 00:00:00 2001
From: Dan Niles <56271899+dan-niles@users.noreply.github.com>
Date: Mon, 15 Jul 2024 22:15:50 +0530
Subject: [PATCH 2/3] Add subtitle file validation in integration tests

---
Review note: the subscript inside the f-string below uses single quotes
(subtitle['code']) so the test module also parses on Python < 3.12; reusing
the enclosing double quote inside a replacement field is only valid from
Python 3.12 (PEP 701). The runtime string is unchanged. The post-image blob
hash on the "index" line predates this adjustment.

 scraper/tests-integration/integration.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/scraper/tests-integration/integration.py b/scraper/tests-integration/integration.py
index 6a2c2ed..1a0d4ae 100644
--- a/scraper/tests-integration/integration.py
+++ b/scraper/tests-integration/integration.py
@@ -70,6 +70,14 @@ def test_zim_videos():
         assert zim_fh.get_item(video_json["videoPath"]).mimetype == "video/webm"
         assert zim_fh.get_item(video_json["thumbnailPath"]).mimetype == "image/webp"
 
+        for subtitle in video_json["subtitleList"]:
+            assert (
+                zim_fh.get_item(
+                    video_json["subtitlePath"] + f"/video.{subtitle['code']}.vtt"
+                ).mimetype
+                == "text/vtt"
+            )
+
 
 def test_zim_playlists():
     """Ensure playlists json files are present in ZIM file"""

From 0fcb9b3a66578d15327a1e512d59a9e60c8a291b Mon Sep 17 00:00:00 2001
From: Dan Niles <56271899+dan-niles@users.noreply.github.com>
Date: Tue, 16 Jul 2024 08:14:11 +0530
Subject: [PATCH 3/3] Update CHANGELOG mentioning addition of integration tests

---
 CHANGELOG | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG b/CHANGELOG
index e043b50..15e8135 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -29,6 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Add videos, subtitles, thumbnails and channel branding to the ZIM "on the fly" (#209)
 - Remove `--no-zim`, `--keep` CLI arguments
 - Add support to index content from `zimui` JSON files in the ZIM using custom `IndexData` (#224)
+- Add integration tests to check the content of the ZIM created by the scraper (#268)
 
 ## [2.3.0] - 2024-05-22
 