From b75743c3df1a8fdb49d550df24ea0c97d98b8377 Mon Sep 17 00:00:00 2001 From: RonTamG <33351836+RonTamG@users.noreply.github.com> Date: Sat, 19 Aug 2023 12:05:18 +0000 Subject: [PATCH] feat: added support for gzip compressed index files --- main.py | 25 +++++++++++++++---------- src/file_manager.py | 26 ++++++++++++++++++++------ src/sources_list.py | 2 +- src/update.py | 2 +- tests/test_sources_list.py | 18 +++++++++--------- 5 files changed, 46 insertions(+), 27 deletions(-) diff --git a/main.py b/main.py index d64ce34..3b3b24c 100644 --- a/main.py +++ b/main.py @@ -1,4 +1,5 @@ import argparse +import gzip import lzma import os import re @@ -67,14 +68,16 @@ def set_permissions(tarinfo): tar.add(Path(path, "install.sh"), filter=set_permissions) -def read_lzma(path): +def read(path): """ - read a file from disk and decompress it using lzma + return the contents of a text file, supports xz, gz compressions """ - with open(path, "rb") as index_file: - data = index_file.read() - - return lzma.decompress(data).decode("utf-8") + if path.suffix == ".xz": + return lzma.decompress(path.read_bytes()).decode("utf-8") + elif path.suffix == ".gz": + return gzip.decompress(path.read_bytes()).decode("utf-8") + else: + return path.read_text() def apt_update(sources_list_path, temp_folder): @@ -97,12 +100,14 @@ def apt_update(sources_list_path, temp_folder): ] # create an index dictionary from the index files - index_files = (path for path in saved if path.endswith("Packages.xz")) - decompressed = (read_lzma(path) for path in index_files) - indexes = (generate_index_dictionary(data) for data in decompressed) + index_files = (path for path in saved if path.stem.endswith("Packages")) + decompressed = (read(path) for path in index_files) + indexes = ( + generate_index_dictionary(data) for data in decompressed if len(data) > 0 + ) # add an 'Apt-Source' key to all packages in the index, used later in order to download package # noqa: E501 - sources = (get_apt_sources(url) for url in urls if url.endswith("Packages.xz")) + sources = (get_apt_sources(url) for url in urls) indexes = ( add_apt_source_field(index, source) for index, source in zip(indexes, sources) ) diff --git a/src/file_manager.py b/src/file_manager.py index de4bf97..2b525db 100644 --- a/src/file_manager.py +++ b/src/file_manager.py @@ -1,4 +1,5 @@ import logging +import urllib import urllib.request from pathlib import Path @@ -17,6 +18,7 @@ def url_into_saved_file_name(url): class FileManager: def __init__(self, folder): self.folder = Path(folder) + self.supported_compressions = [".xz", ".gz", ""] def get(self, url, name, directory=""): """ @@ -24,21 +26,33 @@ def get(self, url, name, directory=""): """ saved_name = self.folder / directory / name if saved_name.exists(): - return str(saved_name) + return saved_name - with urllib.request.urlopen(url) as response: + request = urllib.request.Request( + url, + headers={"User-Agent": "Mozilla"}, + ) + + with urllib.request.urlopen(request) as response: if response.status == 200: self.save_file(name, response.read(), directory) - return str(saved_name) + return saved_name else: logging.warning( f"failed to download {url} with status {response.status}" ) - return "" + return Path("") def get_update_file(self, url): - name = url_into_saved_file_name(url) - return self.get(url, name, UPDATE_SUBDIRECTORY) + for compression in self.supported_compressions: + name = url_into_saved_file_name(url + compression) + + try: + return self.get(url + compression, name, UPDATE_SUBDIRECTORY) + except urllib.error.HTTPError: + continue + + return "" def get_package_file(self, url): name = Path(url).name diff --git a/src/sources_list.py b/src/sources_list.py index 48df481..cdd2660 100644 --- a/src/sources_list.py +++ b/src/sources_list.py @@ -41,7 +41,7 @@ def index_urls(self, architecture): dist, component, "binary-" + architecture, - "Packages.xz", + "Packages", ) for component in components ] diff --git a/src/update.py b/src/update.py index 5bea711..10c5ddf 100644 --- a/src/update.py +++ b/src/update.py @@ -46,7 +46,7 @@ def get_apt_sources(url): return the apt sources of a given index package url """ pattern = re.compile( - r"(?P\w+://.+?/.+)/dists/(?P.+?)/(?P.+?)/binary-(?P.+?)/Packages.xz" + r"(?P\w+://.+?/.+)/dists/(?P.+?)/(?P.+?)/binary-(?P.+?)/Packages" ) result = re.match(pattern, url) if result is None: diff --git a/tests/test_sources_list.py b/tests/test_sources_list.py index 1454358..1e6e905 100644 --- a/tests/test_sources_list.py +++ b/tests/test_sources_list.py @@ -15,15 +15,15 @@ def sources_list(): def test_sources_list_returns_index_urls(sources_list): expected = [ - "http://deb.debian.org/debian/dists/bullseye/main/binary-amd64/Packages.xz", - "http://deb.debian.org/debian/dists/bullseye/contrib/binary-amd64/Packages.xz", - "http://deb.debian.org/debian/dists/bullseye/non-free/binary-amd64/Packages.xz", - "http://deb.debian.org/debian/dists/bullseye-updates/main/binary-amd64/Packages.xz", - "http://deb.debian.org/debian/dists/bullseye-updates/contrib/binary-amd64/Packages.xz", - "http://deb.debian.org/debian/dists/bullseye-updates/non-free/binary-amd64/Packages.xz", - "http://security.debian.org/debian-security/dists/bullseye-security/main/binary-amd64/Packages.xz", - "http://security.debian.org/debian-security/dists/bullseye-security/contrib/binary-amd64/Packages.xz", - "http://security.debian.org/debian-security/dists/bullseye-security/non-free/binary-amd64/Packages.xz", + "http://deb.debian.org/debian/dists/bullseye/main/binary-amd64/Packages", + "http://deb.debian.org/debian/dists/bullseye/contrib/binary-amd64/Packages", + "http://deb.debian.org/debian/dists/bullseye/non-free/binary-amd64/Packages", + "http://deb.debian.org/debian/dists/bullseye-updates/main/binary-amd64/Packages", + "http://deb.debian.org/debian/dists/bullseye-updates/contrib/binary-amd64/Packages", + "http://deb.debian.org/debian/dists/bullseye-updates/non-free/binary-amd64/Packages", + "http://security.debian.org/debian-security/dists/bullseye-security/main/binary-amd64/Packages", + "http://security.debian.org/debian-security/dists/bullseye-security/contrib/binary-amd64/Packages", + "http://security.debian.org/debian-security/dists/bullseye-security/non-free/binary-amd64/Packages", ] result = SourcesList(sources_list).index_urls(architecture="amd64")