Skip to content

Commit

Permalink
Improve download of ZIP file archive
Browse files Browse the repository at this point in the history
- Use the environment variable `TLDR_MAN_ARCHIVE_URL` if set.
- Display download progress.
- Improve error messages.
  • Loading branch information
superatomic committed Nov 22, 2023
1 parent a5f2b9a commit cfdfd34
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 27 deletions.
1 change: 1 addition & 0 deletions src/tldr_man/color.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
style_error = _style(fg='red', bold=True)
style_input = _style(fg='yellow')
style_path = _style(fg='blue')
style_task = _style(fg='cyan')
style_url = _style(underline=True)

style_create = _style(fg='green', bold=True)
Expand Down
65 changes: 38 additions & 27 deletions src/tldr_man/pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import shlex
import zipfile
from concurrent.futures import ThreadPoolExecutor
from contextlib import suppress
from contextlib import suppress, contextmanager
from pathlib import Path
from os import makedirs, getenv
from shutil import rmtree, move, which
Expand All @@ -29,14 +29,15 @@
import requests
from click import echo, progressbar, format_filename
from filelock import FileLock
from requests.exceptions import ConnectionError, HTTPError, InvalidSchema, InvalidURL, MissingSchema, Timeout

from tldr_man.color import style_command, style_path, style_url, style_create, style_update, style_no_change
from tldr_man.errors import Fail, NoPageCache, ExternalCommandNotFound, PageNotFound, eprint
from tldr_man.color import style_command, style_path, style_task, style_url, style_create, style_update, style_no_change
from tldr_man.errors import Fail, NoPageCache, ExternalCommandNotFound, PageNotFound
from tldr_man.temp_path import temp_file, temp_dir

CACHE_DIR_NAME = 'tldr-man'

ZIP_ARCHIVE_URL = "https://tldr.sh/assets/tldr.zip"
ZIP_ARCHIVE_URL = getenv('TLDR_MAN_ARCHIVE_URL', "https://tldr.sh/assets/tldr.zip")

MANPAGE_SECTION = '1'

Expand Down Expand Up @@ -89,20 +90,38 @@ def get_cache_dir() -> Path:
cache_dir_lock = FileLock(CACHE_DIR.parent / f'.{CACHE_DIR.name}.lock', timeout=2)


def download_archive(location: Path, url: str = ZIP_ARCHIVE_URL) -> None:
    """
    Download the tldr-pages zip archive from `url` and write it to `location`.

    Raises `Fail` when the connection cannot be made, the request times out,
    or the server answers with an HTTP error status.
    Any other `requests.RequestException` is announced with `eprint` and re-raised unchanged.
    """
    try:
        r = requests.get(url, timeout=10)
        # Without this check, the body of a 4xx/5xx error page would be written to disk
        # as if it were the archive, and the failure would only surface later as a bad ZIP file.
        r.raise_for_status()
    except requests.ConnectionError:
        raise Fail(f"Could not make connection to {style_url(url)}")
    except requests.Timeout:
        raise Fail(f"Request to {style_url(url)} timed out")
    except requests.HTTPError:
        # Must come before the generic `RequestException` handler, which it subclasses.
        # `r` is always bound here, since `raise_for_status()` is what raises this error.
        raise Fail(f"Request to {style_url(url)} failed: {r.status_code} {r.reason}")
    except requests.RequestException:
        eprint(f"The following error occurred when trying to access {style_url(url)}:")
        raise
    else:
        location.write_bytes(r.content)
@contextmanager
def pages_archive(url: str = ZIP_ARCHIVE_URL) -> Iterator[zipfile.Path]:
    """
    Download the tldr-pages ZIP archive from `url` and yield it as a `zipfile.Path`.

    The response is streamed in chunks into the file provided by `temp_file`
    while a progress bar is displayed.

    Invalid URLs, connection failures, timeouts, HTTP error statuses, and malformed ZIP files
    are all re-raised as `Fail` with a message naming `url`.
    Because the `yield` is inside the `try` block, exceptions of those same types
    raised by the caller's `with` body are converted to `Fail` as well.
    """
    chunk_size = 8192

    with temp_file('tldr.zip') as zip_file:
        try:
            with requests.get(url, stream=True, timeout=10) as r:
                r.raise_for_status()
                try:
                    # The progress bar advances by one per item it yields (i.e. per chunk),
                    # so the expected total is the number of chunks (ceiling division), not the number of bytes.
                    length = -(-int(r.headers['Content-Length']) // chunk_size)
                except (KeyError, ValueError):  # KeyError if lookup failed and ValueError if `int()` failed.
                    length = None
                with (open(zip_file, 'wb') as file,
                      progressbar(
                          r.iter_content(chunk_size=chunk_size),
                          label=style_task("Downloading ZIP"),
                          length=length,
                      ) as chunks):
                    for chunk in chunks:
                        file.write(chunk)
            yield zipfile.Path(zip_file)
        except (InvalidURL, InvalidSchema):
            raise Fail(f"Invalid URL '{style_url(url)}'")
        except MissingSchema:
            raise Fail(f"Invalid URL '{style_url(url)}'. Perhaps you meant {style_url('https://' + url)}?")
        except ConnectionError:
            raise Fail(f"Could not connect to {style_url(url)}")
        except Timeout:
            raise Fail(f"Request to {style_url(url)} timed out")
        except HTTPError:
            # `r` is always bound here, since `raise_for_status()` is what raises this error.
            raise Fail(f"Request to {style_url(url)}: {r.status_code} {r.reason}")
        except zipfile.BadZipFile:
            # Report the URL that was actually requested, which may differ from the module default.
            raise Fail(f"Got a bad ZIP file from {style_url(url)}")


AnyPath = TypeVar('AnyPath', Path, zipfile.Path)
Expand All @@ -124,15 +143,7 @@ def update_cache() -> None:

created, updated, unchanged = 0, 0, 0

with temp_file('tldr.zip') as zip_archive_location, temp_dir('tldr-man') as temp_cache_dir:

# Get the zip file
download_archive(zip_archive_location)
try:
zip_path = zipfile.Path(zip_archive_location)
except zipfile.BadZipFile:
raise Fail(f"Got a bad zipfile from {style_url(ZIP_ARCHIVE_URL)}")

with pages_archive() as zip_path, temp_dir('tldr-man') as temp_cache_dir:
# Iterate through each language and section in the zip file.
for language_dir in iter_dirs(zip_path):
for sections_dir in iter_dirs(language_dir):
Expand Down
10 changes: 10 additions & 0 deletions tldr-man.1
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,16 @@ If none of these environment variables are set or if no valid pages exist for an
this defaults back to
.BR en .
.TP
.BR TLDR_MAN_ARCHIVE_URL
The URL to download pages from when running the
.B tldr \-\-update
command. If this variable is not set, the default
.UR https://tldr.sh/assets/tldr.zip
official archive URL
.UE
is used. The file located at the provided URL must be a well\-formed ZIP file with contents which match the directory
structure specified in the TLDR\-Pages Client Specification.
.TP
.BR TLDR_MAN_CACHE_DIR
The location of the page cache directory. This directory is created by the
.B tldr \-\-update
Expand Down

0 comments on commit cfdfd34

Please sign in to comment.