From 4ce120db56436661ec66bbbc04a4ed013607135c Mon Sep 17 00:00:00 2001 From: Jan Willhaus Date: Sat, 7 Dec 2024 20:52:30 +0100 Subject: [PATCH] feat: Improve progress display and episodes report (#213) [no-bump] --- podcast_archiver/console.py | 2 +- podcast_archiver/constants.py | 3 +- podcast_archiver/download.py | 19 ++--- podcast_archiver/enums.py | 8 +- podcast_archiver/logging.py | 14 ++-- podcast_archiver/models/episode.py | 21 +++++- podcast_archiver/processor.py | 32 ++++++-- podcast_archiver/types.py | 1 + podcast_archiver/utils/pretty_printing.py | 90 +++++++++++++---------- podcast_archiver/utils/progress.py | 61 ++++++++++++--- 10 files changed, 169 insertions(+), 82 deletions(-) diff --git a/podcast_archiver/console.py b/podcast_archiver/console.py index 157000e..e9f8a7c 100644 --- a/podcast_archiver/console.py +++ b/podcast_archiver/console.py @@ -7,7 +7,7 @@ { "error": "bold dark_red", "warning": "magenta", - "missing": "orange1", + "missing": "orange1 bold", "completed": "bold dark_cyan", "success": "dark_cyan", } diff --git a/podcast_archiver/constants.py b/podcast_archiver/constants.py index 7a8f087..eff3904 100644 --- a/podcast_archiver/constants.py +++ b/podcast_archiver/constants.py @@ -13,8 +13,7 @@ DOWNLOAD_CHUNK_SIZE = 256 * 1024 DEBUG_PARTIAL_SIZE = DOWNLOAD_CHUNK_SIZE * 4 -MAX_TITLE_LENGTH = 84 - +MAX_TITLE_LENGTH = 120 DEFAULT_DATETIME_FORMAT = "%Y-%m-%d" DEFAULT_ARCHIVE_DIRECTORY = pathlib.Path(".") diff --git a/podcast_archiver/download.py b/podcast_archiver/download.py index a46bb24..d30d372 100644 --- a/podcast_archiver/download.py +++ b/podcast_archiver/download.py @@ -8,7 +8,7 @@ from podcast_archiver import constants from podcast_archiver.enums import DownloadResult from podcast_archiver.exceptions import NotCompleted -from podcast_archiver.logging import logger, rprint +from podcast_archiver.logging import logger from podcast_archiver.session import session from podcast_archiver.types import EpisodeResult from 
podcast_archiver.utils import atomic_write @@ -32,27 +32,24 @@ class DownloadJob: def __call__(self) -> EpisodeResult: try: - return self.run() + self.run() + result = DownloadResult.COMPLETED_SUCCESSFULLY except NotCompleted: - res = EpisodeResult(self.episode, DownloadResult.ABORTED) + result = DownloadResult.ABORTED except Exception as exc: logger.error("Download failed: %s; %s", self.episode, exc) logger.debug("Exception while downloading", exc_info=exc) - res = EpisodeResult(self.episode, DownloadResult.FAILED) + result = DownloadResult.FAILED - rprint(f"[error]✘ {res.result}:[/] {res.episode}") - return res + return EpisodeResult(self.episode, result) - def run(self) -> EpisodeResult: + def run(self) -> None: self.target.parent.mkdir(parents=True, exist_ok=True) logger.info("Downloading: %s", self.episode) response = session.get_and_raise(self.episode.enclosure.href, stream=True) with self.write_info_json(), atomic_write(self.target, mode="wb") as fp: self.receive_data(fp, response) - logger.info("Completed: %s", self.episode) - rprint(f"[dark_cyan]✔ {DownloadResult.COMPLETED_SUCCESSFULLY}:[/] {self.episode}") - return EpisodeResult(self.episode, DownloadResult.COMPLETED_SUCCESSFULLY) @property def infojsonfile(self) -> Path: @@ -64,7 +61,7 @@ def receive_data(self, fp: IO[bytes], response: Response) -> None: max_bytes = self.max_download_bytes for chunk in progress_manager.track( response.iter_content(chunk_size=constants.DOWNLOAD_CHUNK_SIZE), - description=str(self.episode), + episode=self.episode, total=total_size, ): total_written += fp.write(chunk) diff --git a/podcast_archiver/enums.py b/podcast_archiver/enums.py index 3d8d6da..87e2024 100644 --- a/podcast_archiver/enums.py +++ b/podcast_archiver/enums.py @@ -9,10 +9,10 @@ def __str__(self) -> str: class QueueCompletionType(StrEnum): - COMPLETED = "Archived all episodes" - FOUND_EXISTING = "Archive is up to date" - MAX_EPISODES = "Maximum episode count reached" - FAILED = "Failed" + COMPLETED = "✔ 
Archived all episodes" + FOUND_EXISTING = "✔ Archive is up to date" + MAX_EPISODES = "✔ Maximum episode count reached" + FAILED = "✘ Failed" class DownloadResult(StrEnum): diff --git a/podcast_archiver/logging.py b/podcast_archiver/logging.py index 3673091..7bfd644 100644 --- a/podcast_archiver/logging.py +++ b/podcast_archiver/logging.py @@ -4,26 +4,30 @@ import logging.config import sys from os import environ -from typing import Any +from typing import TYPE_CHECKING, Any from rich.logging import RichHandler from rich.text import Text from podcast_archiver.console import console +if TYPE_CHECKING: + from rich.console import RenderableType + logger = logging.getLogger("podcast_archiver") REDIRECT_VIA_LOGGING: bool = False -def rprint(msg: str, **kwargs: Any) -> None: +def rprint(*msg: RenderableType, **kwargs: Any) -> None: if not REDIRECT_VIA_LOGGING: - console.print(msg, **kwargs) + console.print(*msg, **kwargs) return - text = Text.from_markup(msg.strip()).plain.strip() - logger.info(text) + for m in msg: + if isinstance(m, Text): + logger.info(m.plain.strip()) def is_interactive() -> bool: diff --git a/podcast_archiver/models/episode.py b/podcast_archiver/models/episode.py index e4d968a..820e8d0 100644 --- a/podcast_archiver/models/episode.py +++ b/podcast_archiver/models/episode.py @@ -2,7 +2,7 @@ from functools import cached_property from pathlib import Path -from typing import Annotated +from typing import TYPE_CHECKING, Annotated from urllib.parse import urlparse from pydantic import ( @@ -11,6 +11,8 @@ field_validator, model_validator, ) +from rich.table import Table +from rich.text import Text from podcast_archiver.constants import DEFAULT_DATETIME_FORMAT, MAX_TITLE_LENGTH from podcast_archiver.exceptions import MissingDownloadUrl @@ -18,6 +20,9 @@ from podcast_archiver.models.misc import Link from podcast_archiver.utils import get_generic_extension, truncate +if TYPE_CHECKING: + from rich.console import RenderableType + class Chapter(BaseModel): start: 
str @@ -36,11 +41,23 @@ class BaseEpisode(BaseModel): published_time: LenientDatetime = Field(alias="published_parsed", title="episode.published_time") original_filename: str = Field(default="", repr=False, title="episode.original_filename") + original_title: str = Field(default="Untitled Episode", repr=False, validation_alias="title") guid: str = Field(default=None, alias="id") # type: ignore[assignment] def __str__(self) -> str: - return f"{self.title} ({self.published_time.strftime(DEFAULT_DATETIME_FORMAT)})" + return f"{self.published_time.strftime(DEFAULT_DATETIME_FORMAT)} {self.title}" + + def __rich__(self) -> RenderableType: + """Render the episode as a two-column grid: dimmed publication date, then title.""" + grid = Table.grid() + grid.add_column(style="dim") + grid.add_column() + grid.add_row( + Text(f"{self.published_time:%Y-%m-%d} "), + Text(self.title, overflow="ellipsis", no_wrap=True), + ) + return grid @field_validator("title", mode="after") @classmethod diff --git a/podcast_archiver/processor.py b/podcast_archiver/processor.py index b294232..71938ff 100644 --- a/podcast_archiver/processor.py +++ b/podcast_archiver/processor.py @@ -4,6 +4,9 @@ from threading import Event from typing import TYPE_CHECKING +from rich.console import Group +from rich.text import Text + from podcast_archiver import constants from podcast_archiver.config import Settings from podcast_archiver.database import get_database @@ -11,7 +14,12 @@ from podcast_archiver.enums import DownloadResult, QueueCompletionType from podcast_archiver.logging import logger, rprint from podcast_archiver.models.feed import Feed, FeedInfo -from podcast_archiver.types import EpisodeResult, EpisodeResultsList, FutureEpisodeResult, ProcessingResult +from podcast_archiver.types import ( + EpisodeResult, + EpisodeResultsList, + FutureEpisodeResult, + ProcessingResult, +) from podcast_archiver.utils import FilenameFormatter, handle_feed_request from podcast_archiver.utils.pretty_printing import PrettyPrintEpisodeRange @@ -48,7 +56,7 @@ 
def process(self, url: str) -> ProcessingResult: return ProcessingResult(feed=None, tombstone=QueueCompletionType.FAILED) result = self.process_feed(feed=feed) - rprint(f"[completed]{result.tombstone}[/]") + rprint(result.tombstone, style="completed") return result def load_feed(self, url: str, known_feeds: dict[str, FeedInfo]) -> Feed | None: @@ -116,11 +124,11 @@ def process_feed(self, feed: Feed) -> ProcessingResult: success, failures = self._handle_results(results) return ProcessingResult(feed=feed, success=success, failures=failures, tombstone=tombstone) - def _enqueue_episode(self, episode: BaseEpisode, feed_info: FeedInfo) -> FutureEpisodeResult | EpisodeResult: + def _enqueue_episode(self, episode: BaseEpisode, feed_info: FeedInfo) -> FutureEpisodeResult: target = self.filename_formatter.format(episode=episode, feed_info=feed_info) if self._does_already_exist(episode, target=target): result = DownloadResult.ALREADY_EXISTS - return EpisodeResult(episode, result) + return EpisodeResult(episode, result, is_eager=True) logger.debug("Queueing download for %r", episode) return self.pool_executor.submit( @@ -139,12 +147,20 @@ def _handle_results(self, episode_results: EpisodeResultsList) -> tuple[int, int if isinstance(episode_result, Future): episode_result = episode_result.result() - if episode_result.result not in DownloadResult.successful(): - failures += 1 + if episode_result.is_eager: + success += 1 + self.database.add(episode_result.episode) continue - self.database.add(episode_result.episode) - success += 1 + if episode_result.result in DownloadResult.successful(): + prefix = Text(f"✔ {episode_result.result} ", style="success", end=" ") + success += 1 + self.database.add(episode_result.episode) + else: + prefix = Text(f"✖ {episode_result.result} ", style="error", end=" ") + failures += 1 + + rprint(Group(prefix, episode_result.episode)) return success, failures def shutdown(self) -> None: diff --git a/podcast_archiver/types.py b/podcast_archiver/types.py 
index 9136ce4..dd07abe 100644 --- a/podcast_archiver/types.py +++ b/podcast_archiver/types.py @@ -14,6 +14,7 @@ class EpisodeResult: episode: BaseEpisode result: DownloadResult + is_eager: bool = False @dataclass(slots=True, frozen=True) diff --git a/podcast_archiver/utils/pretty_printing.py b/podcast_archiver/utils/pretty_printing.py index 9551116..62adbd1 100644 --- a/podcast_archiver/utils/pretty_printing.py +++ b/podcast_archiver/utils/pretty_printing.py @@ -3,29 +3,21 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Any -from podcast_archiver.enums import DownloadResult +from rich.table import Table +from rich.text import Text + from podcast_archiver.logging import rprint if TYPE_CHECKING: - from podcast_archiver.models.episode import BaseEpisode - - -MSG_1 = """\ -{prefix} {first}""" - -MSG_2 = """\ -{prefix} {first} - {last}""" + from rich.console import ConsoleRenderable -MSG_MORE = """\ -{prefix} {first} - [dim]...[/] - {last}""" + from podcast_archiver.models.episode import BaseEpisode @dataclass(slots=True) class _ValPair: prefix: str + style: str first: BaseEpisode | None = None last: BaseEpisode | None = None length: int = 0 @@ -33,51 +25,71 @@ class _ValPair: def populate(self, obj: BaseEpisode) -> None: if not self.first: self.first = obj - else: - self.last = obj + self.last = obj self.length += 1 - def emit(self) -> str | None: - msg = None - if self.length == 1: - msg = MSG_1.format(prefix=self.prefix, first=self.first, last=self.last) - if self.length == 2: - msg = MSG_2.format(prefix=self.prefix, first=self.first, last=self.last) - elif self.length > 2: - msg = MSG_MORE.format(prefix=self.prefix, first=self.first, last=self.last) - - self.first = None - self.last = None - self.length = 0 - return msg + def emit(self) -> _ValPair | None: + if not self.first: + return None + return self class PrettyPrintEpisodeRange: _existing: _ValPair _missing: _ValPair _last_populated: _ValPair + pairs: list[_ValPair] - __slots__ = 
("_existing", "_missing", "_last_populated") + __slots__ = ("_existing", "_missing", "_last_populated", "pairs") def __init__(self) -> None: - self._existing = _ValPair(prefix=f"[success]✔ {DownloadResult.ALREADY_EXISTS}:[/]") - self._missing = self._last_populated = _ValPair(prefix="[missing]✘ Missing:[/]") + self._existing = _ValPair(prefix="✔ Present", style="success") + self._missing = self._last_populated = _ValPair("✘ Missing", style="missing") + self.pairs = [] def __enter__(self) -> PrettyPrintEpisodeRange: return self def __exit__(self, *args: Any) -> None: - if msg := self._last_populated.emit(): - rprint(msg) + if emitted := self._last_populated.emit(): + self.pairs.append(emitted) + rprint(self) - def _update_state(self, obj: BaseEpisode, to_populate: _ValPair, to_emit: _ValPair) -> None: + def _update_state(self, obj: BaseEpisode, to_populate: _ValPair, to_emit: _ValPair) -> _ValPair: self._last_populated = to_populate to_populate.populate(obj) - if msg := to_emit.emit(): - rprint(msg) + if emitted := to_emit.emit(): + self.pairs.append(emitted) + return _ValPair(prefix=to_emit.prefix, style=to_emit.style) + return to_emit def update(self, exists: bool, obj: BaseEpisode) -> None: if exists: - self._update_state(obj, to_populate=self._existing, to_emit=self._missing) + self._missing = self._update_state(obj, to_populate=self._existing, to_emit=self._missing) else: - self._update_state(obj, to_populate=self._missing, to_emit=self._existing) + self._existing = self._update_state(obj, to_populate=self._missing, to_emit=self._existing) + + def __rich__(self) -> ConsoleRenderable | str: + if not self.pairs: + return "" + grid = Table.grid() + grid.add_column() + grid.add_column() + grid.add_column() + for pair in self.pairs: + grid.add_row( + Text(pair.prefix, style=pair.style), + Text("╶┬╴" if pair.length > 1 else " ", style=pair.style), + pair.first, + ) + if pair.length == 1: + continue + if pair.length > 1: + grid.add_row("", " │ ", "", style=pair.style) 
+ grid.add_row( + "", + Text(" ╰╴" if pair.last else " ", style=pair.style), + pair.last, + ) + grid.add_row("", "", "") + return grid diff --git a/podcast_archiver/utils/progress.py b/podcast_archiver/utils/progress.py index 8ede210..0bdc76f 100644 --- a/podcast_archiver/utils/progress.py +++ b/podcast_archiver/utils/progress.py @@ -1,24 +1,65 @@ from __future__ import annotations +from functools import partial from threading import Event, Lock, Thread -from typing import Iterable +from typing import TYPE_CHECKING, Iterable from rich import progress as rp +from rich.table import Column from podcast_archiver.console import console from podcast_archiver.logging import REDIRECT_VIA_LOGGING +if TYPE_CHECKING: + from rich.console import RenderableType + + from podcast_archiver.models.episode import BaseEpisode + + +class EpisodeColumn(rp.RenderableColumn): + def render(self, task: rp.Task) -> RenderableType: + return task.fields["episode"] + + +_Column = partial( + Column, + no_wrap=True, + overflow="ignore", + highlight=False, +) + PROGRESS_COLUMNS: tuple[rp.ProgressColumn, ...] 
= ( - rp.SpinnerColumn(finished_text="[success]✔[/]"), - rp.TextColumn("{task.description}"), - rp.BarColumn(bar_width=25), - rp.TaskProgressColumn(), - rp.TimeRemainingColumn(), - rp.DownloadColumn(), - rp.TransferSpeedColumn(), + rp.SpinnerColumn( + table_column=_Column(width=4), + ), + rp.TimeRemainingColumn( + compact=True, + table_column=_Column(width=11, justify="center"), + ), + EpisodeColumn( + table_column=_Column( + overflow="ellipsis", + min_width=40, + ), + ), + rp.BarColumn( + bar_width=20, + table_column=_Column(width=20), + ), + rp.TaskProgressColumn( + table_column=_Column(width=5), + ), + rp.TransferSpeedColumn( + table_column=_Column(width=10), + ), ) +_widths = sum(col.get_table_column().width or 0 for col in [*PROGRESS_COLUMNS[:1], *PROGRESS_COLUMNS[3:]]) +description_col = PROGRESS_COLUMNS[2].get_table_column() +description_col.width = max(console.width - _widths, description_col.min_width or 0) + + class _ProgressRefreshThread(Thread): progress: rp.Progress stop_event: Event @@ -61,13 +102,13 @@ def __init__(self) -> None: refresh_per_second=8, ) - def track(self, iterable: Iterable[bytes], description: str, total: int) -> Iterable[bytes]: + def track(self, iterable: Iterable[bytes], total: int, episode: BaseEpisode) -> Iterable[bytes]: if REDIRECT_VIA_LOGGING: yield from iterable return self.start() - task_id = self._progress.add_task(description=description, total=total) + task_id = self._progress.add_task("downloading", total=total, episode=episode) try: for it in iterable: yield it