From ea81fa985f6311d968481fde8eb2b0d582f6d0fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 27 Jun 2024 22:00:59 +0200 Subject: [PATCH] [archive] implement 'archive-event' option (#5784) With this, IDs of skipped files will no longer be written to an archive by default. Use "archive-event": "file,skip" to restore the previous behavior. --- docs/configuration.rst | 19 +++++++++++++++++++ gallery_dl/job.py | 18 ++++++++++++++---- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index 7cacabb9f0..92fe883805 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -865,6 +865,25 @@ Description may pose a security risk. +extractor.*.archive-event +------------------------- +Type + + ``string`` + + ``list`` of ``strings`` +Default + ``"file"`` +Example + * ``"file,skip"`` + * ``["file", "skip"]`` +Description + `Event(s) <metadata.event_>`__ + for which IDs get written to an + `archive <extractor.*.archive_>`__. + + Available events are: + ``file``, ``skip`` + + extractor.*.archive-format -------------------------- Type diff --git a/gallery_dl/job.py b/gallery_dl/job.py index 4562b05def..00e79c0691 100644 --- a/gallery_dl/job.py +++ b/gallery_dl/job.py @@ -315,7 +315,7 @@ def handle_url(self, url, kwdict): pathfmt.build_path() if pathfmt.exists(): - if archive: + if archive and self._archive_write_skip: archive.add(kwdict) self.handle_skip() return @@ -345,7 +345,7 @@ def handle_url(self, url, kwdict): return if not pathfmt.temppath: - if archive: + if archive and self._archive_write_skip: archive.add(kwdict) self.handle_skip() return @@ -359,7 +359,7 @@ def handle_url(self, url, kwdict): pathfmt.finalize() self.out.success(pathfmt.path) self._skipcnt = 0 - if archive: + if archive and self._archive_write_file: archive.add(kwdict) if "after" in hooks: for callback in hooks["after"]: @@ -561,6 +561,16 @@ def initialize(self, kwdict=None): else: extr.log.debug("Using download archive '%s'", archive_path) + 
events = cfg("archive-event") + if events is None: + self._archive_write_file = True + self._archive_write_skip = False + else: + if isinstance(events, str): + events = events.split(",") + self._archive_write_file = ("file" in events) + self._archive_write_skip = ("skip" in events) + skip = cfg("skip", True) if skip: self._skipexc = None @@ -676,7 +686,7 @@ def handle_url(self, url, kwdict): kwdict["extension"] = "jpg" if self.sleep: self.extractor.sleep(self.sleep(), "download") - if self.archive: + if self.archive and self._archive_write_skip: self.archive.add(kwdict) self.out.skip(self.pathfmt.build_filename(kwdict))