From 9fccd7b7832d789a0bb2a79bdbbf5c4020eafd74 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Fri, 19 Jan 2018 23:12:15 +0100
Subject: [PATCH] [tumblr] provide fallback URLs (#64)

Each image now produces 3 URLs:
- amazonaws.com _raw (or _1280 for older images)
- amazonaws.com _500
- media.tumblr.com (URL returned by API)
---
 gallery_dl/extractor/tumblr.py | 40 ++++++++++++++++++++++++----------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index aeb06c56b7..96056911af 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -15,12 +15,20 @@
 
 
 def _original_image(url):
-    if url.endswith(".gif") and "_inline_" in url:
-        return url
-    return re.sub(
-        (r"https?://\d+\.media\.tumblr\.com"
-         r"/([0-9a-f]+/tumblr_[^/?&#.]+)_\d+\.([0-9a-z]+)"),
-        r"https://s3.amazonaws.com/data.tumblr.com/\1_raw.\2", url
+    match = re.match(
+        r"https?://\d+\.media\.tumblr\.com"
+        r"((/[0-9a-f]+)?/tumblr_[^/?&#.]+)_\d+\.([0-9a-z]+)",
+        url)
+
+    if not match:
+        return (url,)
+    root = "https://s3.amazonaws.com/data.tumblr.com"
+    path, key, ext = match.groups()
+
+    return (
+        "".join((root, path, "_raw." if key else "_1280.", ext)),
+        "".join((root, path, "_500.", ext)),
+        url,
     )
 
 
@@ -90,7 +98,7 @@ def items(self):
                     photo.update(photo["original_size"])
                     del photo["original_size"]
                     del photo["alt_sizes"]
-                    yield self._prepare(_original_image(photo["url"]), post)
+                    yield self._prepare_image(photo["url"], post)
 
             if "audio_url" in post:  # type: "audio"
                 yield self._prepare(post["audio_url"], post)
@@ -102,7 +110,7 @@ def items(self):
                 for key in ("body", "description"):
                     if key in post:
                         for url in re.findall('