From 421a9740a344d6c18358592dfd7970960d87d6c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 15 Jan 2018 18:27:58 +0100 Subject: [PATCH] [tumblr] add 'tumblr:' to force Tumblr extractor (#71) --- gallery_dl/extractor/pinterest.py | 1 - gallery_dl/extractor/tumblr.py | 32 ++++++++++++++++++------------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index a5a3447295..8fa1aeccd6 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -71,7 +71,6 @@ class PinterestBoardExtractor(PinterestExtractor): ("https://www.pinterest.com/g1952849/test-/", { "url": "705ee521630a5d613b0449d694a5345e684572a9", "keyword": "1650dd31c4dedd940cef399135e485400625ec0b", - "content": "30897fb5d5616765bb2c9c26cb84f54499424fb4", }), ("https://www.pinterest.com/g1952848/test/", { "exception": exception.NotFoundError, diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index 555e1e6c06..aeb06c56b7 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -35,6 +35,10 @@ def _original_video(url): POST_TYPES = frozenset(( "text", "quote", "link", "answer", "video", "audio", "photo", "chat")) +BASE_PATTERN = ( + r"(?:tumblr:(?:https?://)?([^/]+)|" + r"(?:https?://)?([^.]+\.tumblr\.com))") + class TumblrExtractor(Extractor): """Base class for tumblr extractors""" @@ -44,7 +48,7 @@ class TumblrExtractor(Extractor): def __init__(self, match): Extractor.__init__(self) - self.user = match.group(1) + self.blog = match.group(1) or match.group(2) self.api = TumblrAPI(self) self.types = self._setup_posttypes() @@ -58,7 +62,7 @@ def __init__(self, match): self.log.warning("no valid post types selected") def items(self): - blog = self.api.info(self.user) + blog = self.api.info(self.blog) yield Message.Version, 1 yield Message.Directory, blog @@ -139,7 +143,7 @@ def _prepare(url, post): class TumblrUserExtractor(TumblrExtractor): """Extractor for all images from a tumblr-user""" subcategory = "user" - pattern = [r"(?:https?://)?([^.]+)\.tumblr\.com(?:/page/\d+)?/?$"] + pattern = [BASE_PATTERN + r"(?:/page/\d+)?/?$"] test = [ ("http://demo.tumblr.com/", { "pattern": (r"https?://\d+\.media\.tumblr\.com" @@ -154,16 +158,18 @@ class TumblrUserExtractor(TumblrExtractor): "options": (("posts", "all"), ("external", True), ("inline", True), ("reblogs", True)) }), + ("tumblr:http://www.b-authentique.com/", None), + ("tumblr:www.b-authentique.com", None), ] def posts(self): - return self.api.posts(self.user, {}) + return self.api.posts(self.blog, {}) class TumblrPostExtractor(TumblrExtractor): """Extractor for images from a single post on tumblr""" subcategory = "post" - pattern = [r"(?:https?://)?([^.]+)\.tumblr\.com/post/(\d+)"] + pattern = [BASE_PATTERN + r"/post/(\d+)"] test = [("http://demo.tumblr.com/post/459265350", { "pattern": r"https://\d+\.media\.tumblr\.com/tumblr_[^/_]+_1280.jpg", "count": 1, @@ -171,11 +177,11 @@ class TumblrPostExtractor(TumblrExtractor): def __init__(self, match): TumblrExtractor.__init__(self, match) - self.post_id = match.group(2) + self.post_id = match.group(3) self.reblogs = True def posts(self): - return self.api.posts(self.user, {"id": self.post_id}) + return self.api.posts(self.blog, {"id": self.post_id}) @staticmethod def _setup_posttypes(): @@ -185,7 +191,7 @@ def _setup_posttypes(): class TumblrTagExtractor(TumblrExtractor): """Extractor for images from a tumblr-user by tag""" subcategory = "tag" - pattern = [r"(?:https?://)?([^.]+)\.tumblr\.com/tagged/(.+)"] + pattern = [BASE_PATTERN + r"/tagged/([^/?&#]+)"] test = [("http://demo.tumblr.com/tagged/Times%20Square", { "pattern": r"https://\d+\.media\.tumblr\.com/tumblr_[^/_]+_1280.jpg", "count": 1, @@ -193,23 +199,23 @@ class TumblrTagExtractor(TumblrExtractor): def __init__(self, match): TumblrExtractor.__init__(self, match) - self.tag = text.unquote(match.group(2)) + self.tag = text.unquote(match.group(3)) def posts(self): - return self.api.posts(self.user, {"tag": self.tag}) + return self.api.posts(self.blog, {"tag": self.tag}) class TumblrLikesExtractor(TumblrExtractor): """Extractor for images from a tumblr-user by tag""" subcategory = "likes" directory_fmt = ["{category}", "{name}", "likes"] - pattern = [r"(?:https?://)?([^.]+)\.tumblr\.com/likes"] + pattern = [BASE_PATTERN + r"/likes"] test = [("http://mikf123.tumblr.com/likes", { "count": 1, })] def posts(self): - return self.api.likes(self.user) + return self.api.likes(self.blog) class TumblrAPI(): @@ -262,7 +268,7 @@ def likes(self, blog): def _call(self, blog, endpoint, params): if self.api_key: params["api_key"] = self.api_key - url = "https://api.tumblr.com/v2/blog/{}.tumblr.com/{}".format( + url = "https://api.tumblr.com/v2/blog/{}/{}".format( blog, endpoint) response = self.session.get(url, params=params).json()