diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index cd34f2dc23..6c8e8a7cbb 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -51,7 +51,7 @@ ImageFap https://imagefap.com/ Images from Users, Gall ImgBB https://imgbb.com/ Images from Users, Albums, individual Images Optional imgbox https://imgbox.com/ Galleries, individual Images imgth https://imgth.com/ Galleries -imgur https://imgur.com/ Albums, individual Images +imgur https://imgur.com/ Albums, Galleries, individual Images Instagram https://www.instagram.com/ |instagram-C| Optional Jaimini's Box https://jaiminisbox.com/reader/ Chapters, Manga Joyreactor http://joyreactor.cc/ |joyreactor-C| diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index c5e3d176ac..f754e04b6b 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -20,13 +20,19 @@ class ImgurExtractor(Extractor): def __init__(self, match): Extractor.__init__(self, match) - self.item_id = match.group(1) + self.key = match.group(1) self.mp4 = self.config("mp4", True) - def _get_data(self, path): + def _extract_data(self, path): response = self.request(self.root + path, notfound=self.subcategory) - data = text.extract(response.text, "image : ", ",\n")[0] - return self._clean(json.loads(data)) + data = json.loads(text.extract( + response.text, "image : ", ",\n")[0]) + try: + del data["adConfig"] + del data["isAd"] + except KeyError: + pass + return data def _prepare(self, image): image["ext"] = image["ext"].partition("?")[0] @@ -37,18 +43,9 @@ def _prepare(self, image): image["extension"] = image["ext"][1:] return url - @staticmethod - def _clean(data): - try: - del data["adConfig"] - del data["isAd"] - except KeyError: - pass - return data - class ImgurImageExtractor(ImgurExtractor): - """Extractor for individual images from imgur.com""" + """Extractor for individual images on imgur.com""" subcategory = "image" filename_fmt = "{category}_{hash}{title:?_//}.{extension}" archive_fmt = "{hash}" @@ -101,22 +98,21 @@ class ImgurImageExtractor(ImgurExtractor): ) def items(self): - image = self._get_data("/" + self.item_id) + image = self._extract_data("/" + self.key) url = self._prepare(image) - yield Message.Version, 1 yield Message.Directory, image yield Message.Url, url, image class ImgurAlbumExtractor(ImgurExtractor): - """Extractor for image albums from imgur.com""" + """Extractor for imgur albums""" subcategory = "album" directory_fmt = ("{category}", "{album[hash]}{album[title]:? - //}") filename_fmt = "{category}_{album[hash]}_{num:>03}_{hash}.{extension}" archive_fmt = "{album[hash]}_{hash}" pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com" - r"/(?:a|gallery|t/unmuted)/(\w{7}|\w{5})") + r"/(?:a|t/unmuted)/(\w{7}|\w{5})") test = ( ("https://imgur.com/a/TcBmP", { "url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563", @@ -147,7 +143,7 @@ class ImgurAlbumExtractor(ImgurExtractor): "width": int, }, }), - ("https://imgur.com/gallery/eD9CT", { # large album + ("https://imgur.com/a/eD9CT", { # large album "url": "4ee94de31ff26be416271bc0b1ea27b9349c9937", }), ("https://imgur.com/a/RhJXhVT/all", { # 7 character album hash @@ -164,13 +160,14 @@ class ImgurAlbumExtractor(ImgurExtractor): ) def items(self): - album = self._get_data("/a/" + self.item_id + "/all") + album = self._extract_data("/a/" + self.key + "/all") + album["title"] = album["title"].strip() images = album["album_images"]["images"] del album["album_images"] if int(album["num_images"]) > len(images): url = "{}/ajaxalbums/getimages/{}/hit.json".format( - self.root, self.item_id) + self.root, self.key) images = self.request(url).json()["data"]["images"] yield Message.Version, 1 @@ -180,3 +177,33 @@ def items(self): image["num"] = num image["album"] = album yield Message.Url, url, image + + +class ImgurGalleryExtractor(ImgurExtractor): + """Extractor for imgur galleries""" + subcategory = "gallery" + pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com" + r"/gallery/(\w{7}|\w{5})") + test = ( + ("https://imgur.com/gallery/zf2fIms", { # non-album gallery (#380) + "pattern": "https://imgur.com/zf2fIms", + }), + ("https://imgur.com/gallery/eD9CT", { + "pattern": "https://imgur.com/a/eD9CT", + }), + ) + + def items(self): + url = self.root + "/a/" + self.key + response = self.request(url, method="HEAD", fatal=False) + + if response.status_code < 400: + extr = ImgurAlbumExtractor + path = "/a/" + else: + extr = ImgurImageExtractor + path = "/" + + response.close() + yield Message.Version, 1 + yield Message.Queue, self.root + path + self.key, {"_extractor": extr}