Skip to content

Commit

Permalink
[imgur] distinguish album and gallery URLs (#380)
Browse files Browse the repository at this point in the history
A gallery can be either an album or a single image.
  • Loading branch information
mikf committed Aug 14, 2019
1 parent 2325135 commit 829b1cc
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 22 deletions.
2 changes: 1 addition & 1 deletion docs/supportedsites.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ ImageFap https://imagefap.com/ Images from Users, Gall
ImgBB https://imgbb.com/ Images from Users, Albums, individual Images Optional
imgbox https://imgbox.com/ Galleries, individual Images
imgth https://imgth.com/ Galleries
imgur https://imgur.com/ Albums, individual Images
imgur https://imgur.com/ Albums, Galleries, individual Images
Instagram https://www.instagram.com/ |instagram-C| Optional
Jaimini's Box https://jaiminisbox.com/reader/ Chapters, Manga
Joyreactor http://joyreactor.cc/ |joyreactor-C|
Expand Down
69 changes: 48 additions & 21 deletions gallery_dl/extractor/imgur.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,19 @@ class ImgurExtractor(Extractor):

def __init__(self, match):
Extractor.__init__(self, match)
self.item_id = match.group(1)
self.key = match.group(1)
self.mp4 = self.config("mp4", True)

def _get_data(self, path):
def _extract_data(self, path):
response = self.request(self.root + path, notfound=self.subcategory)
data = text.extract(response.text, "image : ", ",\n")[0]
return self._clean(json.loads(data))
data = json.loads(text.extract(
response.text, "image : ", ",\n")[0])
try:
del data["adConfig"]
del data["isAd"]
except KeyError:
pass
return data

def _prepare(self, image):
image["ext"] = image["ext"].partition("?")[0]
Expand All @@ -37,18 +43,9 @@ def _prepare(self, image):
image["extension"] = image["ext"][1:]
return url

@staticmethod
def _clean(data):
try:
del data["adConfig"]
del data["isAd"]
except KeyError:
pass
return data


class ImgurImageExtractor(ImgurExtractor):
"""Extractor for individual images from imgur.com"""
"""Extractor for individual images on imgur.com"""
subcategory = "image"
filename_fmt = "{category}_{hash}{title:?_//}.{extension}"
archive_fmt = "{hash}"
Expand Down Expand Up @@ -101,22 +98,21 @@ class ImgurImageExtractor(ImgurExtractor):
)

def items(self):
image = self._get_data("/" + self.item_id)
image = self._extract_data("/" + self.key)
url = self._prepare(image)

yield Message.Version, 1
yield Message.Directory, image
yield Message.Url, url, image


class ImgurAlbumExtractor(ImgurExtractor):
"""Extractor for image albums from imgur.com"""
"""Extractor for imgur albums"""
subcategory = "album"
directory_fmt = ("{category}", "{album[hash]}{album[title]:? - //}")
filename_fmt = "{category}_{album[hash]}_{num:>03}_{hash}.{extension}"
archive_fmt = "{album[hash]}_{hash}"
pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com"
r"/(?:a|gallery|t/unmuted)/(\w{7}|\w{5})")
r"/(?:a|t/unmuted)/(\w{7}|\w{5})")
test = (
("https://imgur.com/a/TcBmP", {
"url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563",
Expand Down Expand Up @@ -147,7 +143,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
"width": int,
},
}),
("https://imgur.com/gallery/eD9CT", { # large album
("https://imgur.com/a/eD9CT", { # large album
"url": "4ee94de31ff26be416271bc0b1ea27b9349c9937",
}),
("https://imgur.com/a/RhJXhVT/all", { # 7 character album hash
Expand All @@ -164,13 +160,14 @@ class ImgurAlbumExtractor(ImgurExtractor):
)

def items(self):
album = self._get_data("/a/" + self.item_id + "/all")
album = self._extract_data("/a/" + self.key + "/all")
album["title"] = album["title"].strip()
images = album["album_images"]["images"]
del album["album_images"]

if int(album["num_images"]) > len(images):
url = "{}/ajaxalbums/getimages/{}/hit.json".format(
self.root, self.item_id)
self.root, self.key)
images = self.request(url).json()["data"]["images"]

yield Message.Version, 1
Expand All @@ -180,3 +177,33 @@ def items(self):
image["num"] = num
image["album"] = album
yield Message.Url, url, image


class ImgurGalleryExtractor(ImgurExtractor):
    """Extractor for imgur gallery URLs

    A gallery can be either an album or a single image, so this extractor
    downloads nothing itself: it works out which of the two the gallery
    key refers to and queues the matching URL for ImgurAlbumExtractor or
    ImgurImageExtractor.
    """
    subcategory = "gallery"
    pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com"
               r"/gallery/(\w{7}|\w{5})")
    test = (
        ("https://imgur.com/gallery/zf2fIms", {  # non-album gallery (#380)
            "pattern": "https://imgur.com/zf2fIms",
        }),
        ("https://imgur.com/gallery/eD9CT", {
            "pattern": "https://imgur.com/a/eD9CT",
        }),
    )

    def items(self):
        """Yield one Queue message pointing at the album or image URL"""
        # Probe the album form of the URL with a HEAD request; only the
        # status code is needed, so the response is closed right away.
        # NOTE(review): fatal=False presumably makes request() hand back
        # error responses instead of aborting -- confirm in Extractor.
        album_url = self.root + "/a/" + self.key
        probe = self.request(album_url, method="HEAD", fatal=False)
        is_album = probe.status_code < 400
        probe.close()

        if is_album:
            target, extr = album_url, ImgurAlbumExtractor
        else:
            # No album under this key: treat it as a single image.
            target, extr = self.root + "/" + self.key, ImgurImageExtractor

        yield Message.Version, 1
        yield Message.Queue, target, {"_extractor": extr}

0 comments on commit 829b1cc

Please sign in to comment.