Skip to content

Commit

Permalink
[behance] add 'collection' extractor (closes #157)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed Jan 19, 2019
1 parent b8fed34 commit 217a068
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 16 deletions.
53 changes: 39 additions & 14 deletions gallery_dl/extractor/behance.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

# Copyright 2018 Mike Fährmann
# Copyright 2018-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
Expand All @@ -17,6 +17,26 @@ class BehanceExtractor(Extractor):
category = "behance"
root = "https://www.behance.net"

# Produce this extractor's message stream: a version marker first, then
# one Queue message for every entry returned by self.galleries().
def items(self):
yield Message.Version, 1
for gallery in self.galleries():
# The entry's "url" value is the queued target; the entry itself is
# passed along unchanged as the third element of the message.
yield Message.Queue, gallery["url"], gallery

def galleries(self):
"""Return all relevant gallery URLs"""
# The base implementation returns an empty tuple, so items() queues
# nothing unless this method is overridden.
# NOTE(review): items() reads gallery["url"] from each entry, so
# overrides presumably return mappings with a "url" key rather than
# plain URL strings — confirm against the API responses.
return ()

# Generator over a paginated JSON endpoint: requests `url` repeatedly and
# yields every element stored under `key` in each JSON response.
def _pagination(self, url, key):
# Header sent with every request.
# NOTE(review): presumably required for the server to answer with JSON
# instead of an HTML page — confirm.
headers = {"X-Requested-With": "XMLHttpRequest"}
# The first request carries no query parameters; "offset" is added
# once a response provides one.
params = {}

while True:
data = self.request(url, headers=headers, params=params).json()
yield from data[key]
# A missing or falsy "offset" in the response ends the iteration.
if not data.get("offset"):
return
# Otherwise send the reported offset with the next request.
params["offset"] = data["offset"]


class BehanceGalleryExtractor(BehanceExtractor):
"""Extractor for image galleries from www.behance.net"""
Expand Down Expand Up @@ -122,8 +142,7 @@ class BehanceUserExtractor(BehanceExtractor):
"""Extractor for a user's galleries from www.behance.net"""
subcategory = "user"
categorytransfer = True
pattern = [r"(?:https?://)?(?:www\.)?behance\.net"
r"/(?!gallery/)([^/?&#]+)/?$"]
pattern = [r"(?:https?://)?(?:www\.)?behance\.net/([^/?&#]+)/?$"]
test = [("https://www.behance.net/alexstrohl", {
"count": ">= 8",
"pattern": BehanceGalleryExtractor.pattern[0],
Expand All @@ -133,18 +152,24 @@ def __init__(self, match):
BehanceExtractor.__init__(self)
self.user = match.group(1)

def items(self):
def galleries(self):
url = "{}/{}".format(self.root, self.user)
headers = {"X-Requested-With": "XMLHttpRequest"}
params = {"offset": None}
return self._pagination(url, "section_content")

yield Message.Version, 1
while True:
data = self.request(url, headers=headers, params=params).json()

for gallery in data["section_content"]:
yield Message.Queue, gallery["url"], gallery
class BehanceCollectionExtractor(BehanceExtractor):
"""Extractor for a collection's galleries from www.behance.net"""
subcategory = "collection"
pattern = [r"(?:https?://)?(?:www\.)?behance\.net/collection/(\d+)"]
test = [("https://www.behance.net/collection/170615607/Sky", {
"count": ">= 13",
"pattern": BehanceGalleryExtractor.pattern[0],
})]

if "offset" not in data:
return
params["offset"] = data["offset"]
def __init__(self, match):
# Stores group 1 of `match` as the collection id.
# NOTE(review): `match` is presumably the result of matching this
# class's `pattern`, whose only group is (\d+) — confirm with the caller.
BehanceExtractor.__init__(self)
self.collection_id = match.group(1)

def galleries(self):
# Build "<root>/collection/<collection_id>/a" and hand it to
# self._pagination() with "output" as the response key to read
# entries from; the result of that call is returned as-is.
url = "{}/collection/{}/a".format(self.root, self.collection_id)
return self._pagination(url, "output")
4 changes: 2 additions & 2 deletions gallery_dl/extractor/mangapark.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ class MangaparkMangaExtractor(MangaparkExtractor, MangaExtractor):
r"(/manga/[^/?&#]+)/?$"]
test = [
("https://mangapark.me/manga/aria", {
"url": "aae6bf44e4360a1b0f5aa5fd74339cac6d616d20",
"keyword": "b7440cc4cd68d0262703da1ceadaecd34bdaacb0",
"url": "a58be23ef3874fe9705b0b41dd462b67eaaafd9a",
"keyword": "b3b5a30aa2a326bc0ca8b74c65b5ecd4bf676ebf",
}),
("https://mangapark.net/manga/aria", None),
("https://mangapark.com/manga/aria", None),
Expand Down
1 change: 1 addition & 0 deletions test/test_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
BROKEN = {
"desuarchive",
"mangahere",
"ngomik",
"rbt",
}

Expand Down

0 comments on commit 217a068

Please sign in to comment.