Skip to content

Commit

Permalink
[4plebs] add thread extractor (#18)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed Jul 3, 2017
1 parent dcc1d3b commit 474e9c1
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 1 deletion.
22 changes: 22 additions & 0 deletions gallery_dl/extractor/4plebs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-

# Copyright 2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extract images from https://archive.4plebs.org/"""

from . import chan


class FourplebsThreadExtractor(chan.FoolfuukaThreadExtractor):
    """Thread extractor for the FoolFuuka archive at archive.4plebs.org"""
    category = "4plebs"
    # Base URL of the archive
    root = "https://archive.4plebs.org"
    # Two capture groups: the path segment before "/thread/" and the
    # numeric thread id after it
    pattern = [
        r"(?:https?://)?(?:archive\.)?4plebs\.org/([^/]+)/thread/(\d+)",
    ]
    test = [
        ("https://archive.4plebs.org/tg/thread/54111182/", {
            "url": "85f54faf037dee29ad1c413142bcc45cd905be5a",
            "keyword": "59c414bddc58b77b3e481fbe1c4e4ea3d582b2d3",
        }),
    ]
1 change: 1 addition & 0 deletions gallery_dl/extractor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"pixiv",
"3dbooru",
"4chan",
"4plebs",
"8chan",
"batoto",
"danbooru",
Expand Down
37 changes: 36 additions & 1 deletion gallery_dl/extractor/chan.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

# Copyright 2015, 2016 Mike Fährmann
# Copyright 2015-2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
Expand Down Expand Up @@ -58,3 +58,38 @@ def get_thread_title(post):
"""Return thread title from first post"""
title = post["sub"] if "sub" in post else text.remove_html(post["com"])
return text.unescape(title)[:50]


class FoolfuukaThreadExtractor(Extractor):
    """Base extractor for FoolFuuka based boards/archives

    Subclasses are expected to set 'root' to the base URL of the archive
    and to be constructed with a match object whose two groups are the
    board name and the thread number (in that order).
    """
    category = "foolfuuka"
    subcategory = "thread"
    directory_fmt = ["{category}", "{board[shortname]}",
                     "{thread_num} - {title}"]
    filename_fmt = "{media[media]}"
    root = ""

    def __init__(self, match):
        Extractor.__init__(self)
        self.board, self.thread = match.groups()

    def items(self):
        """Yield the version, directory and url messages for a thread

        The first post returned by posts() is used as directory metadata.
        Every post that carries a media entry with a usable link is then
        emitted as a Message.Url with its file extension added under the
        "extension" key.
        """
        is_first = True
        yield Message.Version, 1
        for post in self.posts():
            if is_first:
                yield Message.Directory, post
                is_first = False

            media = post["media"]
            if not media:
                # text-only post
                continue

            url = media.get("media_link")
            if not url:
                # A media entry without a link (presumably a removed or
                # unavailable file -- TODO confirm) cannot be downloaded;
                # skip it instead of failing on 'None.rpartition'.
                continue

            post["extension"] = url.rpartition(".")[2]
            yield Message.Url, url, post

    def posts(self):
        """Yield the opening post followed by all replies of the thread"""
        url = self.root + "/_/api/chan/thread/"
        params = {"board": self.board, "num": self.thread}
        data = self.request(url, params=params).json()[self.thread]

        yield data["op"]

        # Threads without replies may lack the "posts" entry or carry an
        # empty value for it; only iterate when there is something there.
        replies = data.get("posts")
        if replies:
            yield from replies.values()

0 comments on commit 474e9c1

Please sign in to comment.