From 6fe9a134bf98b930d9a13b153ece3f964793c945 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 6 Sep 2019 21:58:13 +0200 Subject: [PATCH] [lineblog] add blog and post extractors (closes #404) --- docs/supportedsites.rst | 1 + gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/lineblog.py | 73 ++++++++++++++++++++++++++++++++ scripts/supportedsites.py | 1 + 4 files changed, 76 insertions(+) create mode 100644 gallery_dl/extractor/lineblog.py diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 9c3a9b80ea..03479da7f5 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -62,6 +62,7 @@ Kirei Cake https://reader.kireicake.com/ Chapters, Manga KissManga https://kissmanga.com/ Chapters, Manga Komikcast https://komikcast.com/ Chapters, Manga Konachan https://konachan.com/ Pools, Popular Images, Posts, Tag-Searches +LINE BLOG https://www.lineblog.me/ Blogs, Posts livedoor Blog http://blog.livedoor.jp/ Blogs, Posts Luscious https://luscious.net/ Albums, Search Results Optional Manga Fox https://fanfox.net/ Chapters diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 677e64bb9e..351c5dfdab 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -54,6 +54,7 @@ "kissmanga", "komikcast", "konachan", + "lineblog", "livedoor", "luscious", "mangadex", diff --git a/gallery_dl/extractor/lineblog.py b/gallery_dl/extractor/lineblog.py new file mode 100644 index 0000000000..a1daa3914b --- /dev/null +++ b/gallery_dl/extractor/lineblog.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://www.lineblog.me/""" + +from .livedoor import LivedoorBlogExtractor, LivedoorPostExtractor +from .. 
import text  # completes the "from .. " that ends the previous patch line


class LineblogBase():
    """Base class for lineblog extractors

    Mixin holding the lineblog-specific settings and image collection.
    The concrete extractors below combine it with the Livedoor blog/post
    extractor classes, so it is listed first in their bases and its
    attributes and `_images` take precedence over the Livedoor ones.
    """
    category = "lineblog"
    root = "https://lineblog.me"

    def _images(self, post):
        """Collect one metadata dict per image embedded in a post body.

        Parameters:
            post: dict describing a post.  Its "body" entry (the post's
                markup) is removed from the dict by this call; the same
                dict object is then attached to every returned image.

        Returns:
            A list with the results of `text.nameext_from_url()`, one per
            `<img ...>` tag that has a non-empty `src` attribute.  The
            dict handed to that helper contains "url", "num", "hash" and
            "post".
        """
        imgs = []
        body = post.pop("body")

        # Iterate over the attribute text of every <img ...> tag.  Both
        # delimiters are needed: an empty start marker with no end marker
        # cannot isolate the tags that 'src' and 'alt' are read from.
        for num, img in enumerate(text.extract_iter(body, "<img ", ">"), 1):
            src = text.extract(img, 'src="', '"')[0]
            alt = text.extract(img, 'alt="', '"')[0]

            if not src:
                # 'num' still counts this tag, so numbering follows the
                # position in the body rather than the result index.
                continue
            if src.startswith("https://obs.line-scdn.") and src.count("/") > 3:
                # Drop the last path segment of LINE CDN URLs.
                # NOTE(review): presumably a size/variant suffix; confirm.
                src = src.rpartition("/")[0]

            # Prefer the alt text as the name source, fall back to the URL.
            imgs.append(text.nameext_from_url(alt or src, {
                "url" : src,
                "num" : num,
                "hash": src.rpartition("/")[2],
                "post": post,
            }))

        return imgs


class LineblogBlogExtractor(LineblogBase, LivedoorBlogExtractor):
    """Extractor for a user's blog on lineblog.me"""
    # One capture group: the user/blog name.
    # NOTE(review): the pattern does not accept a "www." prefix although
    # the module docstring uses https://www.lineblog.me/; confirm intent.
    pattern = r"(?:https?://)?lineblog\.me/(\w+)/?(?:$|[?&#])"
    test = ("https://lineblog.me/mamoru_miyano/", {
        "range": "1-20",
        "count": 20,
        "pattern": r"https://obs.line-scdn.net/[\w-]+$",
        "keyword": {
            "post": {
                "categories" : tuple,
                "date"       : "type:datetime",
                "description": str,
                "id"         : int,
                "tags"       : list,
                "title"      : str,
                "user"       : "mamoru_miyano"
            },
            "filename": str,
            "hash"    : r"re:\w{32,}",
            "num"     : int,
        },
    })


class LineblogPostExtractor(LineblogBase, LivedoorPostExtractor):
    """Extractor for blog posts on lineblog.me"""
    # Two capture groups: the user/blog name and the numeric post ID.
    pattern = r"(?:https?://)?lineblog\.me/(\w+)/archives/(\d+)"
    test = ("https://lineblog.me/mamoru_miyano/archives/1919150.html", {
        "url": "24afeb4044c554f80c374b52bf8109c6f1c0c757",
        "keyword": "76a38e2c0074926bd3362f66f9fc0e6c41591dcb",
    })


# ---------------------------------------------------------------------------
# Remainder of the patch (not part of lineblog.py), kept verbatim:
#
# diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
# index 78963aa219..2213ffa86e 100755
# --- a/scripts/supportedsites.py
# +++ b/scripts/supportedsites.py
# @@ -45,6 +45,7 @@
#   "jaiminisbox" : "Jaimini's Box",
#   "kireicake" : "Kirei Cake",
#   "kissmanga" : "KissManga",
# + "lineblog" : "LINE BLOG",
#   "livedoor" : "livedoor Blog",
#   "mangadex" : "MangaDex",
#   "mangafox" : "Manga Fox",