[hiperdex] add chapter and manga extractors (closes #606)

mikf · Feb 22, 2020 · cc5079c · cc5079c
1 parent 64bdec8
commit cc5079c
Show file tree

Hide file tree

Showing 3 changed files with 140 additions and 0 deletions.
diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst
@@ -46,6 +46,7 @@ HentaiFox            https://hentaifox.com/              Galleries, Search Resul
 HentaiHand           https://hentaihand.com/             Galleries, Search Results, Tag-Searches
 HentaiHere           https://hentaihere.com/             Chapters, Manga
 Hentainexus          https://hentainexus.com/            Galleries, Search Results
+Hiperdex             https://hiperdex.com/               Chapters, Manga
 Hitomi.la            https://hitomi.la/                  Galleries
 Hypnohub             https://hypnohub.net/               Pools, Popular Images, Posts, Tag-Searches
 Idol Complex         https://idol.sankakucomplex.com/    Pools, Posts, Tag-Searches                         Optional

diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
@@ -42,6 +42,7 @@
     "hentaihand",
     "hentaihere",
     "hentainexus",
+    "hiperdex",
     "hitomi",
     "hypnohub",
     "idolcomplex",

diff --git a/gallery_dl/extractor/hiperdex.py b/gallery_dl/extractor/hiperdex.py
@@ -0,0 +1,138 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://hiperdex.com/"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text
+from ..cache import memcache
+import re
+
+
+class HiperdexBase:
+    """Mixin with site constants and metadata helpers for hiperdex"""
+    category = "hiperdex"
+    root = "https://hiperdex.com"
+
+    @memcache(keyarg=1)
+    def manga_data(self, manga, page=None):
+        """Return a dict of manga metadata parsed from an overview page
+
+        'manga' is the slug used in '/manga/<slug>/' URLs.
+        'page' is the overview page's HTML; when it is None or empty,
+        the page gets requested from the site.
+        """
+        if not page:
+            url = "{}/manga/{}/".format(self.root, manga)
+            page = self.request(url).text
+        extr = text.extract_from(page)
+
+        # NOTE: the order of these extr() calls is significant.
+        # The 'summary-content' marker is used three times in a row for
+        # three different fields, so each call presumably resumes after
+        # the previous match.
+        title = extr("<title>", "<").rpartition("&")[0].strip()
+        title = text.unescape(title)
+        score = text.parse_float(extr('id="averagerate">', '<'))
+        author = text.remove_html(extr('class="author-content">', '</div>'))
+        artist = text.remove_html(extr('class="artist-content">', '</div>'))
+        genres = extr('class="genres-content">', '</div>')
+        genres = text.split_html(genres)[::2]
+        manga_type = extr('class="summary-content">', '<').strip()
+        release = text.remove_html(extr('class="summary-content">', '</div>'))
+        release = text.parse_int(release)
+        status = extr('class="summary-content">', '<').strip()
+        description = extr('class="description-summary">', '</div>')
+        description = text.remove_html(text.unescape(description))
+
+        return {
+            "manga"  : title,
+            "score"  : score,
+            "author" : author,
+            "artist" : artist,
+            "genre"  : genres,
+            "type"   : manga_type,
+            "release": release,
+            "status" : status,
+            "description": description,
+            "language": "English",
+            "lang"    : "en",
+        }
+
+    def chapter_data(self, chapter):
+        """Return chapter number fields merged with the manga metadata
+
+        'chapter' is a '<major>[-<minor>]' string, e.g. "154-5".
+        A minor part of "end" is not treated as a minor chapter number.
+        """
+        major, _, minor = chapter.partition("-")
+        if minor and minor != "end":
+            minor = "." + minor
+        else:
+            minor = ""
+
+        data = {
+            "chapter"      : text.parse_int(major),
+            "chapter-minor": minor,
+        }
+        data.update(self.manga_data(self.manga.lower()))
+        return data
+
+
+class HiperdexChapterExtractor(HiperdexBase, ChapterExtractor):
+    """Extractor for manga chapters from hiperdex.com"""
+    archive_fmt = "{manga}_{chapter}_{page}"
+    pattern = (r"(?:https?://)?(?:www\.)?hiperdex\.com"
+               r"(/manga/([^/?&#]+)/([^/?&#]+))")
+    test = ("https://hiperdex.com/manga/domestic-na-kanojo/154-5/", {
+        "url": "111bc3ee14ce91d78c275770ef63b56c9ac15d8d",
+        "keyword": {
+            "artist" : "Sasuga Kei",
+            "author" : "Sasuga Kei",
+            "chapter": 154,
+            "chapter-minor": ".5",
+            "description": "re:Natsuo Fujii is in love with his teacher, Hina",
+            "genre"  : list,
+            "manga"  : "Domestic na Kanojo",
+            "release": 2014,
+            "score"  : float,
+            "type"   : "Manga",
+        },
+    })
+
+    def __init__(self, match):
+        """Store the manga and chapter slugs captured by 'pattern'"""
+        path, manga, chapter = match.groups()
+        self.manga = manga
+        self.chapter = chapter
+        url = self.root + path + "/"
+        ChapterExtractor.__init__(self, match, url)
+
+    def metadata(self, _):
+        """Return chapter metadata; the page content is not needed"""
+        return self.chapter_data(self.chapter)
+
+    def images(self, page):
+        """Return a list of (image URL, None) tuples found in 'page'"""
+        results = []
+        for src in re.findall(r'id="image-\d+"\s+src="([^"]+)', page):
+            # strip whitespace surrounding the 'src' attribute value
+            results.append((src.strip(), None))
+        return results
+
+
+class HiperdexMangaExtractor(HiperdexBase, MangaExtractor):
+    """Extractor for manga from hiperdex.com"""
+    chapterclass = HiperdexChapterExtractor
+    pattern = r"(?:https?://)?(?:www\.)?hiperdex\.com(/manga/([^/?&#]+))/?$"
+    test = ("https://hiperdex.com/manga/youre-not-that-special/", {
+        "count": 51,
+        "pattern": HiperdexChapterExtractor.pattern,
+        "keyword": {
+            "artist" : "Bolp",
+            "author" : "Abyo4",
+            "chapter": int,
+            "chapter-minor": "",
+            "description": "re:I didn’t think much of the creepy girl in ",
+            "genre"  : list,
+            "manga"  : "You're Not That Special!",
+            "release": 2019,
+            "score"  : float,
+            "status" : "Completed",
+            "type"   : "Manhwa",
+        },
+    })
+
+    def __init__(self, match):
+        """Store the manga slug captured by 'pattern'"""
+        path, self.manga = match.groups()
+        MangaExtractor.__init__(self, match, self.root + path + "/")
+
+    def chapters(self, page):
+        """Return a list of (chapter URL, metadata) tuples from 'page'
+
+        'page' is the HTML of the manga overview page.
+        Consecutive links to the same chapter path are reported once.
+        """
+        # Hand the already downloaded overview page to manga_data().
+        # chapter_data() calls manga_data() with the lower-cased slug;
+        # use the same value here so both calls agree on the argument
+        # selected by memcache(keyarg=1) (presumably the cache key).
+        self.manga_data(self.manga.lower(), page)
+        results = []
+        last = None
+
+        page = text.extract(page, 'class="page-content-listing', '</ul>')[0]
+        # 'pattern' is defined as a plain string in this file;
+        # re.finditer() works for a string as well as for an already
+        # compiled pattern object, whereas calling '.finditer' on the
+        # attribute only works for the latter.
+        for match in re.finditer(HiperdexChapterExtractor.pattern, page):
+            path = match.group(1)
+            if last != path:
+                last = path
+                results.append((
+                    self.root + path,
+                    self.chapter_data(path.rpartition("/")[2]),
+                ))
+
+        return results