[kissmanga][readcomiconline] add 'captcha' option (#279)

to configure how to handle CAPTCHA page redirects: - either interactively wait for the user to solve the CAPTCHA - or raise StopExtraction like before
mikf · May 27, 2019 · 4465a3e · 4465a3e
1 parent e30ada1
commit 4465a3e
Show file tree

Hide file tree

Showing 4 changed files with 60 additions and 22 deletions.
diff --git a/docs/configuration.rst b/docs/configuration.rst
@@ -593,6 +593,18 @@ Description Controls whether to choose the GIF or MP4 version of an animation.
 =========== =====
 
 
+extractor.kissmanga.captcha
+---------------------------
+=========== =====
+Type        ``string``
+Default     ``"stop"``
+Description Controls how to handle redirects to CAPTCHA pages.
+
+            * ``"stop"``: Stop the current extractor run.
+            * ``"wait"``: Ask the user to solve the CAPTCHA and wait.
+=========== =====
+
+
 extractor.oauth.browser
 -----------------------
 =========== =====
@@ -646,6 +658,18 @@ Description Minimum and maximum wait time in seconds between HTTP requests
 =========== =====
 
 
+extractor.readcomiconline.captcha
+---------------------------------
+=========== =====
+Type        ``string``
+Default     ``"stop"``
+Description Controls how to handle redirects to CAPTCHA pages.
+
+            * ``"stop"``: Stop the current extractor run.
+            * ``"wait"``: Ask the user to solve the CAPTCHA and wait.
+=========== =====
+
+
 extractor.recursive.blacklist
 -----------------------------
 =========== =====

diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
@@ -62,6 +62,10 @@
         {
             "mp4": true
         },
+        "kissmanga":
+        {
+            "captcha": "stop"
+        },
         "nijie":
         {
             "username": null,
@@ -82,6 +86,10 @@
             "wait-min": 3.0,
             "wait-max": 6.0
         },
+        "readcomiconline":
+        {
+            "captcha": "stop"
+        },
         "recursive":
         {
             "blacklist": ["directlink", "oauth", "recursive", "test"]

diff --git a/gallery_dl/extractor/kissmanga.py b/gallery_dl/extractor/kissmanga.py
@@ -8,29 +8,43 @@
 
 """Extract manga-chapters and entire manga from https://kissmanga.com/"""
 
-from .common import ChapterExtractor, MangaExtractor
+from .common import ChapterExtractor, MangaExtractor, Extractor
 from .. import text, aes, exception
 from ..cache import cache
 import hashlib
 import ast
 import re
 
 
-class KissmangaBase():
+class RedirectMixin():
+    """Detect and handle redirects to CAPTCHA pages
+
+    Overrides request() so that a response which was redirected to an
+    "/AreYouHuman" URL is handled according to the "captcha" option:
+    "wait" prompts the user and retries, anything else (default "stop")
+    aborts the extractor run.
+    """
+
+    def request(self, url):
+        """Fetch 'url' and return the response unless it is a CAPTCHA redirect
+
+        Raises exception.StopExtraction when a CAPTCHA redirect is
+        detected and the "captcha" option is not "wait".
+        """
+        # keep requesting until a non-CAPTCHA response is obtained
+        while True:
+            # calls Extractor.request directly instead of going through super()
+            response = Extractor.request(self, url)
+            # no redirect took place, or the final URL is not the CAPTCHA page
+            if not response.history or "/AreYouHuman" not in response.url:
+                return response
+            if self.config("captcha", "stop") == "wait":
+                self.log.warning(
+                    "Redirect to \n%s\nVisit this URL in your browser, solve "
+                    "the CAPTCHA, and press ENTER to continue", response.url)
+                try:
+                    # block until the user confirms with ENTER
+                    input()
+                except (EOFError, OSError):
+                    # NOTE(review): if stdin is closed or unavailable, this
+                    # looks like it retries immediately without pausing;
+                    # confirm this is acceptable
+                    pass
+            else:
+                self.log.error(
+                    "Redirect to \n%s\nVisit this URL in your browser and "
+                    "solve the CAPTCHA to continue", response.url)
+                raise exception.StopExtraction()
+
+
+class KissmangaBase(RedirectMixin):
     """Base class for kissmanga extractors"""
     category = "kissmanga"
     archive_fmt = "{chapter_id}_{page}"
     root = "https://kissmanga.com"
 
-    def request(self, url):
-        response = super().request(url)
-        if response.history and "/AreYouHuman" in response.url:
-            self.log.error("Redirect to \n%s\n"
-                           "Visit this URL in your browser and solve "
-                           "the CAPTCHA to continue.", response.url)
-            raise exception.StopExtraction()
-        return response
-
     @staticmethod
     def parse_chapter_string(data):
         """Parse 'chapter_string' value contained in 'data'"""

diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
@@ -9,27 +9,19 @@
 """Extract comic-issues and entire comics from https://readcomiconline.to/"""
 
 from .common import ChapterExtractor, MangaExtractor
-from .. import text, exception
+from .kissmanga import RedirectMixin
+from .. import text
 import re
 
 
-class ReadcomiconlineBase():
+class ReadcomiconlineBase(RedirectMixin):
     """Base class for readcomiconline extractors"""
     category = "readcomiconline"
     directory_fmt = ("{category}", "{comic}", "{issue:>03}")
     filename_fmt = "{comic}_{issue:>03}_{page:>03}.{extension}"
     archive_fmt = "{issue_id}_{page}"
     root = "https://readcomiconline.to"
 
-    def request(self, url):
-        response = super().request(url)
-        if response.history and "/AreYouHuman" in response.url:
-            self.log.error("Redirect to \n%s\n"
-                           "Visit this URL in your browser and solve "
-                           "the CAPTCHA to continue.", response.url)
-            raise exception.StopExtraction()
-        return response
-
 
 class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
     """Extractor for comic-issues from readcomiconline.to"""