From 4465a3ea684c0e1f7e77b1558945d62c64b769de Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Mon, 27 May 2019 22:24:48 +0200
Subject: [PATCH] [kissmanga][readcomiconline] add 'captcha' option (#279)

to configure how to handle CAPTCHA page redirects:
- either interactively wait for the user to solve the CAPTCHA
- or raise StopExtraction like before
---
 docs/configuration.rst                  | 24 +++++++++++++++++
 docs/gallery-dl.conf                    |  8 ++++++
 gallery_dl/extractor/kissmanga.py       | 36 +++++++++++++++++--------
 gallery_dl/extractor/readcomiconline.py | 14 +++-------
 4 files changed, 60 insertions(+), 22 deletions(-)

diff --git a/docs/configuration.rst b/docs/configuration.rst
index cc4252111f..0131663ae2 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -593,6 +593,18 @@ Description Controls whether to choose the GIF or MP4 version of an animation.
 =========== =====
 
 
+extractor.kissmanga.captcha
+---------------------------
+=========== =====
+Type        ``string``
+Default     ``"stop"``
+Description Controls how to handle redirects to CAPTCHA pages.
+
+            * ``"stop"``: Stop the current extractor run.
+            * ``"wait"``: Ask the user to solve the CAPTCHA and wait.
+=========== =====
+
+
 extractor.oauth.browser
 -----------------------
 =========== =====
@@ -646,6 +658,18 @@ Description Minimum and maximum wait time in seconds between HTTP requests
 =========== =====
 
 
+extractor.readcomiconline.captcha
+---------------------------------
+=========== =====
+Type        ``string``
+Default     ``"stop"``
+Description Controls how to handle redirects to CAPTCHA pages.
+
+            * ``"stop"``: Stop the current extractor run.
+            * ``"wait"``: Ask the user to solve the CAPTCHA and wait.
+=========== =====
+
+
 extractor.recursive.blacklist
 -----------------------------
 =========== =====
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 2dfd5df55d..0891cf5fc7 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -62,6 +62,10 @@
         {
             "mp4": true
         },
+        "kissmanga":
+        {
+            "captcha": "stop"
+        },
         "nijie":
         {
             "username": null,
@@ -82,6 +86,10 @@
             "wait-min": 3.0,
             "wait-max": 6.0
         },
+        "readcomiconline":
+        {
+            "captcha": "stop"
+        },
         "recursive":
         {
             "blacklist": ["directlink", "oauth", "recursive", "test"]
diff --git a/gallery_dl/extractor/kissmanga.py b/gallery_dl/extractor/kissmanga.py
index 519dc9731b..19e0a26a63 100644
--- a/gallery_dl/extractor/kissmanga.py
+++ b/gallery_dl/extractor/kissmanga.py
@@ -8,7 +8,7 @@
 
 """Extract manga-chapters and entire manga from https://kissmanga.com/"""
 
-from .common import ChapterExtractor, MangaExtractor
+from .common import ChapterExtractor, MangaExtractor, Extractor
 from .. import text, aes, exception
 from ..cache import cache
 import hashlib
@@ -16,21 +16,35 @@
 import re
 
 
-class KissmangaBase():
+class RedirectMixin():
+    """Detect redirects to CAPTCHA pages and either wait or stop"""
+
+    def request(self, url):
+        while True:  # retry until there is no CAPTCHA redirect
+            response = Extractor.request(self, url)
+            if not response.history or "/AreYouHuman" not in response.url:
+                return response
+            if self.config("captcha", "stop") == "wait":
+                self.log.warning(
+                    "Redirect to \n%s\nVisit this URL in your browser, solve "
+                    "the CAPTCHA, and press ENTER to continue", response.url)
+                try:
+                    input()  # block until the user presses ENTER
+                except (EOFError, OSError):
+                    pass  # input() failed; retry the request right away
+            else:
+                self.log.error(
+                    "Redirect to \n%s\nVisit this URL in your browser and "
+                    "solve the CAPTCHA to continue", response.url)
+                raise exception.StopExtraction()
+
+
+class KissmangaBase(RedirectMixin):
     """Base class for kissmanga extractors"""
     category = "kissmanga"
     archive_fmt = "{chapter_id}_{page}"
     root = "https://kissmanga.com"
 
-    def request(self, url):
-        response = super().request(url)
-        if response.history and "/AreYouHuman" in response.url:
-            self.log.error("Redirect to \n%s\n"
-                           "Visit this URL in your browser and solve "
-                           "the CAPTCHA to continue.", response.url)
-            raise exception.StopExtraction()
-        return response
-
     @staticmethod
     def parse_chapter_string(data):
         """Parse 'chapter_string' value contained in 'data'"""
diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py
index 06b202ba15..dda48090ac 100644
--- a/gallery_dl/extractor/readcomiconline.py
+++ b/gallery_dl/extractor/readcomiconline.py
@@ -9,11 +9,12 @@
 """Extract comic-issues and entire comics from https://readcomiconline.to/"""
 
 from .common import ChapterExtractor, MangaExtractor
-from .. import text, exception
+from .kissmanga import RedirectMixin
+from .. import text
 import re
 
 
-class ReadcomiconlineBase():
+class ReadcomiconlineBase(RedirectMixin):
     """Base class for readcomiconline extractors"""
     category = "readcomiconline"
     directory_fmt = ("{category}", "{comic}", "{issue:>03}")
@@ -21,15 +22,6 @@ class ReadcomiconlineBase():
     archive_fmt = "{issue_id}_{page}"
     root = "https://readcomiconline.to"
 
-    def request(self, url):
-        response = super().request(url)
-        if response.history and "/AreYouHuman" in response.url:
-            self.log.error("Redirect to \n%s\n"
-                           "Visit this URL in your browser and solve "
-                           "the CAPTCHA to continue.", response.url)
-            raise exception.StopExtraction()
-        return response
-
 
 class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
     """Extractor for comic-issues from readcomiconline.to"""