Skip to content

Commit

Permalink
[kissmanga][readcomiconline] add 'captcha' option (#279)
Browse files Browse the repository at this point in the history
to configure how to handle CAPTCHA page redirects:
- either interactively wait for the user to solve the CAPTCHA
- or raise StopExtraction like before
  • Loading branch information
mikf committed May 27, 2019
1 parent e30ada1 commit 4465a3e
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 22 deletions.
24 changes: 24 additions & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,18 @@ Description Controls whether to choose the GIF or MP4 version of an animation.
=========== =====


extractor.kissmanga.captcha
---------------------------
=========== =====
Type ``string``
Default ``"stop"``
Description Controls how to handle redirects to CAPTCHA pages.

* ``"stop"``: Stop the current extractor run.
* ``"wait"``: Ask the user to solve the CAPTCHA and wait.
=========== =====


extractor.oauth.browser
-----------------------
=========== =====
Expand Down Expand Up @@ -646,6 +658,18 @@ Description Minimum and maximum wait time in seconds between HTTP requests
=========== =====


extractor.readcomiconline.captcha
---------------------------------
=========== =====
Type ``string``
Default ``"stop"``
Description Controls how to handle redirects to CAPTCHA pages.

* ``"stop"``: Stop the current extractor run.
* ``"wait"``: Ask the user to solve the CAPTCHA and wait.
=========== =====


extractor.recursive.blacklist
-----------------------------
=========== =====
Expand Down
8 changes: 8 additions & 0 deletions docs/gallery-dl.conf
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@
{
"mp4": true
},
"kissmanga":
{
"captcha": "stop"
},
"nijie":
{
"username": null,
Expand All @@ -82,6 +86,10 @@
"wait-min": 3.0,
"wait-max": 6.0
},
"readcomiconline":
{
"captcha": "stop"
},
"recursive":
{
"blacklist": ["directlink", "oauth", "recursive", "test"]
Expand Down
36 changes: 25 additions & 11 deletions gallery_dl/extractor/kissmanga.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,43 @@

"""Extract manga-chapters and entire manga from https://kissmanga.com/"""

from .common import ChapterExtractor, MangaExtractor
from .common import ChapterExtractor, MangaExtractor, Extractor
from .. import text, aes, exception
from ..cache import cache
import hashlib
import ast
import re


class KissmangaBase():
class RedirectMixin():
    """Detect and handle redirects to CAPTCHA pages

    Mixin that overrides request() for extractor classes. It relies on
    the 'config()' method and 'log' attribute of the class it is mixed
    into, and delegates the actual HTTP request to Extractor.request().

    The 'captcha' option selects what happens when a request ends up on
    an '/AreYouHuman' page:
      "stop" (default): log an error and raise StopExtraction
      "wait"          : ask the user to solve the CAPTCHA, then retry
    """

    def request(self, url, **kwargs):
        """Send a request for 'url' and handle CAPTCHA redirects

        Any additional keyword arguments are forwarded unchanged to
        Extractor.request(), so callers are not limited to plain
        single-argument requests.

        Returns the first response that did not end on an
        '/AreYouHuman' page after being redirected.

        Raises exception.StopExtraction if such a redirect occurs and
        the 'captcha' option is not "wait", or if it is "wait" but no
        user input can be read.
        """
        while True:
            response = Extractor.request(self, url, **kwargs)

            # only a response that was redirected ('history' is
            # non-empty) AND ended on the CAPTCHA page needs handling
            if not response.history or "/AreYouHuman" not in response.url:
                return response

            if self.config("captcha", "stop") != "wait":
                self.log.error(
                    "Redirect to \n%s\nVisit this URL in your browser and "
                    "solve the CAPTCHA to continue", response.url)
                raise exception.StopExtraction()

            self.log.warning(
                "Redirect to \n%s\nVisit this URL in your browser, solve "
                "the CAPTCHA, and press ENTER to continue", response.url)
            try:
                input()
            except (EOFError, OSError):
                # stdin is closed or not usable, so nobody can confirm
                # that the CAPTCHA was solved. Retrying here would send
                # requests in an endless loop without any delay; fall
                # back to the "stop" behavior instead.
                self.log.error(
                    "Unable to read user input; stopping extraction")
                raise exception.StopExtraction()


class KissmangaBase(RedirectMixin):
"""Base class for kissmanga extractors"""
category = "kissmanga"
archive_fmt = "{chapter_id}_{page}"
root = "https://kissmanga.com"

def request(self, url):
response = super().request(url)
if response.history and "/AreYouHuman" in response.url:
self.log.error("Redirect to \n%s\n"
"Visit this URL in your browser and solve "
"the CAPTCHA to continue.", response.url)
raise exception.StopExtraction()
return response

@staticmethod
def parse_chapter_string(data):
"""Parse 'chapter_string' value contained in 'data'"""
Expand Down
14 changes: 3 additions & 11 deletions gallery_dl/extractor/readcomiconline.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,19 @@
"""Extract comic-issues and entire comics from https://readcomiconline.to/"""

from .common import ChapterExtractor, MangaExtractor
from .. import text, exception
from .kissmanga import RedirectMixin
from .. import text
import re


class ReadcomiconlineBase():
class ReadcomiconlineBase(RedirectMixin):
"""Base class for readcomiconline extractors"""
category = "readcomiconline"
directory_fmt = ("{category}", "{comic}", "{issue:>03}")
filename_fmt = "{comic}_{issue:>03}_{page:>03}.{extension}"
archive_fmt = "{issue_id}_{page}"
root = "https://readcomiconline.to"

def request(self, url):
response = super().request(url)
if response.history and "/AreYouHuman" in response.url:
self.log.error("Redirect to \n%s\n"
"Visit this URL in your browser and solve "
"the CAPTCHA to continue.", response.url)
raise exception.StopExtraction()
return response


class ReadcomiconlineIssueExtractor(ReadcomiconlineBase, ChapterExtractor):
"""Extractor for comic-issues from readcomiconline.to"""
Expand Down

0 comments on commit 4465a3e

Please sign in to comment.