Skip to content

Commit

Permalink
[exhentai] catch more error states (#356, #360)
Browse files (browse the repository at this point in the history)
- warn on MPV-enabled (Multi-Page Viewer) galleries
- catch parsing errors for gallery pages and image info
- write page content to debug output
  • Loading branch information
mikf committed Jul 29, 2019
1 parent a90280f commit 81b35ed
Showing 1 changed file with 29 additions and 13 deletions.
42 changes: 29 additions & 13 deletions gallery_dl/extractor/exhentai.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -136,6 +136,14 @@ def items(self):
if self.gallery_token:
gpage = self._gallery_page()
self.image_token = text.extract(gpage, 'hentai.org/s/', '"')[0]
if "hentai.org/mpv/" in gpage:
self.log.warning("Extraction with Multi-Page Viewer "
"enabled is not supported")
if not self.image_token:
self.log.error("Failed to extract image token "
"from gallery page")
self.log.debug("Page content:\n%s", gpage)
return
self.wait()
ipage = self._image_page()
else:
Expand Down Expand Up @@ -201,12 +209,16 @@ def image_from_page(self, page):
iurl = extr('<img id="img" src="', '"')
orig = extr('hentai.org/fullimg.php', '"')

if self.original and orig:
url = self.root + "/fullimg.php" + text.unescape(orig)
data = self._parse_original_info(extr('ownload original', '<'))
else:
url = iurl
data = self._parse_image_info(url)
try:
if self.original and orig:
url = self.root + "/fullimg.php" + text.unescape(orig)
data = self._parse_original_info(extr('ownload original', '<'))
else:
url = iurl
data = self._parse_image_info(url)
except IndexError:
self.log.warning("Unable to parse image info for '%s'", url)
self.log.debug("Page content:\n%s", page)

data["num"] = self.image_num
data["image_token"] = self.key["start"] = extr('var startkey="', '";')
Expand All @@ -232,13 +244,17 @@ def images_from_api(self):
imgurl , pos = text.extract(page["i3"], 'id="img" src="', '"', pos)
origurl, pos = text.extract(page["i7"], '<a href="', '"')

if self.original and origurl:
url = text.unescape(origurl)
data = self._parse_original_info(
text.extract(page["i7"], "ownload original", "<", pos)[0])
else:
url = imgurl
data = self._parse_image_info(url)
try:
if self.original and origurl:
url = text.unescape(origurl)
data = self._parse_original_info(text.extract(
page["i7"], "ownload original", "<", pos)[0])
else:
url = imgurl
data = self._parse_image_info(url)
except IndexError:
self.log.warning("Unable to parse image info for '%s'", url)
self.log.debug("Page content:\n%s", page)

data["num"] = request["page"]
data["image_token"] = imgkey
Expand Down

0 comments on commit 81b35ed

Please sign in to comment.