Skip to content

Commit

Permalink
[zerochan] parse API response manually when json.loads() fails (#6632)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed Dec 12, 2024
1 parent d2c66ac commit a33065b
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 7 deletions.
33 changes: 26 additions & 7 deletions gallery_dl/extractor/zerochan.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,12 @@ def _parse_entry_html(self, entry_id):

def _parse_entry_api(self, entry_id):
url = "{}/{}?json".format(self.root, entry_id)
text = self.request(url).text
txt = self.request(url).text
try:
item = util.json_loads(text)
except ValueError as exc:
if " control character " not in str(exc):
raise
text = re.sub(r"[\x00-\x1f\x7f]", "", text)
item = util.json_loads(text)
item = util.json_loads('"' + txt)
except ValueError:
item = self._parse_json(txt)
item["id"] = text.parse_int(entry_id)

data = {
"id" : item["id"],
Expand All @@ -118,6 +116,27 @@ def _parse_entry_api(self, entry_id):

return data

def _parse_json(self, txt):
    """Manually pick apart an API response that failed regular JSON decoding.

    Control characters (0x00-0x1f and 0x7f) are removed first. The text
    is then cut at the 'tags": [' marker: everything before it is read
    as a flat sequence of '"key": value' fields, everything after it as
    the list of tag strings.

    Returns a dict of the parsed fields plus a "tags" list. Values that
    start with a double quote lose their first and last character;
    other non-empty values are passed through text.parse_int().
    """
    cleaned = re.sub(r"[\x00-\x1f\x7f]", "", txt)
    head, _, tail = cleaned.partition('tags": [')

    item = {}
    # The chunk in front of the first separator is never a complete
    # field, so it is skipped.
    for field in head.split(', "')[1:]:
        name, _, raw = field.partition('": ')
        if raw.startswith('"'):
            # Quoted value: drop the surrounding quote characters only,
            # so quotes inside the value are kept as they are.
            parsed = raw[1:-1]
        elif raw:
            parsed = text.parse_int(raw)
        else:
            parsed = raw
        if name:
            item[name] = parsed

    # NOTE(review): the fixed 5-character trims presumably remove the
    # leading indentation + opening quote and the trailing quote +
    # closing brackets -- confirm against a real response.
    tag_list = tail[5:].split('", "')
    # str.split() with an explicit separator always yields at least one
    # element, so the last entry can be trimmed unconditionally.
    tag_list[-1] = tag_list[-1][:-5]
    item["tags"] = tag_list

    return item

def _tags(self, post, page):
tags = collections.defaultdict(list)
for tag in post["tags"]:
Expand Down
42 changes: 42 additions & 0 deletions test/results/zerochan.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,4 +208,46 @@
"source": "http://www.youtube.com/watch?v=0vodqkGPxt8",
},

{
"#url" : "https://www.zerochan.net/4354955",
"#comment" : "unescaped quotes in 'JSON' data (#6632)",
"#category": ("booru", "zerochan", "image"),
"#class" : zerochan.ZerochanImageExtractor,
"#auth" : False,
"#options" : {"metadata": True},

"author" : "SEGA",
"date" : "dt:2024-12-05 06:06:14",
"file_url": "https://static.zerochan.net/Miles.%22Tails%22.Prower.full.4354955.jpg",
"filename": "Miles.\"Tails\".Prower.full.4354955",
"height" : 705,
"id" : 4354955,
"name" : "Miles \"Tails\" Prower",
"size" : 252928,
"source" : "https://x.com/kellanstover/status/1580237736874606597",
"uploader": "Anima-Chao",
"width" : 4096,
"path" : [
"Sonic the Hedgehog",
"Miles \"Tails\" Prower",
],
"tags" : [
"Male",
"Animal",
"Fox",
"Sonic the Hedgehog",
"Flying",
"Character Sheet",
"Airplane",
"SEGA",
"Miles \"Tails\" Prower",
"Official Art",
"Midair",
"X (Twitter)",
"Sonic Origins",
"Official Art from X",
"Tory Patterson",
],
},

)

0 comments on commit a33065b

Please sign in to comment.