Skip to content

Commit

Permalink
[2chan] fix metadata extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed Dec 3, 2019
1 parent 173a934 commit 71acbda
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 5 deletions.
10 changes: 6 additions & 4 deletions gallery_dl/extractor/2chan.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ def posts(self, page):
def parse(self, post):
"""Build post-object by extracting data from an HTML post"""
data = self._extract_post(post)
if data["name"]:
data["name"] = data["name"].strip()
if '<a href="/' in post:
self._extract_image(post, data)
data["tim"], _, data["extension"] = data["filename"].partition(".")
Expand All @@ -78,10 +80,10 @@ def parse(self, post):
@staticmethod
def _extract_post(post):
return text.extract_all(post, (
("no" , 'name="', '"'),
("post", '<b>', '</b>'),
("name", '<b>', ' </b>'),
("now" , '</font> ', ' '),
("post", 'class="csb">' , '<'),
("name", 'class="cnm">' , '<'),
("now" , 'class="cnw">' , '<'),
("no" , 'class="cno">No.', '<'),
(None , '<blockquote', ''),
("com" , '>', '</blockquote>'),
))[0]
Expand Down
2 changes: 1 addition & 1 deletion gallery_dl/extractor/wikiart.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ class WikiartArtworksExtractor(WikiartExtractor):
directory_fmt = ("{category}", "Artworks by {group!c}", "{type}")
pattern = BASE_PATTERN + r"/paintings-by-([\w-]+)/([\w-]+)"
test = ("https://www.wikiart.org/en/paintings-by-media/grisaille", {
"url": "f92d55669fa949491c26a5437527adb14b35b8cc",
"url": "228426a9d32b5bba9d659944c6b0ba73883af33f",
})

def __init__(self, match):
Expand Down

0 comments on commit 71acbda

Please sign in to comment.