Skip to content

Commit

Permalink
add 'text.parse_float()' + cleanup in text.py
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed Jan 29, 2019
1 parent 0c32dc5 commit 2d2953a
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 13 deletions.
2 changes: 1 addition & 1 deletion gallery_dl/extractor/behance.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def get_metadata(self, page):
"gallery_id": text.parse_int(self.gallery_id),
"title": text.unescape(title or ""),
"user": ", ".join(users),
"fields": [f for f in text.split_html(fields) if f != ", "],
"fields": [f for f in text.split_html(fields) if f != ","],
"date": text.parse_int(date),
"views": text.parse_int(stats[0]),
"votes": text.parse_int(stats[1]),
Expand Down
4 changes: 2 additions & 2 deletions gallery_dl/extractor/sankaku.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

# Copyright 2014-2018 Mike Fährmann
# Copyright 2014-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
Expand Down Expand Up @@ -88,7 +88,7 @@ def get_post_data(self, post_id, extr=text.extract):
"id": text.parse_int(post_id),
"md5": file_url.rpartition("/")[2].partition(".")[0],
"tags": text.unescape(tags),
"vote_average": float(vavg or 0),
"vote_average": text.parse_float(vavg),
"vote_count": text.parse_int(vcnt),
"created_at": created,
"rating": (rating or "?")[0].lower(),
Expand Down
27 changes: 18 additions & 9 deletions gallery_dl/text.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-

# Copyright 2015-2018 Mike Fährmann
# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Collection of functions that work in strings/text"""
"""Collection of functions that work on strings/text"""

import re
import html
Expand Down Expand Up @@ -47,7 +47,7 @@ def split_html(txt, sep=None):
"""Split input string by html-tags"""
try:
return [
x for x in re.split("<[^>]+>", txt)
x.strip() for x in re.split("<[^>]+>", txt)
if x and not x.isspace()
]
except TypeError:
Expand Down Expand Up @@ -165,6 +165,16 @@ def parse_int(value, default=0):
return default


def parse_float(value, default=0.0):
    """Convert 'value' to float

    Returns 'default' if 'value' is falsy (None, "", 0, empty containers)
    or if it cannot be converted to a float.
    """
    if not value:
        return default
    try:
        return float(value)
    except (ValueError, TypeError, OverflowError):
        # ValueError:    strings that are not a valid number ("zzz")
        # TypeError:     unsupported types (lists, dicts, ...)
        # OverflowError: integers too large to be represented as float
        return default


def parse_query(qs):
"""Parse a query string into key-value pairs"""
result = {}
Expand All @@ -182,12 +192,11 @@ def parse_query(qs):
else:
clean_path = clean_path_posix


urljoin = urllib.parse.urljoin

quote = urllib.parse.quote
unquote = urllib.parse.unquote
escape = html.escape

try:
unescape = html.unescape
except AttributeError:
import html.parser
unescape = html.parser.HTMLParser().unescape
escape = html.escape
unescape = html.unescape
24 changes: 23 additions & 1 deletion test/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,9 @@ def test_split_html(self, f=text.split_html):
# standard usage
self.assertEqual(f(""), empty)
self.assertEqual(f("Hello World."), ["Hello World."])
self.assertEqual(f(" Hello World. "), [" Hello World. "])
self.assertEqual(f(" Hello World. "), ["Hello World."])
self.assertEqual(f("Hello<br/>World."), result)
self.assertEqual(f(" Hello <br/> World. "), result)
self.assertEqual(
f("<div><b class='a'>Hello</b><i>World.</i></div>"), result)

Expand Down Expand Up @@ -260,6 +261,27 @@ def test_parse_int(self, f=text.parse_int):
self.assertEqual(f(value, default), default)
self.assertEqual(f("zzz", default), default)

def test_parse_float(self, f=text.parse_float):
    """Check 'parse_float()' with valid, invalid, and defaulted input"""
    # values that convert cleanly, given both as numbers and as strings
    valid = (
        (0, 0.0),
        ("0", 0.0),
        (123, 123.0),
        ("123", 123.0),
        (123.456, 123.456),
        ("123.456", 123.456),
    )
    for value, expected in valid:
        self.assertEqual(f(value), expected)

    # invalid arguments
    for value in INVALID_ALT:
        self.assertEqual(f(value), 0.0)
    for value in ("zzz", [1, 2, 3], {1: 2, 3: 4}):
        self.assertEqual(f(value), 0.0)

    # 'default' argument
    fallback = "default"
    for value in INVALID_ALT:
        self.assertEqual(f(value, fallback), fallback)
    self.assertEqual(f("zzz", fallback), fallback)

def test_parse_query(self, f=text.parse_query):
# standard usage
self.assertEqual(f(""), {})
Expand Down

0 comments on commit 2d2953a

Please sign in to comment.