From aa03b2ab84c0a2f5c4d560f93e98159f8204cda7 Mon Sep 17 00:00:00 2001 From: Donald Stufft Date: Tue, 18 Sep 2018 09:06:13 -0400 Subject: [PATCH 1/2] Whitelist the allowed CSS classes --- readme_renderer/clean.py | 58 +++++++++++++++++++++++++++++++++++++++- tests/test_clean.py | 5 ++++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/readme_renderer/clean.py b/readme_renderer/clean.py index 42aed79..e009d9e 100644 --- a/readme_renderer/clean.py +++ b/readme_renderer/clean.py @@ -19,6 +19,8 @@ import bleach.callbacks import bleach.linkifier import bleach.sanitizer +import html5lib.filters.base +import pygments.token ALLOWED_TAGS = [ @@ -54,14 +56,64 @@ "width", "height", ] +ALLOWED_CLASSES = { + "img": ["align-left", "align-center", "align-right"], + "span": [c for c in pygments.token.STANDARD_TYPES.values() if c], +} + + +class _CSSClassFilter(html5lib.filters.base.Filter): + def __init__(self, *args, allowed_classes=None, **kwargs): + super().__init__(*args, **kwargs) + + if allowed_classes is None: + allowed_classes = {} + self.allowed_classes = allowed_classes + + def __iter__(self): + for token in super().__iter__(): + token = self.sanitize_token(token) + if token: + yield token + + def sanitize_token(self, token): + if token["type"] in {"StartTag", "EndTag", "EmptyTag"}: + name = token["name"] + + if "data" in token: + attrs = token["data"] + + if (None, "class") in attrs: + new_classes = self.sanitize_css_classes( + name, + attrs[(None, "class")] + ) + + if new_classes: + attrs[(None, "class")] = new_classes + else: + del attrs[(None, "class")] + + token["data"] = attrs + + return token + + def sanitize_css_classes(self, name, classes): + classes = classes.split() + allowed = set(self.allowed_classes.get(name, [])) + classes = sorted(set(classes) & allowed) + return " ".join(classes) + -def clean(html, tags=None, attributes=None, styles=None): +def clean(html, tags=None, attributes=None, styles=None, classes=None): if tags is None: tags = ALLOWED_TAGS if attributes is None: attributes = ALLOWED_ATTRIBUTES if styles is None: styles = ALLOWED_STYLES + if classes is None: + classes = ALLOWED_CLASSES # Clean the output using Bleach cleaner = bleach.sanitizer.Cleaner( @@ -69,6 +121,10 @@ def clean(html, tags=None, attributes=None, styles=None): attributes=attributes, styles=styles, filters=[ + # Bleach by default doesn't allow whitelisting what CSS classes + # are available to be used, so we'll override that behavior with + # our own filter which does. + functools.partial(_CSSClassFilter, allowed_classes=classes), # Bleach Linkify makes it easy to modify links, however, we will # not be using it to create additional links. functools.partial( diff --git a/tests/test_clean.py b/tests/test_clean.py index 4eb8b41..bac6b7a 100644 --- a/tests/test_clean.py +++ b/tests/test_clean.py @@ -3,3 +3,8 @@ def test_invalid_link(): assert clean('foo') == "foo" + + +def test_css_sanitizer(): + r = clean("") + assert r == '' From ee18dfccbdbd4275e1346a07c20e9d40492e4963 Mon Sep 17 00:00:00 2001 From: Donald Stufft Date: Tue, 18 Sep 2018 09:33:50 -0400 Subject: [PATCH 2/2] Remove Python3-isms --- readme_renderer/clean.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/readme_renderer/clean.py b/readme_renderer/clean.py index e009d9e..7b3f542 100644 --- a/readme_renderer/clean.py +++ b/readme_renderer/clean.py @@ -63,12 +63,10 @@ class _CSSClassFilter(html5lib.filters.base.Filter): - def __init__(self, *args, allowed_classes=None, **kwargs): - super().__init__(*args, **kwargs) + def __init__(self, *args, **kwargs): + self.allowed_classes = kwargs.pop("allowed_classes", {}) - if allowed_classes is None: - allowed_classes = {} - self.allowed_classes = allowed_classes + super().__init__(*args, **kwargs) def __iter__(self): for token in super().__iter__():