Restrict the CSS classes allowed in cleaned README HTML.

Adds ALLOWED_CLASSES and an html5lib filter (_CSSClassFilter) to
readme_renderer/clean.py, passes a new optional `classes` argument through
clean() to that filter, and adds test_css_sanitizer to tests/test_clean.py.

NOTE(review): line breaks and indentation were reconstructed from a copy whose
whitespace had been collapsed; the hunk line counts match the headers, but
confirm the context lines against the target tree before applying.
NOTE(review): the string literals in tests/test_clean.py look as if their HTML
markup was stripped (e.g. clean("") compared with ''); restore them from the
original commit if so.

diff --git a/readme_renderer/clean.py b/readme_renderer/clean.py
index 42aed79..7b3f542 100644
--- a/readme_renderer/clean.py
+++ b/readme_renderer/clean.py
@@ -19,6 +19,8 @@
 import bleach.callbacks
 import bleach.linkifier
 import bleach.sanitizer
+import html5lib.filters.base
+import pygments.token
 
 
 ALLOWED_TAGS = [
@@ -54,14 +56,62 @@
     "width", "height",
 ]
 
+ALLOWED_CLASSES = {
+    "img": ["align-left", "align-center", "align-right"],
+    "span": [c for c in pygments.token.STANDARD_TYPES.values() if c],
+}
+
+
+class _CSSClassFilter(html5lib.filters.base.Filter):
+    def __init__(self, *args, **kwargs):
+        self.allowed_classes = kwargs.pop("allowed_classes", {})
+
+        super().__init__(*args, **kwargs)
+
+    def __iter__(self):
+        for token in super().__iter__():
+            token = self.sanitize_token(token)
+            if token:
+                yield token
+
+    def sanitize_token(self, token):
+        if token["type"] in {"StartTag", "EndTag", "EmptyTag"}:
+            name = token["name"]
+
+            if "data" in token:
+                attrs = token["data"]
+
+                if (None, "class") in attrs:
+                    new_classes = self.sanitize_css_classes(
+                        name,
+                        attrs[(None, "class")]
+                    )
+
+                    if new_classes:
+                        attrs[(None, "class")] = new_classes
+                    else:
+                        del attrs[(None, "class")]
+
+                token["data"] = attrs
+
+        return token
+
+    def sanitize_css_classes(self, name, classes):
+        classes = classes.split()
+        allowed = set(self.allowed_classes.get(name, []))
+        classes = sorted(set(classes) & allowed)
+        return " ".join(classes)
+
 
-def clean(html, tags=None, attributes=None, styles=None):
+def clean(html, tags=None, attributes=None, styles=None, classes=None):
     if tags is None:
         tags = ALLOWED_TAGS
     if attributes is None:
         attributes = ALLOWED_ATTRIBUTES
     if styles is None:
         styles = ALLOWED_STYLES
+    if classes is None:
+        classes = ALLOWED_CLASSES
 
     # Clean the output using Bleach
     cleaner = bleach.sanitizer.Cleaner(
@@ -69,6 +119,10 @@ def clean(html, tags=None, attributes=None, styles=None):
         attributes=attributes,
         styles=styles,
         filters=[
+            # Bleach by default doesn't allow whitelisting what CSS classes
+            # are available to be used, so we'll override that behavior with
+            # our own filter which does.
+            functools.partial(_CSSClassFilter, allowed_classes=classes),
             # Bleach Linkify makes it easy to modify links, however, we will
             # not be using it to create additional links.
             functools.partial(
diff --git a/tests/test_clean.py b/tests/test_clean.py
index 4eb8b41..bac6b7a 100644
--- a/tests/test_clean.py
+++ b/tests/test_clean.py
@@ -3,3 +3,8 @@
 
 def test_invalid_link():
     assert clean('foo') == "foo"
+
+
+def test_css_sanitizer():
+    r = clean("")
+    assert r == ''