diff --git a/readme_renderer/clean.py b/readme_renderer/clean.py
index 42aed79..7b3f542 100644
--- a/readme_renderer/clean.py
+++ b/readme_renderer/clean.py
@@ -19,6 +19,8 @@
import bleach.callbacks
import bleach.linkifier
import bleach.sanitizer
+import html5lib.filters.base
+import pygments.token
ALLOWED_TAGS = [
@@ -54,14 +56,62 @@
"width", "height",
]
+ALLOWED_CLASSES = {
+ "img": ["align-left", "align-center", "align-right"],
+ "span": [c for c in pygments.token.STANDARD_TYPES.values() if c],
+}
+
+
+class _CSSClassFilter(html5lib.filters.base.Filter):
+ def __init__(self, *args, **kwargs):
+ self.allowed_classes = kwargs.pop("allowed_classes", {})
+
+ super().__init__(*args, **kwargs)
+
+ def __iter__(self):
+ for token in super().__iter__():
+ token = self.sanitize_token(token)
+ if token:
+ yield token
+
+ def sanitize_token(self, token):
+ if token["type"] in {"StartTag", "EndTag", "EmptyTag"}:
+ name = token["name"]
+
+ if "data" in token:
+ attrs = token["data"]
+
+ if (None, "class") in attrs:
+ new_classes = self.sanitize_css_classes(
+ name,
+ attrs[(None, "class")]
+ )
+
+ if new_classes:
+ attrs[(None, "class")] = new_classes
+ else:
+ del attrs[(None, "class")]
+
+ token["data"] = attrs
+
+ return token
+
+ def sanitize_css_classes(self, name, classes):
+ classes = classes.split()
+ allowed = set(self.allowed_classes.get(name, []))
+ classes = sorted(set(classes) & allowed)
+ return " ".join(classes)
+
-def clean(html, tags=None, attributes=None, styles=None):
+def clean(html, tags=None, attributes=None, styles=None, classes=None):
if tags is None:
tags = ALLOWED_TAGS
if attributes is None:
attributes = ALLOWED_ATTRIBUTES
if styles is None:
styles = ALLOWED_STYLES
+ if classes is None:
+ classes = ALLOWED_CLASSES
# Clean the output using Bleach
cleaner = bleach.sanitizer.Cleaner(
@@ -69,6 +119,10 @@ def clean(html, tags=None, attributes=None, styles=None):
attributes=attributes,
styles=styles,
filters=[
+ # Bleach by default doesn't allow whitelisting what CSS classes
+ # are available to be used, so we'll override that behavior with
+ # our own filter which does.
+ functools.partial(_CSSClassFilter, allowed_classes=classes),
# Bleach Linkify makes it easy to modify links, however, we will
# not be using it to create additional links.
functools.partial(
diff --git a/tests/test_clean.py b/tests/test_clean.py
index 4eb8b41..bac6b7a 100644
--- a/tests/test_clean.py
+++ b/tests/test_clean.py
@@ -3,3 +3,8 @@
def test_invalid_link():
assert clean('foo') == "foo"
+
+
+def test_css_sanitizer():
+ r = clean("")
+ assert r == ''