diff --git a/readme_renderer/clean.py b/readme_renderer/clean.py index d6edaa1..6cd88b0 100644 --- a/readme_renderer/clean.py +++ b/readme_renderer/clean.py @@ -41,9 +41,7 @@ # Custom Additions "*": ["id"], - "hr": ["class"], - "img": ["src", "width", "height", "alt", "align", "class"], - "span": ["class"], + "img": ["src", "width", "height", "alt", "align"], "th": ["align"], "td": ["align"], "h1": ["align"], @@ -52,19 +50,44 @@ "h4": ["align"], "h5": ["align"], "h6": ["align"], - "code": ["class"], "p": ["align"], } +# Class is a specific attribute because not only do we want to allow it only +# on certain tags, but we also want to control possible values. +ALLOWED_CLASSES = { + "img": {"align-left", "align-right", "align-center"}, + "span": set( + # Classes for syntax coloring + # The original source for this list is + # https://github.com/pygments/pygments/blob/cfaa45dcc4103da8cf1700fd0d3e5708d894337b/pygments/token.py + # which is a superset of the list in + # https://github.com/pypa/warehouse/blob/master/warehouse/static/sass/blocks/_project-description.scss#L256 + # This means that some classes are unused and it's most probably OK. + "bp c c1 ch cm cp cpf cs dl err esc fm g gd ge gh gi go gp gr gs gt " + "gu il k kc kd kn kp kr kt l ld m mb mf mh mi mo n na nb nc nd ne nf " + "ni nl nn no nt nv nx o ow p py s s1 s2 sa sb sc sd se sh si sr ss sx " + "vc vg vi vm w x".split() + ) +} + ALLOWED_STYLES = [ ] +def is_attributes_allowed(tag, name, value): + if name == "class": + # In our case, there's no use-case where a single element may have + # multiple classes, so we don't have to split() to compare. 
+ return value in ALLOWED_CLASSES.get(tag, ()) + return name in ALLOWED_ATTRIBUTES.get(tag, []) + ALLOWED_ATTRIBUTES["*"] + + def clean(html, tags=None, attributes=None, styles=None): if tags is None: tags = ALLOWED_TAGS if attributes is None: - attributes = ALLOWED_ATTRIBUTES + attributes = is_attributes_allowed if styles is None: styles = ALLOWED_STYLES diff --git a/tests/fixtures/test_GFM_024.html b/tests/fixtures/test_GFM_024.html new file mode 100644 index 0000000..1f12196 --- /dev/null +++ b/tests/fixtures/test_GFM_024.html @@ -0,0 +1,4 @@ +
foo +bar
+ + diff --git a/tests/fixtures/test_GFM_024.md b/tests/fixtures/test_GFM_024.md new file mode 100644 index 0000000..007930a --- /dev/null +++ b/tests/fixtures/test_GFM_024.md @@ -0,0 +1,5 @@ +foo +bar + + + diff --git a/tests/test_clean.py b/tests/test_clean.py index 4eb8b41..781e95b 100644 --- a/tests/test_clean.py +++ b/tests/test_clean.py @@ -1,5 +1,24 @@ -from readme_renderer.clean import clean +import pytest + +from readme_renderer import clean def test_invalid_link(): - assert clean('foo') == "foo" + assert clean.clean('foo') == "foo" + + +@pytest.mark.parametrize( + "tag, name, value, expected", + [ + ("form", "align", "left", False), # form doesn't allow align + ("h1", "align", "left", True), # h1 allows align attribute + ("h1", "class", "align-left", False), # h1 doesn't allow class + ("img", "onerror", "alert()", False), # img doesn't allow onerror attribute + ("img", "class", "something", False), # img allows class but not this one + ("img", "class", "align-left", True), # img allows this class + ("img", "id", "some-id", True), # everything allows id + ("form", "id", "some-id", True), # everything allows id + ] +) +def test_is_attributes_allowed(tag, name, value, expected): + assert clean.is_attributes_allowed(tag, name, value) == expected diff --git a/tox.ini b/tox.ini index 632235a..d690cff 100644 --- a/tox.ini +++ b/tox.ini @@ -9,7 +9,7 @@ commands = extras = md [testenv:pep8] -basepython = python3.6 +basepython = python3 deps = flake8 pep8-naming