From 88d8fb9c04d35c41e07149a6c609ef61fd7c155a Mon Sep 17 00:00:00 2001 From: Mike Fiedler Date: Thu, 21 Nov 2024 12:46:55 -0500 Subject: [PATCH] check if html and pour ammonia on it Signed-off-by: Mike Fiedler --- requirements/main.in | 1 + requirements/main.txt | 4 +++- tests/unit/test_forms.py | 19 +++++++++++++++++-- warehouse/forms.py | 8 +++++--- 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/requirements/main.in b/requirements/main.in index 02c76fa47ff1..50bf00d4dc91 100644 --- a/requirements/main.in +++ b/requirements/main.in @@ -31,6 +31,7 @@ lxml more-itertools msgpack natsort +nh3 opensearch-py orjson packaging>=24.2 diff --git a/requirements/main.txt b/requirements/main.txt index 892fda3b49f8..4c8a3122422f 100644 --- a/requirements/main.txt +++ b/requirements/main.txt @@ -1410,7 +1410,9 @@ nh3==0.2.18 \ --hash=sha256:c8b3a1cebcba9b3669ed1a84cc65bf005728d2f0bc1ed2a6594a992e817f3a50 \ --hash=sha256:de3ceed6e661954871d6cd78b410213bdcb136f79aafe22aa7182e028b8c7307 \ --hash=sha256:f0eca9ca8628dbb4e916ae2491d72957fdd35f7a5d326b7032a345f111ac07fe - # via readme-renderer + # via + # -r requirements/main.in + # readme-renderer openapi-core==0.19.4 \ --hash=sha256:1150d9daa5e7b4cacfd7d7e097333dc89382d7d72703934128dcf8a1a4d0df49 \ --hash=sha256:38e8347b6ebeafe8d3beb588214ecf0171874bb65411e9d4efd23cb011687201 diff --git a/tests/unit/test_forms.py b/tests/unit/test_forms.py index 92fc66fb5c38..d1c588858dc4 100644 --- a/tests/unit/test_forms.py +++ b/tests/unit/test_forms.py @@ -89,9 +89,24 @@ def test_invalid_password(self, password, expected): class TestPreventHTMLTagsValidator: - def test_valid(self): + @pytest.mark.parametrize( + "inbound_data", + [ + "A link https://example.com", + "query string https://example.com?query=string", + "anchor https://example.com#fragment", + "qs and anchor https://example.com?query=string#fragment", + "path, qs, anchor https://example.com/path?query=string#fragment", + "A comment with a > character", + "A comment with a < character", + "A comment with a & character", + "A comment with a ' character", + 'A comment with a " character', + ], + ) + def test_valid(self, inbound_data): validator = PreventHTMLTagsValidator() - validator(pretend.stub(), pretend.stub(data="https://example.com")) + validator(pretend.stub(), pretend.stub(data=inbound_data)) def test_invalid(self): validator = PreventHTMLTagsValidator() diff --git a/warehouse/forms.py b/warehouse/forms.py index baf6a37523c5..f6cdc6f6f1fb 100644 --- a/warehouse/forms.py +++ b/warehouse/forms.py @@ -14,8 +14,7 @@ import typing as t -from html import escape - +from nh3 import clean, is_html from wtforms import Form as BaseForm, StringField from wtforms.validators import InputRequired, ValidationError from zxcvbn import zxcvbn @@ -95,7 +94,10 @@ def __init__(self, message: str | None = None): self.message = message def __call__(self, form: BaseForm, field: Field): - if escape(field.data) != field.data: + # Override the default nh3.ALLOWED_TAGS to be an empty set + allowed_tags: set[str] = set() + + if is_html(field.data) and field.data != clean(field.data, tags=allowed_tags): raise ValidationError(self.message)