Skip to content

Commit

Permalink
Fix the strange whitespace handling of tails when merging elements
Browse files Browse the repository at this point in the history
Refs #44.
  • Loading branch information
matthiask committed May 18, 2024
1 parent 61d92b0 commit 3842ed6
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 4 deletions.
3 changes: 2 additions & 1 deletion html_sanitizer/sanitizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,8 @@ def sanitize(self, html):
element.append(child)

# tail is merged with previous element.
-nx.drop_tree()
+element.tail = nx.tail
+nx.getparent().remove(nx)

# Process element again
backlog.append(element)
Expand Down
12 changes: 9 additions & 3 deletions html_sanitizer/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def test_02_a_tag(self):
def test_03_merge(self):
entries = (
("<h2>foo</h2><h2>bar</h2>", "<h2>foobar</h2>"),
-("<h2>foo </h2> <h2> bar</h2>", "<h2>foo bar</h2> "),
+("<h2>foo </h2> <h2> bar</h2>", "<h2>foo bar</h2>"),
)

self.run_tests(entries)
Expand Down Expand Up @@ -667,7 +667,6 @@ def test_normalize_early(self):
)

def test_typographic_whitespace_tags_merging(self):
-html = "This is <strong>some</strong> <strong>text</strong> with adjacent tags."
sanitizer = Sanitizer(
{
"whitespace": set(),
Expand All @@ -676,7 +675,14 @@ def test_typographic_whitespace_tags_merging(self):
)
self.run_tests(
[
-(html, "This is <strong>some text</strong> with adjacent tags."),
+(
+"This is <strong>some</strong> <strong>text</strong> with adjacent tags.",
+"This is <strong>some text</strong> with adjacent tags.",
+),
+(
+"This is <strong>some</strong> <strong>text</strong>with adjacent tags.",
+"This is <strong>some text</strong>with adjacent tags.",
+),
],
sanitizer=sanitizer,
)

0 comments on commit 3842ed6

Please sign in to comment.