From a0537c7623ba84252c5535020227babdf6da314f Mon Sep 17 00:00:00 2001 From: Jonathan Hedley Date: Mon, 8 Jul 2024 12:26:52 +1000 Subject: [PATCH] Handle escaped characters in consumeSubQuery Fixes #2146 --- CHANGES.md | 1 + .../java/org/jsoup/select/QueryParser.java | 5 +++- .../java/org/jsoup/nodes/ElementTest.java | 24 +++++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index d06bda6a9b..3758b9a417 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -40,6 +40,7 @@ e.g.: `h1:has(+h2)`). [2137](https://github.com/jhy/jsoup/issues/2137) * The `:empty` selector incorrectly matched elements that started with a blank text node and were followed by non-empty nodes, due to an incorrect short-circuit. [2130](https://github.com/jhy/jsoup/issues/2130) +* `Element.cssSelector()` would fail with "Did not find balanced marker" when building a selector for elements that had a `(` or `[` in their class names. And selectors with those characters escaped would not match as expected. [2146](https://github.com/jhy/jsoup/issues/2146) * Fuzz: a Stack Overflow exception could occur when resolving a crafted `<base href>` URL, in the normalizing regex.
[2165](https://github.com/jhy/jsoup/issues/2165) diff --git a/src/main/java/org/jsoup/select/QueryParser.java b/src/main/java/org/jsoup/select/QueryParser.java index a8c263a122..eaa1c0fe0f 100644 --- a/src/main/java/org/jsoup/select/QueryParser.java +++ b/src/main/java/org/jsoup/select/QueryParser.java @@ -158,7 +158,10 @@ private String consumeSubQuery() { sq.append("(").append(tq.chompBalanced('(', ')')).append(")"); else if (tq.matches("[")) sq.append("[").append(tq.chompBalanced('[', ']')).append("]"); - else + else if (tq.matches("\\")) { // bounce over escapes + sq.append(tq.consume()); + if (!tq.isEmpty()) sq.append(tq.consume()); + } else sq.append(tq.consume()); } return StringUtil.releaseBuilder(sq); diff --git a/src/test/java/org/jsoup/nodes/ElementTest.java b/src/test/java/org/jsoup/nodes/ElementTest.java index f0e1f4627e..ef0b22543a 100644 --- a/src/test/java/org/jsoup/nodes/ElementTest.java +++ b/src/test/java/org/jsoup/nodes/ElementTest.java @@ -2612,6 +2612,30 @@ void prettySerializationRoundTrips(Document.OutputSettings settings) { assertEquals(element, elements.first()); } + @Test void cssSelectorWithBracket() { + // https://github.com/jhy/jsoup/issues/2146 + Document doc = Jsoup.parse("
<div class='a[foo]'>One</div><div class='a1'>Two</div>
"); + Element div = doc.expectFirst("div"); + String selector = div.cssSelector(); + assertEquals("html > body > div.a\\[foo\\]", selector); // would fail with "Did not find balanced marker", consumeSubquery was not handling escapes + + Elements selected = doc.select(selector); + assertEquals(1, selected.size()); + assertEquals(selected.first(), div); + } + + @Test void cssSelectorUnbalanced() { + // https://github.com/jhy/jsoup/issues/2146 + Document doc = Jsoup.parse("
<div class='a(foo'>One</div><div class='a-bar'>Two</div>
"); + Element div = doc.expectFirst("div"); + String selector = div.cssSelector(); + assertEquals("html > body > div.a\\(foo", selector); + + Elements selected = doc.select(selector); + assertEquals(1, selected.size()); + assertEquals(selected.first(), div); + } + @Test void orphanSiblings() { Element el = new Element("div"); assertEquals(0, el.siblingElements().size());