diff --git a/src/main/java/org/owasp/html/CssTokens.java b/src/main/java/org/owasp/html/CssTokens.java index 28aa16dc..7fd44904 100644 --- a/src/main/java/org/owasp/html/CssTokens.java +++ b/src/main/java/org/owasp/html/CssTokens.java @@ -453,7 +453,7 @@ void lex() { char ch = css.charAt(pos); int startOfToken = pos; int startOfOutputToken = sb.length(); - final TokenType type; + TokenType type; switch (ch) { case '\t': case '\n': case '\f': case '\r': case ' ': case '\ufeff': consumeIgnorable(); @@ -514,6 +514,7 @@ void lex() { type = TokenType.UNICODE_RANGE; } else { type = consumeIdentOrUrlOrFunction(); + assert type != null; } break; case '0': case '1': case '2': case '3': case '4': @@ -533,7 +534,14 @@ && isDecimal(css.charAt(pos + 2)))) { if (consumeIgnorable()) { // --> type = TokenType.WHITESPACE; } else { - type = consumeIdentOrUrlOrFunction(); + TokenType identType = consumeIdentOrUrlOrFunction(); + if (identType == null) { + breakOutput(); + consumeDelim(ch); + type = TokenType.DELIM; + } else { + type = identType; + } } } else if (isIdentPart(lookahead)) { // treat "." as one token. @@ -589,9 +597,17 @@ && isDecimal(css.charAt(pos + 2)))) { } break; } - case '_': - type = consumeIdentOrUrlOrFunction(); + case '_': { + TokenType identType = consumeIdentOrUrlOrFunction(); + if (identType != null) { + type = identType; + } else { + ++pos; // drop + breakOutput(); + type = TokenType.WHITESPACE; + } break; + } case '\\': { // Optimistically parse as an ident. TokenType identType = consumeIdentOrUrlOrFunction(); @@ -624,7 +640,13 @@ && isDecimal(css.charAt(pos + 2)))) { type = TokenType.WHITESPACE; } } - assert pos > startOfToken + // Make progress even in the face of errors above. + if (type == null && pos == startOfToken) { + type = TokenType.WHITESPACE; + breakOutput(); + ++pos; + } + assert type != null && pos > startOfToken : "empty token at " + pos + ", ch0=" + css.charAt(startOfToken) + ":U+" + Integer.toHexString(css.charAt(startOfToken)); int endOfOutputToken = sb.length(); diff --git a/src/test/java/org/owasp/html/HtmlSanitizerTest.java b/src/test/java/org/owasp/html/HtmlSanitizerTest.java index d1a73e86..1ff169df 100644 --- a/src/test/java/org/owasp/html/HtmlSanitizerTest.java +++ b/src/test/java/org/owasp/html/HtmlSanitizerTest.java @@ -447,6 +447,13 @@ public static final void testIssue254SemicolonlessNamedCharactersInUrls() { assertEquals(want, sanitize(input)); } + @Test + public static final void testStylingCornerCase() { + String input = "