diff --git a/CHANGELOG.md b/CHANGELOG.md index 70880f7..85ad5ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ # CHANGELOG +## UNRELEASED + +- Improve sanitization of whitespace escapes + ## 7.0.1 + - Improve sanitization of HTML entities ## 7.0.0 diff --git a/src/__tests__/index.test.ts b/src/__tests__/index.test.ts index c6a4e26..4c290b3 100644 --- a/src/__tests__/index.test.ts +++ b/src/__tests__/index.test.ts @@ -127,6 +127,26 @@ describe("sanitizeUrl", () => { ).toBe("https://example.com/javascript:alert('XSS')"); }); + it("removes whitespace escape sequences", () => { + const attackVectors = [ + "javascri\npt:alert('xss')", + "javascri\rpt:alert('xss')", + "javascri\tpt:alert('xss')", + "javascrip\\%74t:alert('XSS')", + "javascrip%5c%72t:alert()", + "javascrip%5Ctt:alert()", + "javascrip%255Ctt:alert()", + "javascrip%25%35Ctt:alert()", + "javascrip%25%35%43tt:alert()", + "javascrip%25%32%35%25%33%35%25%34%33rt:alert()", + "javascrip%255Crt:alert('%25xss')", + ]; + + attackVectors.forEach((vector) => { + expect(sanitizeUrl(vector)).toBe(BLANK_URL); + }); + }); + describe("invalid protocols", () => { describe.each(["javascript", "data", "vbscript"])("%s", (protocol) => { it(`replaces ${protocol} urls with ${BLANK_URL}`, () => { diff --git a/src/constants.ts b/src/constants.ts index ef18d43..73968cd 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -4,5 +4,7 @@ export const htmlCtrlEntityRegex = /&(newline|tab);/gi; export const ctrlCharactersRegex = /[\u0000-\u001F\u007F-\u009F\u2000-\u200D\uFEFF]/gim; export const urlSchemeRegex = /^.+(:|:)/gim; +export const whitespaceEscapeCharsRegex = + /(\\|%5[cC])((%(6[eE]|72|74))|[nrt])/g; export const relativeFirstCharacters = [".", "/"]; export const BLANK_URL = "about:blank"; diff --git a/src/index.ts b/src/index.ts index dd04813..5fafb8b 100644 --- a/src/index.ts +++ b/src/index.ts @@ -6,6 +6,7 @@ import { invalidProtocolRegex, relativeFirstCharacters, urlSchemeRegex, + whitespaceEscapeCharsRegex, } from "./constants"; function isRelativeUrlWithoutProtocol(url: string): boolean { @@ -20,21 +21,38 @@ function decodeHtmlCharacters(str: string) { }); } +function decodeURI(uri: string): string { + try { + return decodeURIComponent(uri); + } catch (e: unknown) { + // Ignoring error + // It is possible that the URI contains a `%` not associated + // with URI/URL-encoding. + return uri; + } +} + export function sanitizeUrl(url?: string): string { if (!url) { return BLANK_URL; } let charsToDecode; - let decodedUrl = url; + let decodedUrl = decodeURI(url); + do { decodedUrl = decodeHtmlCharacters(decodedUrl) .replace(htmlCtrlEntityRegex, "") .replace(ctrlCharactersRegex, "") + .replace(whitespaceEscapeCharsRegex, "") .trim(); + + decodedUrl = decodeURI(decodedUrl); + charsToDecode = decodedUrl.match(ctrlCharactersRegex) || decodedUrl.match(htmlEntitiesRegex) || - decodedUrl.match(htmlCtrlEntityRegex); + decodedUrl.match(htmlCtrlEntityRegex) || + decodedUrl.match(whitespaceEscapeCharsRegex); } while (charsToDecode && charsToDecode.length > 0); const sanitizedUrl = decodedUrl; if (!sanitizedUrl) {