From 1c0be2b876f66e3c23c27ffe79f30cfa17191de0 Mon Sep 17 00:00:00 2001 From: Iris Booker Date: Mon, 6 May 2024 17:42:08 -0500 Subject: [PATCH 1/5] li-38822 fix: handle whitespace escapes --- src/__tests__/index.test.ts | 13 +++++++++++++ src/constants.ts | 1 + src/index.ts | 5 ++++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/__tests__/index.test.ts b/src/__tests__/index.test.ts index c6a4e26..3b70efc 100644 --- a/src/__tests__/index.test.ts +++ b/src/__tests__/index.test.ts @@ -127,6 +127,19 @@ describe("sanitizeUrl", () => { ).toBe("https://example.com/javascript:alert('XSS')"); }); + it("removes whitespace escape sequences", () => { + const attackVectors = [ + "javascri\npt:alert('xss')", + "javascri\rpt:alert('xss')", + "javascri\tpt:alert('xss')", + "javascrip\x74t:alert('XSS')", + ]; + + attackVectors.forEach((vector) => { + expect(sanitizeUrl(vector)).toBe(BLANK_URL); + }); + }); + describe("invalid protocols", () => { describe.each(["javascript", "data", "vbscript"])("%s", (protocol) => { it(`replaces ${protocol} urls with ${BLANK_URL}`, () => { diff --git a/src/constants.ts b/src/constants.ts index ef18d43..f87cd63 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -4,5 +4,6 @@ export const htmlCtrlEntityRegex = /&(newline|tab);/gi; export const ctrlCharactersRegex = /[\u0000-\u001F\u007F-\u009F\u2000-\u200D\uFEFF]/gim; export const urlSchemeRegex = /^.+(:|:)/gim; +export const whitespaceEscapeChars = /\\[nrt]/g; export const relativeFirstCharacters = [".", "/"]; export const BLANK_URL = "about:blank"; diff --git a/src/index.ts b/src/index.ts index dd04813..3560f0f 100644 --- a/src/index.ts +++ b/src/index.ts @@ -6,6 +6,7 @@ import { invalidProtocolRegex, relativeFirstCharacters, urlSchemeRegex, + whitespaceEscapeChars, } from "./constants"; function isRelativeUrlWithoutProtocol(url: string): boolean { @@ -30,11 +31,13 @@ export function sanitizeUrl(url?: string): string { decodedUrl = decodeHtmlCharacters(decodedUrl) 
.replace(htmlCtrlEntityRegex, "") .replace(ctrlCharactersRegex, "") + .replace(whitespaceEscapeChars, "") .trim(); charsToDecode = decodedUrl.match(ctrlCharactersRegex) || decodedUrl.match(htmlEntitiesRegex) || - decodedUrl.match(htmlCtrlEntityRegex); + decodedUrl.match(htmlCtrlEntityRegex) || + decodedUrl.match(whitespaceEscapeChars); } while (charsToDecode && charsToDecode.length > 0); const sanitizedUrl = decodedUrl; if (!sanitizedUrl) { From cf7dbb8350114101ab06517aa46255c5b8762f60 Mon Sep 17 00:00:00 2001 From: Iris Booker Date: Wed, 8 May 2024 15:49:19 -0500 Subject: [PATCH 2/5] fix: decode uri and sanitize whitespace --- CHANGELOG.md | 1 + src/__tests__/index.test.ts | 7 ++++++- src/constants.ts | 3 ++- src/index.ts | 10 ++++++---- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70880f7..70cc0d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # CHANGELOG ## 7.0.1 + - Improve sanitization of HTML entities ## 7.0.0 diff --git a/src/__tests__/index.test.ts b/src/__tests__/index.test.ts index 3b70efc..cd4f1e3 100644 --- a/src/__tests__/index.test.ts +++ b/src/__tests__/index.test.ts @@ -132,7 +132,12 @@ describe("sanitizeUrl", () => { "javascri\npt:alert('xss')", "javascri\rpt:alert('xss')", "javascri\tpt:alert('xss')", - "javascrip\x74t:alert('XSS')", + "javascrip\\%74t:alert('XSS')", + "javascrip%5c%72t:alert()", + "javascrip%5Ctt:alert()", + "javascrip%255Ctt:alert()", + "javascrip%25%35Ctt:alert()", + "javascrip%25%35%43tt:alert()", ]; attackVectors.forEach((vector) => { diff --git a/src/constants.ts b/src/constants.ts index f87cd63..73968cd 100644 --- a/src/constants.ts +++ b/src/constants.ts @@ -4,6 +4,7 @@ export const htmlCtrlEntityRegex = /&(newline|tab);/gi; export const ctrlCharactersRegex = /[\u0000-\u001F\u007F-\u009F\u2000-\u200D\uFEFF]/gim; export const urlSchemeRegex = /^.+(:|:)/gim; -export const whitespaceEscapeChars = /\\[nrt]/g; +export const whitespaceEscapeCharsRegex = + 
/(\\|%5[cC])((%(6[eE]|72|74))|[nrt])/g; export const relativeFirstCharacters = [".", "/"]; export const BLANK_URL = "about:blank"; diff --git a/src/index.ts b/src/index.ts index 3560f0f..bb6987e 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,12 +1,13 @@ import { BLANK_URL, ctrlCharactersRegex, + hexCodesRegex, htmlCtrlEntityRegex, htmlEntitiesRegex, invalidProtocolRegex, relativeFirstCharacters, urlSchemeRegex, - whitespaceEscapeChars, + whitespaceEscapeCharsRegex, } from "./constants"; function isRelativeUrlWithoutProtocol(url: string): boolean { @@ -26,18 +27,19 @@ export function sanitizeUrl(url?: string): string { return BLANK_URL; } let charsToDecode; - let decodedUrl = url; + let decodedUrl = decodeURIComponent(url); + do { decodedUrl = decodeHtmlCharacters(decodedUrl) .replace(htmlCtrlEntityRegex, "") .replace(ctrlCharactersRegex, "") - .replace(whitespaceEscapeChars, "") + .replace(whitespaceEscapeCharsRegex, "") .trim(); charsToDecode = decodedUrl.match(ctrlCharactersRegex) || decodedUrl.match(htmlEntitiesRegex) || decodedUrl.match(htmlCtrlEntityRegex) || - decodedUrl.match(whitespaceEscapeChars); + decodedUrl.match(whitespaceEscapeCharsRegex); } while (charsToDecode && charsToDecode.length > 0); const sanitizedUrl = decodedUrl; if (!sanitizedUrl) { From 15926b63488e3c588f5260c6c1ed953d43226656 Mon Sep 17 00:00:00 2001 From: Iris Booker Date: Wed, 8 May 2024 15:52:37 -0500 Subject: [PATCH 3/5] fix: add to changelog, remove unused import --- CHANGELOG.md | 4 ++++ src/index.ts | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70cc0d9..85ad5ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # CHANGELOG +## UNRELEASED + +- Improve sanitization of whitespace escapes + ## 7.0.1 - Improve sanitization of HTML entities diff --git a/src/index.ts b/src/index.ts index bb6987e..65402af 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,7 +1,6 @@ import { BLANK_URL, ctrlCharactersRegex, - hexCodesRegex, 
htmlCtrlEntityRegex, htmlEntitiesRegex, invalidProtocolRegex, From 6c15df9c76ed28efcca894c7c51510cae5af3fd8 Mon Sep 17 00:00:00 2001 From: Iris Booker Date: Wed, 8 May 2024 19:06:16 -0500 Subject: [PATCH 4/5] fix: decode URI component on each sanitize round --- src/__tests__/index.test.ts | 1 + src/index.ts | 1 + 2 files changed, 2 insertions(+) diff --git a/src/__tests__/index.test.ts b/src/__tests__/index.test.ts index cd4f1e3..5df001b 100644 --- a/src/__tests__/index.test.ts +++ b/src/__tests__/index.test.ts @@ -138,6 +138,7 @@ describe("sanitizeUrl", () => { "javascrip%255Ctt:alert()", "javascrip%25%35Ctt:alert()", "javascrip%25%35%43tt:alert()", + "javascrip%25%32%35%25%33%35%25%34%33tt:alert()", ]; attackVectors.forEach((vector) => { diff --git a/src/index.ts b/src/index.ts index 65402af..e229b70 100644 --- a/src/index.ts +++ b/src/index.ts @@ -34,6 +34,7 @@ export function sanitizeUrl(url?: string): string { .replace(ctrlCharactersRegex, "") .replace(whitespaceEscapeCharsRegex, "") .trim(); + decodedUrl = decodeURIComponent(decodedUrl); charsToDecode = decodedUrl.match(ctrlCharactersRegex) || decodedUrl.match(htmlEntitiesRegex) || From 284119eccdf1f0dd9a6c0a5c162eb290ea04608b Mon Sep 17 00:00:00 2001 From: Iris Booker Date: Fri, 10 May 2024 11:54:58 -0500 Subject: [PATCH 5/5] fix: handle decode uri failure --- src/__tests__/index.test.ts | 3 ++- src/index.ts | 17 +++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/__tests__/index.test.ts b/src/__tests__/index.test.ts index 5df001b..4c290b3 100644 --- a/src/__tests__/index.test.ts +++ b/src/__tests__/index.test.ts @@ -138,7 +138,8 @@ describe("sanitizeUrl", () => { "javascrip%255Ctt:alert()", "javascrip%25%35Ctt:alert()", "javascrip%25%35%43tt:alert()", - "javascrip%25%32%35%25%33%35%25%34%33tt:alert()", + "javascrip%25%32%35%25%33%35%25%34%33rt:alert()", + "javascrip%255Crt:alert('%25xss')", ]; attackVectors.forEach((vector) => { diff --git a/src/index.ts b/src/index.ts index 
e229b70..5fafb8b 100644 --- a/src/index.ts +++ b/src/index.ts @@ -21,12 +21,23 @@ function decodeHtmlCharacters(str: string) { }); } +function decodeURI(uri: string): string { + try { + return decodeURIComponent(uri); + } catch (e: unknown) { + // Ignoring error + // It is possible that the URI contains a `%` not associated + // with URI/URL-encoding. + return uri; + } +} + export function sanitizeUrl(url?: string): string { if (!url) { return BLANK_URL; } let charsToDecode; - let decodedUrl = decodeURIComponent(url); + let decodedUrl = decodeURI(url); do { decodedUrl = decodeHtmlCharacters(decodedUrl) @@ -34,7 +45,9 @@ export function sanitizeUrl(url?: string): string { .replace(ctrlCharactersRegex, "") .replace(whitespaceEscapeCharsRegex, "") .trim(); - decodedUrl = decodeURIComponent(decodedUrl); + + decodedUrl = decodeURI(decodedUrl); + charsToDecode = decodedUrl.match(ctrlCharactersRegex) || decodedUrl.match(htmlEntitiesRegex) ||