Skip to content

Commit

Permalink
feat: allow composer to adjust sanitization (#564) (#579)
Browse files Browse the repository at this point in the history
* feat: allow disable sanitization (#564)

* test: #564

* chore: add changeset

* chore: restore prior whitespace

* refactor: adjust sanitizer to provide more data to the composer

* refactor: DX tweaks

* chore: adjust size limit

will golf this down later

* chore: tweak changeset

---------

Co-authored-by: Evan Jacobs <probablyup@gmail.com>
  • Loading branch information
SukkaW and quantizor authored Aug 18, 2024
1 parent 553a175 commit 62a16f3
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 27 deletions.
24 changes: 24 additions & 0 deletions .changeset/tricky-poems-collect.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
---
'markdown-to-jsx': minor
---

Allow modifying HTML attribute sanitization when `options.sanitizer` is passed by the composer.

By default, a lightweight URL sanitizer function is provided to avoid common attack vectors that might be placed into, for example, the `href` of an anchor tag. The sanitizer receives the input value, the HTML tag being targeted, and the attribute name, and returns either the string to use for that attribute or `null` to reject the value. The original function is available as a library export called `sanitizer`.

This can be overridden and replaced with a custom sanitizer if desired via `options.sanitizer`:

```jsx
// sanitizer in this situation would receive:
// ('javascript:alert("foo")', 'a', 'href')

;<Markdown options={{ sanitizer: (value, tag, attribute) => value }}>
{`[foo](javascript:alert("foo"))`}
</Markdown>

// or

compiler('[foo](javascript:alert("foo"))', {
sanitizer: (value, tag, attribute) => value,
})
```
28 changes: 26 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ The most lightweight, customizable React markdown component.
- [options.createElement - Custom React.createElement behavior](#optionscreateelement---custom-reactcreateelement-behavior)
- [options.enforceAtxHeadings](#optionsenforceatxheadings)
- [options.renderRule](#optionsrenderrule)
- [options.sanitizer](#optionssanitizer)
- [options.slugify](#optionsslugify)
- [options.namedCodesToUnicode](#optionsnamedcodestounicode)
- [options.disableParsingRawHTML](#optionsdisableparsingrawhtml)
Expand Down Expand Up @@ -435,21 +436,44 @@ function App() {
}
````

#### options.sanitizer

By default, a lightweight URL sanitizer function is provided to avoid common attack vectors that might be placed into, for example, the `href` of an anchor tag. The sanitizer receives the input value, the HTML tag being targeted, and the attribute name, and returns either the string to use for that attribute or `null` to reject the value. The original function is available as a library export called `sanitizer`.

This can be overridden and replaced with a custom sanitizer if desired via `options.sanitizer`:

```jsx
// sanitizer in this situation would receive:
// ('javascript:alert("foo")', 'a', 'href')

;<Markdown options={{ sanitizer: (value, tag, attribute) => value }}>
{`[foo](javascript:alert("foo"))`}
</Markdown>

// or

compiler('[foo](javascript:alert("foo"))', {
sanitizer: (value, tag, attribute) => value,
})
```

#### options.slugify

By default, a [lightweight deburring function](https://github.com/probablyup/markdown-to-jsx/blob/bc2f57412332dc670f066320c0f38d0252e0f057/index.js#L261-L275) is used to generate an HTML id from headings. You can override this by passing a function to `options.slugify`. This is helpful when you are using non-alphanumeric characters (e.g. Chinese or Japanese characters) in headings. For example:

```jsx
;<Markdown options={{ slugify: str => str }}># 中文</Markdown>
<Markdown options={{ slugify: str => str }}># 中文</Markdown>

// or

compiler('# 中文', { slugify: str => str })

// renders:
;<h1 id="中文">中文</h1>
<h1 id="中文">中文</h1>
```

The original function is available as a library export called `slugify`.

#### options.namedCodesToUnicode

By default only a couple of named html codes are converted to unicode characters:
Expand Down
42 changes: 41 additions & 1 deletion index.compiler.spec.tsx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { compiler, RuleType } from './index'
import { compiler, sanitizer, RuleType } from './index'
import * as React from 'react'
import * as ReactDOM from 'react-dom'
import * as fs from 'fs'
Expand Down Expand Up @@ -1180,6 +1180,46 @@ describe('links', () => {
`)
})

// Passing an identity function as `options.sanitizer` opts out of URL
// sanitization: the value handed to the sanitizer is returned unchanged.
it('should not sanitize markdown when explicitly disabled', () => {
// Replace console.warn/error with no-op spies; the `warn` spy is asserted on below.
jest.spyOn(console, 'warn').mockImplementation(() => {})
jest.spyOn(console, 'error').mockImplementation(() => {})

render(compiler('[foo](javascript:doSomethingBad)', { sanitizer: x => x }))

// The `javascript:` target is expected to reach the rendered href untouched.
expect(root.innerHTML).toMatchInlineSnapshot(`
<a href="javascript:doSomethingBad">
foo
</a>
`)

// NOTE(review): presumably the warning comes from the built-in sanitizer,
// which the identity override bypasses — confirm against `sanitizer` in index.tsx.
expect(console.warn).not.toHaveBeenCalled()
})

// The custom sanitizer receives the target tag, so it can bypass sanitization
// for one tag ('a') while delegating every other tag to the exported built-in.
it('tag and attribute are provided to allow for conditional override', () => {
// Replace console.warn/error with no-op spies; the `warn` spy is asserted on below.
jest.spyOn(console, 'warn').mockImplementation(() => {})
jest.spyOn(console, 'error').mockImplementation(() => {})

render(
compiler(
'[foo](javascript:doSomethingBad)\n![foo](javascript:doSomethingBad)',
{
// Links keep their raw value; anything else goes through the built-in `sanitizer`.
sanitizer: (value, tag) => (tag === 'a' ? value : sanitizer(value)),
}
)
)

// Expected result: the anchor keeps its `javascript:` href, while the image
// is rendered without a `src` attribute.
expect(root.innerHTML).toMatchInlineSnapshot(`
<p>
<a href="javascript:doSomethingBad">
foo
</a>
<img alt="foo">
</p>
`)

// Exactly one warning is expected — presumably from the built-in sanitizer
// rejecting the image URL (verify against `sanitizer` in index.tsx).
expect(console.warn).toHaveBeenCalledTimes(1)
})

it('should sanitize markdown links containing JS expressions', () => {
jest.spyOn(console, 'warn').mockImplementation(() => {})
jest.spyOn(console, 'error').mockImplementation(() => {})
Expand Down
74 changes: 52 additions & 22 deletions index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -731,8 +731,10 @@ function normalizeAttributeKey(key) {
}

function attributeValueToJSXPropValue(
tag: MarkdownToJSX.HTMLTags,
key: keyof React.AllHTMLAttributes<Element>,
value: string
value: string,
sanitizeUrlFn: MarkdownToJSX.Options['sanitizer']
): any {
if (key === 'style') {
return value.split(/;\s?/).reduce(function (styles, kvPair) {
Expand All @@ -750,7 +752,7 @@ function attributeValueToJSXPropValue(
return styles
}, {})
} else if (key === 'href' || key === 'src') {
return sanitizeUrl(value)
return sanitizeUrlFn(value, tag, key)
} else if (value.match(INTERPOLATION_R)) {
// return as a string and let the consumer decide what to do with it
value = value.slice(1, value.length - 1)
Expand Down Expand Up @@ -951,7 +953,7 @@ function matchParagraph(
return [match, captured]
}

function sanitizeUrl(url: string): string | undefined {
export function sanitizer(url: string): string {
try {
const decoded = decodeURIComponent(url).replace(/[^A-Za-z0-9/:]/g, '')

Expand All @@ -963,7 +965,7 @@ function sanitizeUrl(url: string): string | undefined {
)
}

return undefined
return null
}
} catch (e) {
if (process.env.NODE_ENV !== 'production') {
Expand Down Expand Up @@ -1138,12 +1140,13 @@ export function compiler(
options: MarkdownToJSX.Options = {}
) {
options.overrides = options.overrides || {}
options.sanitizer = options.sanitizer || sanitizer
options.slugify = options.slugify || slugify
options.namedCodesToUnicode = options.namedCodesToUnicode
? { ...namedCodesToUnicode, ...options.namedCodesToUnicode }
: namedCodesToUnicode

const createElementFn = options.createElement || React.createElement
options.createElement = options.createElement || React.createElement

// JSX custom pragma
// eslint-disable-next-line no-unused-vars
Expand All @@ -1158,7 +1161,7 @@ export function compiler(
) {
const overrideProps = get(options.overrides, `${tag}.props`, {})

return createElementFn(
return options.createElement(
getTag(tag, options.overrides),
{
...props,
Expand Down Expand Up @@ -1228,7 +1231,10 @@ export function compiler(
return React.createElement(wrapper, { key: 'outer' }, jsx)
}

function attrStringToMap(str: string): JSX.IntrinsicAttributes {
function attrStringToMap(
tag: MarkdownToJSX.HTMLTags,
str: string
): JSX.IntrinsicAttributes {
const attributes = str.match(ATTR_EXTRACTOR_R)
if (!attributes) {
return null
Expand All @@ -1243,8 +1249,10 @@ export function compiler(

const mappedKey = ATTRIBUTE_TO_JSX_PROP_MAP[key] || key
const normalizedValue = (map[mappedKey] = attributeValueToJSXPropValue(
tag,
key,
value
value,
options.sanitizer
))

if (
Expand Down Expand Up @@ -1366,7 +1374,7 @@ export function compiler(
parse(capture /*, parse, state*/) {
return {
// if capture[3] it's additional metadata
attrs: attrStringToMap(capture[3] || ''),
attrs: attrStringToMap('code', capture[3] || ''),
lang: capture[2] || undefined,
text: capture[4],
type: RuleType.codeBlock,
Expand Down Expand Up @@ -1409,13 +1417,13 @@ export function compiler(
order: Priority.HIGH,
parse(capture /*, parse*/) {
return {
target: `#${options.slugify(capture[1])}`,
target: `#${options.slugify(capture[1], slugify)}`,
text: capture[1],
}
},
render(node, output, state) {
return (
<a key={state.key} href={sanitizeUrl(node.target)}>
<a key={state.key} href={options.sanitizer(node.target, 'a', 'href')}>
<sup key={state.key}>{node.text}</sup>
</a>
)
Expand Down Expand Up @@ -1450,7 +1458,7 @@ export function compiler(
parse(capture, parse, state) {
return {
children: parseInline(parse, capture[2], state),
id: options.slugify(capture[2]),
id: options.slugify(capture[2], slugify),
level: capture[1].length as MarkdownToJSX.HeadingNode['level'],
}
},
Expand Down Expand Up @@ -1495,10 +1503,14 @@ export function compiler(
const noInnerParse =
DO_NOT_PROCESS_HTML_ELEMENTS.indexOf(tagName) !== -1

const tag = (
noInnerParse ? tagName : capture[1]
).trim() as MarkdownToJSX.HTMLTags

const ast = {
attrs: attrStringToMap(capture[2]),
attrs: attrStringToMap(tag, capture[2]),
noInnerParse: noInnerParse,
tag: (noInnerParse ? tagName : capture[1]).trim(),
tag,
} as {
attrs: ReturnType<typeof attrStringToMap>
children?: ReturnType<MarkdownToJSX.NestedParser> | undefined
Expand Down Expand Up @@ -1539,9 +1551,11 @@ export function compiler(
match: anyScopeRegex(HTML_SELF_CLOSING_ELEMENT_R),
order: Priority.HIGH,
parse(capture /*, parse, state*/) {
const tag = capture[1].trim() as MarkdownToJSX.HTMLTags

return {
attrs: attrStringToMap(capture[2] || ''),
tag: capture[1].trim(),
attrs: attrStringToMap(tag, capture[2] || ''),
tag,
}
},
render(node, output, state) {
Expand Down Expand Up @@ -1574,7 +1588,7 @@ export function compiler(
key={state.key}
alt={node.alt || undefined}
title={node.title || undefined}
src={sanitizeUrl(node.target)}
src={options.sanitizer(node.target, 'img', 'src')}
/>
)
},
Expand All @@ -1596,7 +1610,11 @@ export function compiler(
},
render(node, output, state) {
return (
<a key={state.key} href={sanitizeUrl(node.target)} title={node.title}>
<a
key={state.key}
href={options.sanitizer(node.target, 'a', 'href')}
title={node.title}
>
{output(node.children, state)}
</a>
)
Expand Down Expand Up @@ -1725,7 +1743,7 @@ export function compiler(
<img
key={state.key}
alt={node.alt}
src={sanitizeUrl(refs[node.ref].target)}
src={options.sanitizer(refs[node.ref].target, 'img', 'src')}
title={refs[node.ref].title}
/>
) : null
Expand All @@ -1749,7 +1767,7 @@ export function compiler(
return refs[node.ref] ? (
<a
key={state.key}
href={sanitizeUrl(refs[node.ref].target)}
href={options.sanitizer(refs[node.ref].target, 'a', 'href')}
title={refs[node.ref].title}
>
{output(node.children, state)}
Expand Down Expand Up @@ -1934,7 +1952,10 @@ export function compiler(
<footer key="footer">
{footnotes.map(function createFootnote(def) {
return (
<div id={options.slugify(def.identifier)} key={def.identifier}>
<div
id={options.slugify(def.identifier, slugify)}
key={def.identifier}
>
{def.identifier}
{emitter(parser(def.footnote, { inline: true }))}
</div>
Expand Down Expand Up @@ -2375,11 +2396,20 @@ export namespace MarkdownToJSX {
state: State
) => React.ReactChild

/**
* Override the built-in sanitizer function for URLs, etc., if desired. The built-in version is available as a library export called `sanitizer`.
*/
sanitizer: (
value: string,
tag: HTMLTags,
attribute: string
) => string | null

/**
* Override normalization of non-URI-safe characters for use in generating
* HTML IDs for anchor linking purposes.
*/
slugify: (source: string) => string
slugify: (input: string, defaultFn: (input: string) => string) => string

/**
* Declare the type of the wrapper to be used when there are multiple
Expand Down
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,11 @@
"size-limit": [
{
"path": "./dist/index.module.js",
"limit": "6.1 kB"
"limit": "6.2 kB"
},
{
"path": "./dist/index.modern.js",
"limit": "6.1 kB"
"limit": "6.2 kB"
}
],
"jest": {
Expand Down

0 comments on commit 62a16f3

Please sign in to comment.