-
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This refactors the project to split into several files, and adds a new `stringify-entities/light` module that has a small bundle size but does not have any of the formatting options. Closes GH-9.
- Loading branch information
Showing
22 changed files
with
268 additions
and
189 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,159 +1,3 @@ | ||
'use strict' | ||
|
||
var entities = require('character-entities-html4') | ||
var legacy = require('character-entities-legacy') | ||
var hexadecimal = require('is-hexadecimal') | ||
var decimal = require('is-decimal') | ||
var alphanumerical = require('is-alphanumerical') | ||
var dangerous = require('./dangerous.json') | ||
|
||
module.exports = encode | ||
encode.escape = escape | ||
|
||
var own = {}.hasOwnProperty | ||
|
||
// Character code of the equals sign (U+003D). | ||
var equalsTo = 61 | ||
|
||
// Characters that are always encoded, whatever the options are. | ||
var escapes = ['"', "'", '<', '>', '&', '`'] | ||
|
||
// Map of characters to entity names, built once by construct(). | ||
var characters = construct() | ||
|
||
// Global expression matching each of the enforced escapes above. | ||
var defaultEscapes = toExpression(escapes) | ||
|
||
// A high surrogate directly followed by a low surrogate. | ||
var surrogatePair = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g | ||
|
||
// Remaining characters to encode: C0 controls other than NUL, LF, and CR; | ||
// DEL; a handful of C1 code points; and U+00A0 through U+FFFF. | ||
// eslint-disable-next-line no-control-regex, unicorn/no-hex-escape | ||
var bmp = /[\x01-\t\x0B\f\x0E-\x1F\x7F\x81\x8D\x8F\x90\x9D\xA0-\uFFFF]/g | ||
|
||
// Encode special characters in `value`.
//
// The first pass encodes either the characters in `options.subset` or the
// enforced escapes.
// When a subset is given, or `options.escapeOnly` is set, that is all that
// happens.
// Otherwise surrogate pairs and the remaining `bmp` characters are encoded
// as well.
function encode(value, options) {
  var config = options || {}
  var subset = config.subset
  var expression = subset ? toExpression(subset) : defaultEscapes
  var escapeOnly = config.escapeOnly
  var omit = config.omitOptionalSemicolons
  var escaped = value.replace(expression, encodeCharacter)
  var withoutPairs

  if (subset || escapeOnly) {
    return escaped
  }

  // Pairs go first: afterwards no lone code unit of a pair is left for the
  // `bmp` pass to pick up.
  withoutPairs = escaped.replace(surrogatePair, encodePair)

  return withoutPairs.replace(bmp, encodeCharacter)

  // Encode a surrogate pair as one hexadecimal reference.
  // The character after the pair sits two code units further on.
  function encodePair(pair, offset, input) {
    var codePoint =
      (pair.charCodeAt(0) - 0xd800) * 0x400 +
      pair.charCodeAt(1) -
      0xdc00 +
      0x10000

    return toHexReference(codePoint, input.charCodeAt(offset + 2), omit)
  }

  // Encode a single code unit, passing along the code of what follows it.
  function encodeCharacter(character, offset, input) {
    return one(character, input.charCodeAt(offset + 1), config)
  }
}
|
||
// Shortcut to escape special characters in HTML: only the enforced escapes
// are encoded, as named references where one is available.
function escape(value) {
  var options = {escapeOnly: true, useNamedReferences: true}

  return encode(value, options)
}
|
||
// Encode `char` according to `options`.
//
// `next` is the character code following `char` (`NaN` at the end of the
// input); it decides whether a trailing semicolon can be left out.
function one(char, next, options) {
  var preferShortest = options.useShortestReferences
  var omit = options.omitOptionalSemicolons
  var namedReference
  var hexReference
  var decimalReference
  var numericReference
  var code

  if (
    (preferShortest || options.useNamedReferences) &&
    own.call(characters, char)
  ) {
    namedReference = toNamed(characters[char], next, omit, options.attribute)
  }

  // Without `useShortestReferences`, a named reference wins outright.
  if (namedReference && !preferShortest) {
    return namedReference
  }

  code = char.charCodeAt(0)
  hexReference = toHexReference(code, next, omit)

  if (!preferShortest) {
    return hexReference
  }

  // Use the shortest numeric reference when requested.
  // A simple algorithm would use decimal for all code points under 100, as
  // those are shorter than hexadecimal:
  //
  // * `&#99;` vs `&#x63;` (decimal shorter)
  // * `&#100;` vs `&#x64;` (equal)
  //
  // However, because `next` is taken into account when semicolons may be
  // omitted, decimal can also be shorter for bigger values (when `next` is
  // hexadecimal but not decimal), so both are built and compared.
  decimalReference = toDecimalReference(code, next, omit)
  numericReference =
    decimalReference.length < hexReference.length
      ? decimalReference
      : hexReference

  // Named only wins when it is strictly shorter than the numeric form.
  return namedReference && namedReference.length < numericReference.length
    ? namedReference
    : numericReference
}
|
||
// Transform an entity `name` into a named character reference.
//
// The semicolon is left off only when `omit` is set, `name` is a legacy
// entity that is not listed as dangerous, and (in attributes) the next
// character exists and is neither `=` nor alphanumerical.
function toNamed(name, next, omit, attribute) {
  var reference = '&' + name
  var semicolonOptional =
    omit &&
    own.call(legacy, name) &&
    dangerous.indexOf(name) === -1 &&
    (!attribute || (next && next !== equalsTo && !alphanumerical(next)))

  return semicolonOptional ? reference : reference + ';'
}
|
||
// Transform `code` into a hexadecimal character reference.
//
// The semicolon is dropped only when `omit` is set and a next character
// exists that is not a hexadecimal digit.
function toHexReference(code, next, omit) {
  var reference = '&#x' + code.toString(16).toUpperCase()

  if (omit && next && !hexadecimal(next)) {
    return reference
  }

  return reference + ';'
}
|
||
// Transform `code` into a decimal character reference.
//
// The semicolon is dropped only when `omit` is set and a next character
// exists that is not a decimal digit.
function toDecimalReference(code, next, omit) {
  var reference = '&#' + String(code)

  if (omit && next && !decimal(next)) {
    return reference
  }

  return reference + ';'
}
|
||
// Create a global expression matching any one of `characters`.
//
// The characters end up inside a character class, so the four that have a
// meaning there (`\`, `]`, `^`, and `-`) are escaped first.
// Unescaped, `]` produced a class that never matches, `\` threw, `-` between
// two characters formed a range, and a leading `^` negated the class.
function toExpression(characters) {
  var source = characters.join('').replace(/[\\\]^-]/g, '\\$&')

  return new RegExp('[' + source + ']', 'g')
}
||
// Construct the map of characters to names by inverting `entities`, which
// maps names to characters.
function construct() {
  var byCharacter = {}
  var entityName

  for (entityName in entities) {
    byCharacter[entities[entityName]] = entityName
  }

  return byCharacter
}
module.exports = require('./lib') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
module.exports = Object.assign |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
var entities = require('character-entities-html4') | ||
|
||
// Map of characters to entity names: the inverse of `entities`, which maps
// names to characters.
var characters = {}
var name

for (name in entities) {
  characters[entities[name]] = name
}

module.exports = characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
module.exports = String.fromCharCode |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
module.exports = {}.hasOwnProperty |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
'use strict' | ||
|
||
module.exports = encode | ||
|
||
// Encode special characters in `value`.
//
// `options.format` does the actual encoding: it is called as
// `format(code, next, options)`, where `code` is the character (or code
// point, for surrogate pairs) to encode and `next` is the character code
// that follows it (`NaN` at the end of the input), and returns the
// replacement.
//
// With `options.subset` (a list of characters) only those characters are
// encoded; with `options.escapeOnly` only `"`, `&`, `'`, `<`, `>`, and `` ` ``
// are.
function encode(value, options) {
  value = value.replace(
    options.subset
      ? new RegExp(
          // The subset ends up in a character class, so `\`, `]`, `^`, and
          // `-` are escaped to keep them literal.
          '[' + options.subset.join('').replace(/[\\\]^-]/g, '\\$&') + ']',
          'g'
        )
      : /["&'<>`]/g,
    basic
  )

  if (options.subset || options.escapeOnly) {
    return value
  }

  return (
    value
      // Surrogate pairs.
      .replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g, surrogate)
      // BMP characters: C0 controls other than NUL, LF, and CR; DEL; and
      // some non-ASCII ones.
      .replace(
        // eslint-disable-next-line no-control-regex, unicorn/no-hex-escape
        /[\x01-\t\v\f\x0E-\x1F\x7F\x81\x8D\x8F\x90\x9D\xA0-\uFFFF]/g,
        basic
      )
  )

  // Format a surrogate pair as a single code point.
  // The character after the pair sits two code units further on.
  function surrogate(pair, index, all) {
    return options.format(
      (pair.charCodeAt(0) - 0xd800) * 0x400 +
        pair.charCodeAt(1) -
        0xdc00 +
        0x10000,
      all.charCodeAt(index + 2),
      options
    )
  }

  // Format a single code unit.
  function basic(character, index, all) {
    return options.format(
      character.charCodeAt(0),
      all.charCodeAt(index + 1),
      options
    )
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
'use strict' | ||
|
||
var core = require('./core') | ||
var assign = require('./constant/assign') | ||
var basic = require('./util/format-basic') | ||
|
||
module.exports = encodeHexadecimal | ||
|
||
// Encode special characters in `value` as hexadecimals.
//
// `format: basic` is the first source handed to `assign`, so fields of
// `options` are applied on top of it.
function encodeHexadecimal(value, options) {
  // Note: this file was added in a minor release, so here we can use
  // `Object.assign`.
  var settings = assign({format: basic}, options)

  return core(value, settings)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
'use strict' | ||
|
||
var xtend = require('xtend') | ||
var core = require('./core') | ||
var smart = require('./util/format-smart') | ||
|
||
module.exports = encode | ||
|
||
// Encode special characters in `value`, formatting each one with `smart`.
function encode(value, options) {
  // Note: Switch to `Object.assign` next major.
  var settings = xtend(options, {format: smart})

  return core(value, settings)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
'use strict' | ||
|
||
var core = require('./core') | ||
var smart = require('./util/format-smart') | ||
|
||
module.exports = escape | ||
|
||
// Shortcut to escape special characters in HTML: `core` is run in
// `escapeOnly` mode with named references turned on.
function escape(value) {
  var options = {escapeOnly: true, useNamedReferences: true, format: smart}

  return core(value, options)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
'use strict' | ||
|
||
var encode = require('./encode') | ||
var escape = require('./escape') | ||
|
||
module.exports = encode | ||
encode.escape = escape |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
module.exports = formatBasic | ||
|
||
// Format `code` as an uppercase hexadecimal character reference, always
// closed with a semicolon.
function formatBasic(code) {
  var digits = code.toString(16).toUpperCase()

  return '&#x' + digits + ';'
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
module.exports = formatPretty | ||
|
||
var toHexadecimal = require('./to-hexadecimal') | ||
var toDecimal = require('./to-decimal') | ||
var toNamed = require('./to-named') | ||
|
||
// Encode the character with code `code` according to `options`.
//
// `next` is the character code following it; it decides whether a trailing
// semicolon can be left out.
function formatPretty(code, next, options) {
  var preferShortest = options.useShortestReferences
  var omit = options.omitOptionalSemicolons
  var named
  var hexadecimal
  var decimal
  var numeric

  if (options.useNamedReferences || preferShortest) {
    // Yields an empty string when `code` has no named reference.
    named = toNamed(code, next, omit, options.attribute)
  }

  // Without `useShortestReferences`, a named reference wins outright.
  if (named && !preferShortest) {
    return named
  }

  hexadecimal = toHexadecimal(code, next, omit)

  if (!preferShortest) {
    return hexadecimal
  }

  // Use the shortest numeric reference when requested.
  // A simple algorithm would use decimal for all code points under 100, as
  // those are shorter than hexadecimal:
  //
  // * `&#99;` vs `&#x63;` (decimal shorter)
  // * `&#100;` vs `&#x64;` (equal)
  //
  // However, because `next` is taken into account when semicolons may be
  // omitted, decimal can also be shorter for bigger values (when `next` is
  // hexadecimal but not decimal), so both are built and compared.
  decimal = toDecimal(code, next, omit)
  numeric = decimal.length < hexadecimal.length ? decimal : hexadecimal

  // Named only wins when it is strictly shorter than the numeric form.
  return named && named.length < numeric.length ? named : numeric
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
module.exports = toDecimalReference | ||
|
||
var fromCharCode = require('../constant/from-char-code') | ||
|
||
// Transform `code` into a decimal character reference.
//
// The semicolon is dropped only when `omit` is set and a next character
// exists that is not a decimal digit.
function toDecimalReference(code, next, omit) {
  var reference = '&#' + String(code)

  if (omit && next && !/\d/.test(fromCharCode(next))) {
    return reference
  }

  return reference + ';'
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
module.exports = toHexReference | ||
|
||
var fromCharCode = require('../constant/from-char-code') | ||
|
||
// Transform `code` into a hexadecimal character reference.
//
// The semicolon is dropped only when `omit` is set and a next character
// exists that is not a hexadecimal digit.
function toHexReference(code, next, omit) {
  var reference = '&#x' + code.toString(16).toUpperCase()

  if (omit && next && !/[\dA-Fa-f]/.test(fromCharCode(next))) {
    return reference
  }

  return reference + ';'
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
module.exports = toNamed | ||
|
||
var legacy = require('character-entities-legacy') | ||
var characters = require('../constant/characters') | ||
var fromCharCode = require('../constant/from-char-code') | ||
var own = require('../constant/has-own-property') | ||
var dangerous = require('../constant/dangerous.json') | ||
|
||
// Transform `code` into a named character reference.
//
// Yields an empty string when the character has no entry in `characters`.
// The semicolon is left off only when `omit` is set, the name is a legacy
// entity that is not listed as dangerous, and (in attributes) the next
// character exists and is neither `=` nor an ASCII letter or digit.
function toNamed(code, next, omit, attribute) {
  var character = fromCharCode(code)
  var name
  var reference
  var semicolonOptional

  if (!own.call(characters, character)) {
    return ''
  }

  name = characters[character]
  reference = '&' + name
  semicolonOptional =
    omit &&
    own.call(legacy, name) &&
    dangerous.indexOf(name) === -1 &&
    (!attribute ||
      (next && next !== 61 /* `=` */ && /[^\da-z]/i.test(fromCharCode(next))))

  return semicolonOptional ? reference : reference + ';'
}
Oops, something went wrong.