Skip to content

Commit

Permalink
Add light version
Browse files Browse the repository at this point in the history
This refactors the project to split into several files, and adds
a new `stringify-entities/light` module that has a small bundle size
but does not have any of the formatting options.

Closes GH-9.
  • Loading branch information
wooorm committed Oct 19, 2020
1 parent 4d023fd commit d03284b
Show file tree
Hide file tree
Showing 22 changed files with 268 additions and 189 deletions.
2 changes: 1 addition & 1 deletion build.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ while (++index < length) {
}

fs.writeFileSync(
path.join('dangerous.json'),
path.join('lib', 'constant', 'dangerous.json'),
JSON.stringify(conflict, null, 2) + '\n'
)
158 changes: 1 addition & 157 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,159 +1,3 @@
'use strict'

var entities = require('character-entities-html4')
var legacy = require('character-entities-legacy')
var hexadecimal = require('is-hexadecimal')
var decimal = require('is-decimal')
var alphanumerical = require('is-alphanumerical')
var dangerous = require('./dangerous.json')

module.exports = encode
encode.escape = escape

// Shorthand for `Object.prototype.hasOwnProperty`, invoked as
// `own.call(object, key)` so that only own keys are matched.
var own = {}.hasOwnProperty

// Characters
// Character code of `=`; compared against the code of the character that
// follows a named reference.
var equalsTo = 61

// List of enforced escapes.
// These are the characters encoded when no `subset` is given.
var escapes = ['"', "'", '<', '>', '&', '`']

// Map of characters to names.
// Built once, when the module loads, by `construct`.
var characters = construct()

// Default escapes.
// `escapes` compiled into a global expression by `toExpression`.
var defaultEscapes = toExpression(escapes)

// Surrogate pairs.
// Matches a high surrogate directly followed by a low surrogate.
var surrogatePair = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g

// Non-ASCII characters.
// Matches U+0001 through U+001F except LF and CR, then U+007F, U+0081,
// U+008D, U+008F, U+0090, U+009D, and everything from U+00A0 to U+FFFF.
// eslint-disable-next-line no-control-regex, unicorn/no-hex-escape
var bmp = /[\x01-\t\x0B\f\x0E-\x1F\x7F\x81\x8D\x8F\x90\x9D\xA0-\uFFFF]/g

// Encode special characters in `value`.
//
// First replaces the characters in `options.subset` (or the default escapes
// when no subset is given).
// When a subset is given or `options.escapeOnly` is set, that is all that
// happens.
// Otherwise surrogate pairs and the remaining characters matched by `bmp` are
// replaced as well.
function encode(value, options) {
  var settings = options || {}
  var subset = settings.subset
  var omit = settings.omitOptionalSemicolons
  var result = value.replace(
    subset ? toExpression(subset) : defaultEscapes,
    replace
  )

  if (subset || settings.escapeOnly) {
    return result
  }

  result = result.replace(surrogatePair, replaceSurrogatePair)

  return result.replace(bmp, replace)

  // Encode one character, passing along the code of the character after it.
  function replace(char, pos, slice) {
    var following = slice.charCodeAt(pos + 1)

    return one(char, following, settings)
  }

  // Combine both halves of a pair into one code point and encode that as a
  // hexadecimal reference; the following character sits two code units on.
  function replaceSurrogatePair(pair, pos, slice) {
    var high = pair.charCodeAt(0) - 0xd800
    var low = pair.charCodeAt(1) - 0xdc00
    var following = slice.charCodeAt(pos + 2)

    return toHexReference(high * 0x400 + low + 0x10000, following, omit)
  }
}

// Shortcut to escape special characters in HTML: calls `encode` with
// `escapeOnly` and `useNamedReferences` turned on.
function escape(value) {
  var settings = {escapeOnly: true, useNamedReferences: true}

  return encode(value, settings)
}

// Encode `char` according to `options`.
//
// `next` is the code of the character following `char`; it is passed to the
// reference builders, which use it when optional semicolons may be omitted.
// Returns a named reference when one is allowed and available (and, with
// `useShortestReferences`, strictly shorter), otherwise a numeric reference.
function one(char, next, options) {
  var shortest = options.useShortestReferences
  var omit = options.omitOptionalSemicolons
  var named
  var numeric
  var decimal
  var code

  if ((shortest || options.useNamedReferences) && own.call(characters, char)) {
    named = toNamed(characters[char], next, omit, options.attribute)
  }

  // Without `useShortestReferences`, an available name always wins.
  if (named && !shortest) {
    return named
  }

  code = char.charCodeAt(0)
  numeric = toHexReference(code, next, omit)

  // Decimal is not simply "shorter below 100": whether the semicolon can be
  // dropped depends on `next`, and that differs between hexadecimal and
  // decimal references, so build both and compare their lengths.
  if (shortest) {
    decimal = toDecimalReference(code, next, omit)

    if (decimal.length < numeric.length) {
      numeric = decimal
    }
  }

  if (named && named.length < numeric.length) {
    return named
  }

  return numeric
}

// Transform the entity `name` into a named character reference.
//
// The closing semicolon is dropped only when `omit` is on, `name` is a legacy
// entity that is not listed as dangerous, and, inside an attribute, the next
// character exists and is neither `=` nor alphanumerical.
function toNamed(name, next, omit, attribute) {
  var reference = '&' + name
  var terminated = reference + ';'

  if (!omit) {
    return terminated
  }

  if (!own.call(legacy, name)) {
    return terminated
  }

  if (dangerous.indexOf(name) !== -1) {
    return terminated
  }

  if (attribute && !(next && next !== equalsTo && !alphanumerical(next))) {
    return terminated
  }

  return reference
}

// Transform `code` into a hexadecimal character reference.
//
// The semicolon is left off only when `omit` is on and there is a next
// character that is not a hexadecimal digit.
function toHexReference(code, next, omit) {
  var reference = '&#x' + code.toString(16).toUpperCase()

  if (omit && next && !hexadecimal(next)) {
    return reference
  }

  return reference + ';'
}

// Transform `code` into a decimal character reference.
//
// The semicolon is left off only when `omit` is on and there is a next
// character that is not a decimal digit.
function toDecimalReference(code, next, omit) {
  var reference = '&#' + String(code)

  if (omit && next && !decimal(next)) {
    return reference
  }

  return reference + ';'
}

// Create a global expression that matches any one of `characters`.
//
// `characters` is a list of strings that are joined into a single character
// class.
// The characters that are special inside a class (`\`, `]`, `^`, and `-`) are
// escaped first, so that each one is matched literally instead of closing,
// negating, or forming a range in the class.
function toExpression(characters) {
  var body = characters.join('').replace(/[\\\]^-]/g, '\\$&')

  return new RegExp('[' + body + ']', 'g')
}

// Construct the map from characters to entity names by inverting `entities`,
// which maps names to characters.
function construct() {
  var byCharacter = {}
  var entityName

  for (entityName in entities) {
    byCharacter[entities[entityName]] = entityName
  }

  return byCharacter
}
module.exports = require('./lib')
1 change: 1 addition & 0 deletions lib/constant/assign.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
module.exports = Object.assign
10 changes: 10 additions & 0 deletions lib/constant/characters.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
var entities = require('character-entities-html4')

// Map of characters to entity names: the inverse of `entities`, which maps
// names to characters.
var characters = {}
var entityName

for (entityName in entities) {
  characters[entities[entityName]] = entityName
}

module.exports = characters
File renamed without changes.
1 change: 1 addition & 0 deletions lib/constant/from-char-code.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
module.exports = String.fromCharCode
1 change: 1 addition & 0 deletions lib/constant/has-own-property.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
module.exports = {}.hasOwnProperty
49 changes: 49 additions & 0 deletions lib/core.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
'use strict'

module.exports = encode

// Encode special characters in `value`.
//
// `options.format(code, next, options)` builds the reference for one
// character: `code` is its code point and `next` is the code of the character
// that follows it.
//
// When `options.subset` (a list of characters) is given, only those
// characters are encoded.
// Otherwise the default escapes are encoded and, unless `options.escapeOnly`
// is set, surrogate pairs and the remaining control and non-ASCII characters
// too.
function encode(value, options) {
  var subset = options.subset
  var expression = subset
    ? // Escape what is special inside a character class (`\`, `]`, `^`, `-`)
      // so that every subset character is matched literally.
      new RegExp('[' + subset.join('').replace(/[\\\]^-]/g, '\\$&') + ']', 'g')
    : /["&'<>`]/g

  value = value.replace(expression, basic)

  if (subset || options.escapeOnly) {
    return value
  }

  return (
    value
      // Surrogate pairs.
      .replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g, surrogate)
      // BMP control characters (C0 except for LF, CR, SP; DEL; and some more
      // non-ASCII ones).
      .replace(
        // eslint-disable-next-line no-control-regex, unicorn/no-hex-escape
        /[\x01-\t\v\f\x0E-\x1F\x7F\x81\x8D\x8F\x90\x9D\xA0-\uFFFF]/g,
        basic
      )
  )

  // Combine both halves of a pair into one code point; the character that
  // follows the pair sits two code units after `index`.
  function surrogate(pair, index, all) {
    return options.format(
      (pair.charCodeAt(0) - 0xd800) * 0x400 +
        pair.charCodeAt(1) -
        0xdc00 +
        0x10000,
      all.charCodeAt(index + 2),
      options
    )
  }

  // Format a single code unit.
  function basic(character, index, all) {
    return options.format(
      character.charCodeAt(0),
      all.charCodeAt(index + 1),
      options
    )
  }
}
14 changes: 14 additions & 0 deletions lib/encode-hexadecimal.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
'use strict'

var core = require('./core')
var assign = require('./constant/assign')
var basic = require('./util/format-basic')

module.exports = encodeHexadecimal

// Encode special characters in `value` as hexadecimals.
//
// Runs `core` with the basic formatter as the default `format`; fields in
// `options` are assigned on top of that default.
function encodeHexadecimal(value, options) {
  // Note: this file was added in a minor release, so `Object.assign` can be
  // used here.
  var settings = assign({format: basic}, options)

  return core(value, settings)
}
13 changes: 13 additions & 0 deletions lib/encode.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
'use strict'

var xtend = require('xtend')
var core = require('./core')
var smart = require('./util/format-smart')

module.exports = encode

// Encode special characters in `value`.
//
// Runs `core` with a copy of `options` in which `format` is set to the smart
// formatter.
function encode(value, options) {
  // Note: Switch to `Object.assign` next major.
  var settings = xtend(options, {format: smart})

  return core(value, settings)
}
15 changes: 15 additions & 0 deletions lib/escape.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
'use strict'

var core = require('./core')
var smart = require('./util/format-smart')

module.exports = escape

// Shortcut to escape special characters in HTML: runs `core` with the smart
// formatter, `escapeOnly`, and `useNamedReferences`.
function escape(value) {
  var settings = {escapeOnly: true, useNamedReferences: true, format: smart}

  return core(value, settings)
}
7 changes: 7 additions & 0 deletions lib/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
'use strict'

var encode = require('./encode')
var escape = require('./escape')

module.exports = encode
encode.escape = escape
5 changes: 5 additions & 0 deletions lib/util/format-basic.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
module.exports = formatBasic

// Format `code` as an uppercase hexadecimal character reference that always
// ends in a semicolon.
function formatBasic(code) {
  var digits = code.toString(16).toUpperCase()

  return ['&#x', digits, ';'].join('')
}
48 changes: 48 additions & 0 deletions lib/util/format-smart.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
module.exports = formatPretty

var toHexadecimal = require('./to-hexadecimal')
var toDecimal = require('./to-decimal')
var toNamed = require('./to-named')

// Encode the character with code point `code` according to `options`.
//
// `next` is the code of the character that follows; it is passed to the
// reference builders, which use it when optional semicolons may be omitted.
// Returns a named reference when one is allowed and available (and, with
// `useShortestReferences`, strictly shorter), otherwise a numeric reference.
function formatPretty(code, next, options) {
  var omit = options.omitOptionalSemicolons
  var shortest = options.useShortestReferences
  var named
  var numeric
  var decimal

  if (options.useNamedReferences || shortest) {
    named = toNamed(code, next, omit, options.attribute)
  }

  // Without `useShortestReferences`, an available name always wins.
  if (named && !shortest) {
    return named
  }

  numeric = toHexadecimal(code, next, omit)

  // Decimal is not simply "shorter below 100": whether the semicolon can be
  // dropped depends on `next`, and that differs between hexadecimal and
  // decimal references, so build both and compare their lengths.
  if (shortest) {
    decimal = toDecimal(code, next, omit)

    if (decimal.length < numeric.length) {
      numeric = decimal
    }
  }

  if (named && named.length < numeric.length) {
    return named
  }

  return numeric
}
9 changes: 9 additions & 0 deletions lib/util/to-decimal.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
module.exports = toDecimalReference

var fromCharCode = require('../constant/from-char-code')

// Transform `code` into a decimal character reference.
//
// The semicolon is left off only when `omit` is on and there is a next
// character that is not a decimal digit.
function toDecimalReference(code, next, omit) {
  var reference = '&#' + String(code)

  if (omit && next && !/\d/.test(fromCharCode(next))) {
    return reference
  }

  return reference + ';'
}
11 changes: 11 additions & 0 deletions lib/util/to-hexadecimal.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
module.exports = toHexReference

var fromCharCode = require('../constant/from-char-code')

// Transform `code` into a hexadecimal character reference.
//
// The semicolon is left off only when `omit` is on and there is a next
// character that is not a hexadecimal digit.
function toHexReference(code, next, omit) {
  var reference = '&#x' + code.toString(16).toUpperCase()

  if (omit && next && !/[\dA-Fa-f]/.test(fromCharCode(next))) {
    return reference
  }

  return reference + ';'
}
33 changes: 33 additions & 0 deletions lib/util/to-named.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
module.exports = toNamed

var legacy = require('character-entities-legacy')
var characters = require('../constant/characters')
var fromCharCode = require('../constant/from-char-code')
var own = require('../constant/has-own-property')
var dangerous = require('../constant/dangerous.json')

// Transform `code` into a named character reference.
//
// Returns an empty string when there is no name for `code`.
// The closing semicolon is dropped only when `omit` is on, the name is a
// legacy entity that is not listed as dangerous, and, inside an attribute,
// the next character exists and is neither `=` nor alphanumerical.
function toNamed(code, next, omit, attribute) {
  var character
  var name
  var value

  // `String.fromCharCode` keeps only the low 16 bits of its argument, so a
  // code point above U+FFFF would be looked up as an unrelated BMP character
  // (U+100A0 would become U+00A0 and yield `&nbsp`).
  // `characters` is keyed by what `fromCharCode` produces for the codes below
  // that limit, so such code points have no name here.
  if (code > 0xffff) {
    return ''
  }

  character = fromCharCode(code)

  if (own.call(characters, character)) {
    name = characters[character]
    value = '&' + name

    if (
      omit &&
      own.call(legacy, name) &&
      dangerous.indexOf(name) === -1 &&
      (!attribute ||
        (next && next !== 61 /* `=` */ && /[^\da-z]/i.test(fromCharCode(next))))
    ) {
      return value
    }

    return value + ';'
  }

  return ''
}
Loading

0 comments on commit d03284b

Please sign in to comment.