Skip to content

Commit

Permalink
Merge pull request #3 from tedivm/python_escapes
Browse files Browse the repository at this point in the history
Add python style unicode support (`\U0001F3B5`)
  • Loading branch information
iamakulov authored Nov 20, 2017
2 parents 4ac0f7d + 368192e commit e411ff2
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 5 deletions.
14 changes: 9 additions & 5 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,17 @@ import 'string.fromcodepoint';
* u\{([0-9A-Fa-f]+)\} - first alternative; matches the variable-length hexadecimal escape sequence (\u{ABCD0})
* |
* u([0-9A-Fa-f]{4}) - second alternative; matches the 4-digit hexadecimal escape sequence (\uABCD)
* |
* |
* x([0-9A-Fa-f]{2}) - third alternative; matches the 2-digit hexadecimal escape sequence (\xA5)
* |
* |
* ([1-7][0-7]{0,2}|[0-7]{2,3}) - fourth alternative; matches the up-to-3-digit octal escape sequence (\5 or \512)
* |
* |
* (['"tbrnfv0\\]) - fifth alternative; matches the special escape characters (\t, \n and so on)
* |
* \U([0-9A-Fa-f]{8}) - sixth alternative; matches the fixed-length 8-digit hexadecimal escape sequence used by Python (\U0001F3B5)
* )
*/
const jsEscapeRegex = /\\(u\{([0-9A-Fa-f]+)\}|u([0-9A-Fa-f]{4})|x([0-9A-Fa-f]{2})|([1-7][0-7]{0,2}|[0-7]{2,3})|(['"tbrnfv0\\]))/g;
const jsEscapeRegex = /\\(u\{([0-9A-Fa-f]+)\}|u([0-9A-Fa-f]{4})|x([0-9A-Fa-f]{2})|([1-7][0-7]{0,2}|[0-7]{2,3})|(['"tbrnfv0\\]))|\\U([0-9A-Fa-f]{8})/g;

const usualEscapeSequences = {
'0': '\0',
Expand All @@ -33,7 +35,7 @@ const fromHex = (str) => String.fromCodePoint(parseInt(str, 16));
// Turns a string of octal digits into the character at that code point.
const fromOct = (str) => {
    const codePoint = parseInt(str, 8);
    return String.fromCodePoint(codePoint);
};

export default (string) => {
return string.replace(jsEscapeRegex, (_, __, varHex, longHex, shortHex, octal, specialCharacter) => {
return string.replace(jsEscapeRegex, (_, __, varHex, longHex, shortHex, octal, specialCharacter, python) => {
if (varHex !== undefined) {
return fromHex(varHex);
} else if (longHex !== undefined) {
Expand All @@ -42,6 +44,8 @@ export default (string) => {
return fromHex(shortHex);
} else if (octal !== undefined) {
return fromOct(octal);
} else if (python !== undefined) {
return fromHex(python);
} else {
return usualEscapeSequences[specialCharacter];
}
Expand Down
5 changes: 5 additions & 0 deletions test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,8 @@ test('avoids double unescape cascade', t => {
t.is(unescapeJs('---\\\\x41---'), '---\\x41---');
t.is(unescapeJs('---\\x5cx41---'), '---\\x41---');
});

// Python-style \UXXXXXXXX escapes: one code point in the BMP and one astral
// code point, whose expected value is written as a UTF-16 surrogate pair.
test('python hex escape sequences', (t) => {
    const cases = [
        ['---\\U000000A9---', '---\u00A9---'],
        ['---\\U0001F3B5---', '---\uD83C\uDFB5---'],
    ];

    for (const [escaped, expected] of cases) {
        t.is(unescapeJs(escaped), expected);
    }
});

0 comments on commit e411ff2

Please sign in to comment.