Skip to content

Commit

Permalink
unescapeXML decodes any numeric entity, not just a whitelisted set
Browse files Browse the repository at this point in the history
This allows unescapeXML to correctly parse strings like &#64; (@) and
complex sequences like &#x1F40D; (U+1F40D, Snake).
  • Loading branch information
mogsie committed Jun 15, 2018
1 parent 29275d7 commit 40ab541
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion lib/escape.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ var unescapeXMLTable = {
}

/**
 * Replacement callback used when unescaping: converts one matched entity
 * into the text it stands for.
 *
 * @param {string} match - The whole matched entity, including the leading
 *   `&` and the trailing `;` (for example `&amp;`, `&#64;` or `&#x1F40D;`).
 * @returns {string} For a numeric reference, the character with that code
 *   point; for anything else, the value looked up in unescapeXMLTable. A
 *   numeric reference whose value is not a valid Unicode code point is
 *   returned unchanged.
 */
function unescapeXMLReplace (match) {
  if (match[1] === '#') {
    var isHex = match[2] === 'x'
    // parseInt stops at the trailing ';', so it does not have to be stripped.
    var codePoint = isHex
      ? parseInt(match.slice(3), 16)
      : parseInt(match.slice(2), 10)
    // String.fromCodePoint throws a RangeError outside 0..0x10FFFF. Leave such
    // a reference as-is rather than letting one bad entity abort the whole
    // replace. The negated form also catches NaN.
    if (!(codePoint >= 0 && codePoint <= 0x10FFFF)) {
      return match
    }
    return String.fromCodePoint(codePoint)
  }
  return unescapeXMLTable[match]
}

Expand All @@ -34,7 +41,7 @@ exports.escapeXML = function escapeXML (s) {
}

exports.unescapeXML = function unescapeXML (s) {
return s.replace(/&(amp|#38|lt|#60|gt|#62|quot|#34|apos|#39);/g, unescapeXMLReplace)
return s.replace(/&(amp|lt|gt|quot|apos|#x[0-9a-fA-F]+|#[0-9]+);/g, unescapeXMLReplace)
}

exports.escapeXMLText = function escapeXMLText (s) {
Expand Down

0 comments on commit 40ab541

Please sign in to comment.