Skip to content

Commit

Permalink
Improve performance of unescapeXML
Browse files Browse the repository at this point in the history
The original implementation called String.prototype.replace with a regular
expression to find the entity references to decode.  It is more efficient to
use indexOf to find the next '&' character and then the ';' that follows it.

Fixes half of #120.
  • Loading branch information
Erik Mogensen committed Sep 28, 2018
1 parent 8406a55 commit 8fccfe3
Showing 1 changed file with 20 additions and 2 deletions.
22 changes: 20 additions & 2 deletions lib/escape.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,33 @@ function unescapeXMLReplace (match) {
}
throw new Error('Illegal XML character 0x' + num.toString(16))
}
return unescapeXMLTable[match]
return unescapeXMLTable[match] || match
}

exports.escapeXML = function escapeXML (s) {
return s.replace(/&|<|>|"|'/g, escapeXMLReplace)
}

exports.unescapeXML = function unescapeXML (s) {
return s.replace(/&(amp|lt|gt|quot|apos|#x[0-9a-fA-F]+|#[0-9]+);/g, unescapeXMLReplace)
var result = ''
var start = -1
var end = -1
var previous = 0
while ((start = s.indexOf('&', previous)) !== -1 && (end = s.indexOf(';', start + 1)) !== -1) {
result = result +
s.substring(previous, start) +
unescapeXMLReplace(s.substring(start, end + 1))
previous = end + 1
}

// shortcut if loop never entered:
// return the original string without creating new objects
if (previous === 0) return s

// push the remaining characters
result = result + s.substring(previous)

return result
}

exports.escapeXMLText = function escapeXMLText (s) {
Expand Down

0 comments on commit 8fccfe3

Please sign in to comment.