From db45bf850a2b7bd19d56694cf81f10f2490f95b7 Mon Sep 17 00:00:00 2001 From: Brian White Date: Mon, 16 Jan 2017 09:46:36 -0500 Subject: [PATCH] querystring: improve unescapeBuffer performance PR-URL: https://github.com/nodejs/node/pull/10837 Reviewed-By: James M Snell --- .../querystring/querystring-unescapebuffer.js | 23 ++++++++ lib/querystring.js | 54 ++++++++++++------- test/parallel/test-querystring.js | 7 +++ 3 files changed, 65 insertions(+), 19 deletions(-) create mode 100644 benchmark/querystring/querystring-unescapebuffer.js diff --git a/benchmark/querystring/querystring-unescapebuffer.js b/benchmark/querystring/querystring-unescapebuffer.js new file mode 100644 index 00000000000000..fe48a6f149bc6a --- /dev/null +++ b/benchmark/querystring/querystring-unescapebuffer.js @@ -0,0 +1,23 @@ +'use strict'; +var common = require('../common.js'); +var querystring = require('querystring'); + +var bench = common.createBenchmark(main, { + input: [ + 'there is nothing to unescape here', + 'there%20are%20several%20spaces%20that%20need%20to%20be%20unescaped', + 'there%2Qare%0-fake%escaped values in%%%%this%9Hstring', + '%20%21%22%23%24%25%26%27%28%29%2A%2B%2C%2D%2E%2F%30%31%32%33%34%35%36%37' + ], + n: [10e6], +}); + +function main(conf) { + var input = conf.input; + var n = conf.n | 0; + + bench.start(); + for (var i = 0; i < n; i += 1) + querystring.unescapeBuffer(input); + bench.end(n); +} diff --git a/lib/querystring.js b/lib/querystring.js index 2ced10c72d1452..5ccb5fa77b320f 100644 --- a/lib/querystring.js +++ b/lib/querystring.js @@ -22,15 +22,41 @@ const Buffer = require('buffer').Buffer; function ParsedQueryString() {} ParsedQueryString.prototype = Object.create(null); - +const unhexTable = [ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0 - 15 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 16 - 31 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 32 - 47 + +0, +1, +2, +3, +4, +5, +6, +7, +8, +9, -1, -1, -1, -1, -1, -1, // 48 - 63 + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 64 - 79 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 80 - 95 + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 96 - 111 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 112 - 127 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 128 ... + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 // ... 255 +]; // a safe fast alternative to decodeURIComponent function unescapeBuffer(s, decodeSpaces) { var out = Buffer.allocUnsafe(s.length); var state = 0; - var n, m, hexchar; + var n, m, hexchar, c; - for (var inIndex = 0, outIndex = 0; inIndex <= s.length; inIndex++) { - var c = inIndex < s.length ? s.charCodeAt(inIndex) : NaN; + for (var inIndex = 0, outIndex = 0; ; inIndex++) { + if (inIndex < s.length) { + c = s.charCodeAt(inIndex); + } else { + if (state > 0) { + out[outIndex++] = 37/*%*/; + if (state === 2) + out[outIndex++] = hexchar; + } + break; + } switch (state) { case 0: // Any character switch (c) { @@ -51,13 +77,8 @@ function unescapeBuffer(s, decodeSpaces) { case 1: // First hex digit hexchar = c; - if (c >= 48/*0*/ && c <= 57/*9*/) { - n = c - 48/*0*/; - } else if (c >= 65/*A*/ && c <= 70/*F*/) { - n = c - 65/*A*/ + 10; - } else if (c >= 97/*a*/ && c <= 102/*f*/) { - n = c - 97/*a*/ + 10; - } else { + n = unhexTable[c]; + if (!(n >= 0)) { out[outIndex++] = 37/*%*/; out[outIndex++] = c; state = 0; @@ -68,13 +89,8 @@ function unescapeBuffer(s, decodeSpaces) { case 2: // Second hex digit state = 0; - if (c >= 48/*0*/ && c <= 57/*9*/) { - m = c - 48/*0*/; - } else if (c >= 65/*A*/ && c <= 70/*F*/) { - m = c - 65/*A*/ + 10; - } else if (c >= 97/*a*/ && c <= 102/*f*/) { - m = c - 97/*a*/ + 10; - } else { + m = unhexTable[c]; + if (!(m >= 0)) { out[outIndex++] = 37/*%*/; out[outIndex++] = hexchar; out[outIndex++] = c; @@ -87,7 +103,7 @@ function unescapeBuffer(s, decodeSpaces) { // TODO support returning arbitrary buffers. - return out.slice(0, outIndex - 1); + return out.slice(0, outIndex); } diff --git a/test/parallel/test-querystring.js b/test/parallel/test-querystring.js index b528372b750d95..c4d457f98d53e1 100644 --- a/test/parallel/test-querystring.js +++ b/test/parallel/test-querystring.js @@ -294,6 +294,13 @@ assert.equal(0xd8, b[17]); assert.equal(0xa2, b[18]); assert.equal(0xe6, b[19]); +assert.strictEqual(qs.unescapeBuffer('a+b', true).toString(), 'a b'); +assert.strictEqual(qs.unescapeBuffer('a%').toString(), 'a%'); +assert.strictEqual(qs.unescapeBuffer('a%2').toString(), 'a%2'); +assert.strictEqual(qs.unescapeBuffer('a%20').toString(), 'a '); +assert.strictEqual(qs.unescapeBuffer('a%2g').toString(), 'a%2g'); +assert.strictEqual(qs.unescapeBuffer('a%%').toString(), 'a%%'); + // Test custom decode function demoDecode(str) {