From 06b8038ddb3b4278c0ac8456ecab8499955e3e4a Mon Sep 17 00:00:00 2001 From: valadaptive Date: Fri, 13 Sep 2024 04:12:44 -0400 Subject: [PATCH] Further optimize string decoding --- lib/utils.js | 85 +++++++++++++++++++++------------------------- test/test_utils.js | 8 ++++- 2 files changed, 45 insertions(+), 48 deletions(-) diff --git a/lib/utils.js b/lib/utils.js index 38a3fb5a..2963a74f 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -413,56 +413,13 @@ class OrderedQueue { } let decodeSlice; -if (typeof Buffer === 'function' && Buffer.prototype.utf8Slice) { - decodeSlice = function(arr, start, end) { - return Buffer.prototype.utf8Slice.call(arr, start, end); - }; +if (typeof Buffer === 'function' && typeof Buffer.prototype.utf8Slice === 'function') { + decodeSlice = Function.prototype.call.bind(Buffer.prototype.utf8Slice); } else { const DECODER = new TextDecoder(); - // Calling `subarray` is expensive enough that for small strings, it's faster - // to decode manually. decodeSlice = function(arr, start, end) { - if (end - start > 32) { - return DECODER.decode(arr.subarray(start, end)); - } - - let output = ''; - let i = start; - // Consume the string in 4-byte chunks. The performance benefit comes not - // from *reading* in chunks, but calling fromCharCode with 4 characters per - // call. - while (i + 3 < end) { - const n = (arr[i] << 24) | - (arr[i + 1] << 16) | - (arr[i + 2] << 8) | - arr[i + 3]; - // If the high bit of any character is set, it's a non-ASCII character. - // Fall back to TextDecoder for the remaining characters. - if (n & 0x80808080) { - output += DECODER.decode(arr.subarray(start + i, end)); - return output; - } - output += String.fromCharCode( - n >>> 24, - (n >> 16) & 0xff, - (n >> 8) & 0xff, - n & 0xff - ); - i += 4; - } - - // Handle the remainder of the string. - while (i < end) { - if (arr[i] & 0x80) { - output += DECODER.decode(arr.subarray(start + i, end)); - return output; - } - output += String.fromCharCode(arr[i]); - i++; - } - - return output; + return DECODER.decode(arr.subarray(start, end)); }; } @@ -839,7 +796,41 @@ class Tap { if (this.pos > this.length) { return; } - return decodeSlice(this.arr, pos, pos + len); + + let arr = this.arr; + let end = pos + len; + if (len > 24) { + return decodeSlice(arr, pos, end); + } + + let output = ''; + // Consume the string in 4-byte chunks. The performance benefit comes not + // from *reading* in chunks, but calling fromCharCode with 4 characters per + // call. + while (pos + 3 < end) { + let a = arr[pos], b = arr[pos + 1], c = arr[pos + 2], d = arr[pos + 3]; + // If the high bit of any character is set, it's a non-ASCII character. + // Fall back to TextDecoder for the remaining characters. + if ((a | b | c | d) & 0x80) { + output += decodeSlice(arr, pos, end); + return output; + } + output += String.fromCharCode(a, b, c, d); + pos += 4; + } + + // Handle the remainder of the string. + while (pos < end) { + let char = arr[pos]; + if (char & 0x80) { + output += decodeSlice(arr, pos, end); + return output; + } + output += String.fromCharCode(char); + pos++; + } + + return output; } writeString (s) { diff --git a/test/test_utils.js b/test/test_utils.js index d0f02711..70273b5c 100644 --- a/test/test_utils.js +++ b/test/test_utils.js @@ -211,7 +211,13 @@ suite('utils', () => { suite('string', () => { testWriterReader({ - elems: ['ahierw', '', 'alh hewlii! rew'], + elems: [ + 'ahierw', + '', + 'alh hewlii! rew', + 'sérialisation', + 'this string should be long enough that a different code path is exercised' + ], reader: function () { return this.readString(); }, skipper: function () { this.skipString(); }, writer: function (s) { this.writeString(s); }