Skip to content

Commit

Permalink
Further optimize string decoding
Browse files Browse the repository at this point in the history
  • Loading branch information
valadaptive committed Sep 13, 2024
1 parent 141eb72 commit 06b8038
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 48 deletions.
85 changes: 38 additions & 47 deletions lib/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -413,56 +413,13 @@ class OrderedQueue {
}

let decodeSlice;
if (typeof Buffer === 'function' && Buffer.prototype.utf8Slice) {
decodeSlice = function(arr, start, end) {
return Buffer.prototype.utf8Slice.call(arr, start, end);
};
if (typeof Buffer === 'function' && typeof Buffer.prototype.utf8Slice === 'function') {
decodeSlice = Function.prototype.call.bind(Buffer.prototype.utf8Slice);
} else {
const DECODER = new TextDecoder();

// Calling `subarray` is expensive enough that for small strings, it's faster
// to decode manually.
decodeSlice = function(arr, start, end) {
if (end - start > 32) {
return DECODER.decode(arr.subarray(start, end));
}

let output = '';
let i = start;
// Consume the string in 4-byte chunks. The performance benefit comes not
// from *reading* in chunks, but calling fromCharCode with 4 characters per
// call.
while (i + 3 < end) {
const n = (arr[i] << 24) |
(arr[i + 1] << 16) |
(arr[i + 2] << 8) |
arr[i + 3];
// If the high bit of any character is set, it's a non-ASCII character.
// Fall back to TextDecoder for the remaining characters.
if (n & 0x80808080) {
output += DECODER.decode(arr.subarray(start + i, end));
return output;
}
output += String.fromCharCode(
n >>> 24,
(n >> 16) & 0xff,
(n >> 8) & 0xff,
n & 0xff
);
i += 4;
}

// Handle the remainder of the string.
while (i < end) {
if (arr[i] & 0x80) {
output += DECODER.decode(arr.subarray(start + i, end));
return output;
}
output += String.fromCharCode(arr[i]);
i++;
}

return output;
return DECODER.decode(arr.subarray(start, end));
};
}

Expand Down Expand Up @@ -839,7 +796,41 @@ class Tap {
if (this.pos > this.length) {
return;
}
return decodeSlice(this.arr, pos, pos + len);

let arr = this.arr;
let end = pos + len;
if (len > 24) {
return decodeSlice(arr, pos, end);
}

let output = '';
// Consume the string in 4-byte chunks. The performance benefit comes not
// from *reading* in chunks, but calling fromCharCode with 4 characters per
// call.
while (pos + 3 < end) {
let a = arr[pos], b = arr[pos + 1], c = arr[pos + 2], d = arr[pos + 3];
// If the high bit of any byte is set, the string contains non-ASCII data.
// Fall back to decodeSlice for the remaining bytes.
if ((a | b | c | d) & 0x80) {
output += decodeSlice(arr, pos, end);
return output;
}
output += String.fromCharCode(a, b, c, d);
pos += 4;
}

// Handle the remainder of the string.
while (pos < end) {
let char = arr[pos];
if (char & 0x80) {
output += decodeSlice(arr, pos, end);
return output;
}
output += String.fromCharCode(char);
pos++;
}

return output;
}

writeString (s) {
Expand Down
8 changes: 7 additions & 1 deletion test/test_utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,13 @@ suite('utils', () => {
suite('string', () => {

testWriterReader({
elems: ['ahierw', '', 'alh hewlii! rew'],
elems: [
'ahierw',
'',
'alh hewlii! rew',
'sérialisation',
'this string should be long enough that a different code path is exercised'
],
reader: function () { return this.readString(); },
skipper: function () { this.skipString(); },
writer: function (s) { this.writeString(s); }
Expand Down

0 comments on commit 06b8038

Please sign in to comment.