Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

string_decoder: fix number of replacement chars #22709

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions src/string_decoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,16 +71,17 @@ MaybeLocal<String> StringDecoder::DecodeData(Isolate* isolate,
kIncompleteCharactersEnd);
if (Encoding() == UTF8) {
// For UTF-8, we need special treatment to align with the V8 decoder:
// If an incomplete character is found at a chunk boundary, we turn
// that character into a single invalid one.
// If an incomplete character is found at a chunk boundary, we use
// its remainder and pass it to V8 as-is.
for (size_t i = 0; i < nread && i < MissingBytes(); ++i) {
if ((data[i] & 0xC0) != 0x80) {
// This byte is not a continuation byte even though it should have
// been one.
// Act as if there was a 1-byte incomplete character, which does
// not make sense but works here because we know it's invalid.
// been one. We stop decoding of the incomplete character at this
// point (but still use the rest of the incomplete bytes from this
// chunk) and assume that the new, unexpected byte starts a new one.
state_[kMissingBytes] = 0;
state_[kBufferedBytes] = 1;
memcpy(IncompleteCharacterBuffer() + BufferedBytes(), data, i);
state_[kBufferedBytes] += i;
data += i;
nread -= i;
break;
Expand Down
48 changes: 48 additions & 0 deletions test/parallel/test-string-decoder-fuzz.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
'use strict';
require('../common');
const { StringDecoder } = require('string_decoder');
const util = require('util');
const assert = require('assert');

// Tests that, for random sequences of bytes, our StringDecoder gives the
// same result as a direct conversion using Buffer.toString().
// In particular, it checks that StringDecoder aligns with V8’s own output.

/**
 * Draw a pseudo-random integer by scaling Math.random() and rounding down.
 * For a positive `max` the result lies in the half-open range [0, max).
 *
 * @param {number} max - Exclusive upper bound of the result.
 * @returns {number} The floored product of Math.random() and `max`.
 */
function rand(max) {
  const scaled = Math.random() * max;
  return Math.floor(scaled);
}

/**
 * Build a Buffer whose length is chosen by rand(maxLen) and whose bytes are
 * each chosen by rand(256).
 *
 * @param {number} maxLen - Bound passed to rand() when picking the length.
 * @returns {Buffer} A freshly allocated buffer with every byte overwritten.
 */
function randBuf(maxLen) {
  const length = rand(maxLen);
  // allocUnsafe() is fine here: every byte is assigned below before use.
  const bytes = Buffer.allocUnsafe(length);
  let pos = 0;
  while (pos < bytes.length) {
    bytes[pos] = rand(256);
    pos += 1;
  }
  return bytes;
}

// Encodings the fuzz test picks from; one is chosen per run.
const encodings = [
  'utf16le',
  'utf8',
  'ascii',
  'hex',
  'base64',
  'latin1',
];

/**
 * Run one fuzz iteration: pick an encoding, feed a handful of random chunks
 * through a StringDecoder, and assert that the concatenated decoder output
 * equals decoding all chunks at once with Buffer#toString().
 *
 * @throws {AssertionError} When the incremental and one-shot results differ.
 */
function runSingleFuzzTest() {
  const encoding = encodings[rand(encodings.length)];
  const decoder = new StringDecoder(encoding);
  const chunkCount = rand(10);

  const chunks = [];
  const decoded = [];
  for (let n = 0; n < chunkCount; n += 1) {
    const chunk = randBuf(50);
    chunks.push(chunk);
    decoded.push(decoder.write(chunk));
  }
  // end() flushes whatever the decoder is still holding on to.
  decoded.push(decoder.end());

  const actual = decoded.join('');
  const expected = Buffer.concat(chunks).toString(encoding);

  // Include both the decoded pieces and the raw input (as hex) so that a
  // failing random case can be reproduced by hand.
  const hexChunks = chunks.map((chunk) => chunk.toString('hex'));
  const message = `Mismatch:\n${util.inspect(decoded)}\n` +
                  util.inspect(hexChunks) +
                  `\nfor encoding ${encoding}`;

  assert.strictEqual(actual, expected, message);
}

// Keep running fuzz iterations until about 100 ms of wall-clock time have
// passed, so the test stays quick while still covering many random inputs.
const TIME_BUDGET_MS = 100;
const deadline = Date.now() + TIME_BUDGET_MS;
while (Date.now() < deadline) {
  runSingleFuzzTest();
}
11 changes: 11 additions & 0 deletions test/parallel/test-string-decoder.js
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,17 @@ assert.strictEqual(decoder.write(Buffer.alloc(20)), '\0'.repeat(10));
assert.strictEqual(decoder.write(Buffer.alloc(48)), '\0'.repeat(24));
assert.strictEqual(decoder.end(), '');

// Regression tests for https://github.com/nodejs/node/issues/22626
// (not enough replacement chars when more than one byte of an incomplete
// multibyte character has already been seen).
decoder = new StringDecoder('utf8');
// Case 1: the first write() of two bytes is expected to return an empty
// string; the following one-byte write() must yield two replacement
// characters, and end() a third one.
assert.strictEqual(decoder.write(Buffer.from('f69b', 'hex')), '');
assert.strictEqual(decoder.write(Buffer.from('d1', 'hex')), '\ufffd\ufffd');
assert.strictEqual(decoder.end(), '\ufffd');
// Case 2: the same decoder instance is reused after end(). A one-byte
// write() is expected to return an empty string; the following two-byte
// write() must yield two replacement characters, and end() a third one.
assert.strictEqual(decoder.write(Buffer.from('f4', 'hex')), '');
assert.strictEqual(decoder.write(Buffer.from('bde5', 'hex')), '\ufffd\ufffd');
assert.strictEqual(decoder.end(), '\ufffd');

common.expectsError(
() => new StringDecoder(1),
{
Expand Down