Skip to content

fix: TextDecoder Big5 invalid sequence handling #59046

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 47 additions & 34 deletions lib/internal/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// https://encoding.spec.whatwg.org

const {
Boolean,

Check failure on line 7 in lib/internal/encoding.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

'Boolean' is assigned a value but never used
ObjectDefineProperties,
ObjectGetOwnPropertyDescriptors,
ObjectSetPrototypeOf,
Expand All @@ -27,10 +27,10 @@
const kEncoding = Symbol('encoding');
const kDecoder = Symbol('decoder');
const kEncoder = Symbol('encoder');
const kFatal = Symbol('kFatal');

Check failure on line 30 in lib/internal/encoding.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

'kFatal' is assigned a value but never used
const kUTF8FastPath = Symbol('kUTF8FastPath');

Check failure on line 31 in lib/internal/encoding.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

'kUTF8FastPath' is assigned a value but never used
const kLatin1FastPath = Symbol('kLatin1FastPath');

Check failure on line 32 in lib/internal/encoding.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

'kLatin1FastPath' is assigned a value but never used
const kIgnoreBOM = Symbol('kIgnoreBOM');

Check failure on line 33 in lib/internal/encoding.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

'kIgnoreBOM' is assigned a value but never used

const {
getConstructorOf,
Expand All @@ -55,8 +55,8 @@
encodeInto,
encodeIntoResults,
encodeUtf8String,
decodeUTF8,

Check failure on line 58 in lib/internal/encoding.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

'decodeUTF8' is assigned a value but never used
decodeLatin1,

Check failure on line 59 in lib/internal/encoding.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

'decodeLatin1' is assigned a value but never used
} = binding;

const { Buffer } = require('buffer');
Expand Down Expand Up @@ -395,8 +395,8 @@

function makeTextDecoderICU() {
const {
decode: _decode,

Check failure on line 398 in lib/internal/encoding.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

'_decode' is assigned a value but never used
getConverter,

Check failure on line 399 in lib/internal/encoding.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

'getConverter' is assigned a value but never used
} = internalBinding('icu');

class TextDecoder {
Expand All @@ -405,61 +405,75 @@
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

const enc = getEncodingFromLabel(encoding);
if (enc === undefined)
if (enc === undefined || !hasConverter(enc))

Check failure on line 408 in lib/internal/encoding.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

'hasConverter' is not defined
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);

let flags = 0;
if (options !== null) {
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
if (options.fatal) {
throw new ERR_NO_ICU('"fatal" option');
}
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
}

this[kDecoder] = true;
// StringDecoder will normalize WHATWG encoding to Node.js encoding.
this[kHandle] = new (lazyStringDecoder())(enc);
this[kFlags] = flags;
this[kEncoding] = enc;
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
this[kFatal] = Boolean(options?.fatal);
// Only support fast path for UTF-8.
this[kUTF8FastPath] = enc === 'utf-8';
this[kLatin1FastPath] = enc === 'windows-1252';
this[kHandle] = undefined;

if (!this[kUTF8FastPath] && !this[kLatin1FastPath]) {
this.#prepareConverter();
}
}

#prepareConverter() {
if (this[kHandle] !== undefined) return;
const handle = getConverter(this[kEncoding], this[kFlags]);
if (handle === undefined)
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
this[kHandle] = handle;
this[kBOMSeen] = false;
}

decode(input = empty, options = kEmptyObject) {
validateDecoder(this);

if (isAnyArrayBuffer(input)) {
try {
input = Buffer.from(input);
} catch {
input = empty;
}
} else if (isArrayBufferView(input)) {
try {
input = Buffer.from(input.buffer, input.byteOffset,
input.byteLength);
} catch {
input = empty;
}
} else {
throw new ERR_INVALID_ARG_TYPE('input',
['ArrayBuffer', 'ArrayBufferView'],
input);
}

this[kUTF8FastPath] &&= !(options?.stream);
this[kLatin1FastPath] &&= !(options?.stream);
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

if (this[kUTF8FastPath]) {
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
this[kBOMSeen] = false;
}

if (this[kLatin1FastPath]) {
return decodeLatin1(input, this[kIgnoreBOM], this[kFatal]);
if (options !== null && options.stream) {
this[kFlags] &= ~CONVERTER_FLAGS_FLUSH;
} else {
this[kFlags] |= CONVERTER_FLAGS_FLUSH;
}

this.#prepareConverter();

validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
let result = this[kFlags] & CONVERTER_FLAGS_FLUSH ?
this[kHandle].end(input) :
this[kHandle].write(input);

let flags = 0;
if (options !== null)
flags |= options.stream ? 0 : CONVERTER_FLAGS_FLUSH;
if (result.length > 0 &&
!this[kBOMSeen] &&
!(this[kFlags] & CONVERTER_FLAGS_IGNORE_BOM)) {
// If the very first result in the stream is a BOM, and we are not
// explicitly told to ignore it, then we discard it.
if (result[0] === '\ufeff') {
result = StringPrototypeSlice(result, 1);
}
this[kBOMSeen] = true;
}

return _decode(this[kHandle], input, flags, this.encoding);
return result;
}
}

Expand Down Expand Up @@ -555,7 +569,6 @@
return result;
}
}

return TextDecoder;
}

Expand Down
11 changes: 11 additions & 0 deletions test/parallel/test-whatwg-encoding-custom-textdecoder.js
Original file line number Diff line number Diff line change
Expand Up @@ -233,3 +233,14 @@ if (common.hasIntl) {
const decoder = new TextDecoder();
assert.strictEqual(decoder.decode(buffer), '');
}

// Big5 invalid byte sequence handling: the pair 0x83 0x5C is expected to
// decode to U+FFFD followed by U+005C, i.e. the ASCII byte must not be
// swallowed by the failed two-byte sequence.
// Big5 is a legacy encoding, so this only runs on builds with Intl (ICU)
// support; constructing the decoder would otherwise throw.
if (common.hasIntl) {
  const decoder = new TextDecoder('Big5');
  const input = new Uint8Array([0x83, 0x5C]);

  // Compare the whole string without a literal message so that a failure
  // reports the actual and expected values.
  assert.strictEqual(decoder.decode(input), '\uFFFD\\');
}
Loading