Skip to content

Commit

Permalink
util: add fast path for utf8 encoding
Browse files Browse the repository at this point in the history
Co-authored-by: Anna Henningsen <anna@addaleax.net>
PR-URL: #45412
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: Rich Trott <rtrott@gmail.com>
Reviewed-By: Santiago Gimeno <santiago.gimeno@gmail.com>
  • Loading branch information
anonrig and addaleax authored Nov 12, 2022
1 parent 3439eb9 commit 4ac830e
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 6 deletions.
35 changes: 30 additions & 5 deletions lib/internal/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
// https://encoding.spec.whatwg.org

const {
Boolean,
ObjectCreate,
ObjectDefineProperties,
ObjectGetOwnPropertyDescriptors,
Expand All @@ -28,6 +29,8 @@ const kFlags = Symbol('flags');
const kEncoding = Symbol('encoding');
const kDecoder = Symbol('decoder');
const kEncoder = Symbol('encoder');
const kUTF8FastPath = Symbol('kUTF8FastPath');
const kIgnoreBOM = Symbol('kIgnoreBOM');

const {
getConstructorOf,
Expand All @@ -49,7 +52,8 @@ const {

const {
encodeInto,
encodeUtf8String
encodeUtf8String,
decodeUTF8,
} = internalBinding('buffer');

let Buffer;
Expand Down Expand Up @@ -397,19 +401,40 @@ function makeTextDecoderICU() {
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
}

const handle = getConverter(enc, flags);
if (handle === undefined)
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
// Only support fast path for UTF-8 without FATAL flag
const fastPathAvailable = enc === 'utf-8' && !(options?.fatal);

this[kDecoder] = true;
this[kHandle] = handle;
this[kFlags] = flags;
this[kEncoding] = enc;
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
this[kUTF8FastPath] = fastPathAvailable;
this[kHandle] = undefined;

if (!fastPathAvailable) {
this.#prepareConverter();
}
}

#prepareConverter() {
if (this[kHandle] !== undefined) return;
const handle = getConverter(this[kEncoding], this[kFlags]);
if (handle === undefined)
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
this[kHandle] = handle;
}

decode(input = empty, options = kEmptyObject) {
validateDecoder(this);

this[kUTF8FastPath] &&= !(options?.stream);

if (this[kUTF8FastPath]) {
return decodeUTF8(input, this[kIgnoreBOM]);
}

this.#prepareConverter();

validateObject(options, 'options', {
nullable: true,
allowArray: true,
Expand Down
45 changes: 45 additions & 0 deletions src/node_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "node_blob.h"
#include "node_errors.h"
#include "node_external_reference.h"
#include "node_i18n.h"
#include "node_internals.h"

#include "env-inl.h"
Expand Down Expand Up @@ -565,6 +566,48 @@ void StringSlice(const FunctionCallbackInfo<Value>& args) {
args.GetReturnValue().Set(ret);
}

// Convert the input into an encoded string
void DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args); // list, flags

if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
args[0]->IsArrayBufferView())) {
return node::THROW_ERR_INVALID_ARG_TYPE(
env->isolate(),
"The \"list\" argument must be an instance of SharedArrayBuffer, "
"ArrayBuffer or ArrayBufferView.");
}

ArrayBufferViewContents<char> buffer(args[0]);

CHECK(args[1]->IsBoolean());
bool ignore_bom = args[1]->IsTrue();

const char* data = buffer.data();
size_t length = buffer.length();

if (!ignore_bom && length >= 3) {
if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) {
data += 3;
length -= 3;
}
}

if (length == 0) return args.GetReturnValue().SetEmptyString();

Local<Value> error;
MaybeLocal<Value> maybe_ret =
StringBytes::Encode(env->isolate(), data, length, UTF8, &error);
Local<Value> ret;

if (!maybe_ret.ToLocal(&ret)) {
CHECK(!error.IsEmpty());
env->isolate()->ThrowException(error);
return;
}

args.GetReturnValue().Set(ret);
}

// bytesCopied = copy(buffer, target[, targetStart][, sourceStart][, sourceEnd])
void Copy(const FunctionCallbackInfo<Value> &args) {
Expand Down Expand Up @@ -1282,6 +1325,7 @@ void Initialize(Local<Object> target,

SetMethod(context, target, "setBufferPrototype", SetBufferPrototype);
SetMethodNoSideEffect(context, target, "createFromString", CreateFromString);
SetMethodNoSideEffect(context, target, "decodeUTF8", DecodeUTF8);

SetMethodNoSideEffect(context, target, "byteLengthUtf8", ByteLengthUtf8);
SetMethod(context, target, "copy", Copy);
Expand Down Expand Up @@ -1339,6 +1383,7 @@ void Initialize(Local<Object> target,
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
registry->Register(SetBufferPrototype);
registry->Register(CreateFromString);
registry->Register(DecodeUTF8);

registry->Register(ByteLengthUtf8);
registry->Register(Copy);
Expand Down
2 changes: 1 addition & 1 deletion test/parallel/test-whatwg-encoding-custom-textdecoder.js
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ if (common.hasIntl) {
' fatal: false,\n' +
' ignoreBOM: true,\n' +
' [Symbol(flags)]: 4,\n' +
' [Symbol(handle)]: Converter {}\n' +
' [Symbol(handle)]: undefined\n' +
'}'
);
} else {
Expand Down

0 comments on commit 4ac830e

Please sign in to comment.