Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

util: add fast path for Latin1 decoding #55275

Merged
merged 15 commits into from
Dec 3, 2024
13 changes: 11 additions & 2 deletions lib/internal/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ const kDecoder = Symbol('decoder');
const kEncoder = Symbol('encoder');
const kFatal = Symbol('kFatal');
const kUTF8FastPath = Symbol('kUTF8FastPath');
const kLatin1FastPath = Symbol('kLatin1FastPath');
const kIgnoreBOM = Symbol('kIgnoreBOM');

const {
Expand All @@ -55,6 +56,7 @@ const {
encodeIntoResults,
encodeUtf8String,
decodeUTF8,
decodeLatin1,
} = binding;

const { Buffer } = require('buffer');
Expand Down Expand Up @@ -419,10 +421,13 @@ function makeTextDecoderICU() {
this[kFatal] = Boolean(options?.fatal);
// Fast paths are supported only for UTF-8 and Latin1 (the 'windows-1252' label).
this[kUTF8FastPath] = enc === 'utf-8';
this[kLatin1FastPath] = enc === 'windows-1252';
this[kHandle] = undefined;

if (!this[kUTF8FastPath]) {
this.#prepareConverter();
anonrig marked this conversation as resolved.
Show resolved Hide resolved
if (this[kUTF8FastPath]) {
decodeUTF8(this.input, this[kIgnoreBOM], this[kFatal]);
anonrig marked this conversation as resolved.
Show resolved Hide resolved
} else if (this[kLatin1FastPath]) {
decodeLatin1(this.input);
}
}

Expand All @@ -443,6 +448,10 @@ function makeTextDecoderICU() {
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
}

if (this[kLatin1FastPath]) {
anonrig marked this conversation as resolved.
Show resolved Hide resolved
return decodeLatin1(input);
mertcanaltin marked this conversation as resolved.
Show resolved Hide resolved
}

this.#prepareConverter();

validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
Expand Down
40 changes: 40 additions & 0 deletions src/encoding_binding.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "encoding_binding.h"
#include "ada.h"
#include "env-inl.h"
#include "node_buffer.h"
#include "node_errors.h"
#include "node_external_reference.h"
#include "simdutf.h"
Expand Down Expand Up @@ -226,6 +227,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8);
SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII);
SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode);
SetMethodNoSideEffect(isolate, target, "decodeLatin1", DecodeLatin1);
}

void BindingData::CreatePerContextProperties(Local<Object> target,
Expand All @@ -243,6 +245,44 @@ void BindingData::RegisterTimerExternalReferences(
registry->Register(DecodeUTF8);
registry->Register(ToASCII);
registry->Register(ToUnicode);
registry->Register(DecodeLatin1);
}

void BindingData::DecodeLatin1(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);

CHECK_GE(args.Length(), 1);
if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
args[0]->IsArrayBufferView())) {
return node::THROW_ERR_INVALID_ARG_TYPE(
env->isolate(),
"The \"input\" argument must be an instance of ArrayBuffer, "
"SharedArrayBuffer, or ArrayBufferView.");
}

ArrayBufferViewContents<uint8_t> buffer(args[0]);
const uint8_t* data = buffer.data();
size_t length = buffer.length();

if (length == 0) {
return args.GetReturnValue().SetEmptyString();
}

std::string result(length * 2, '\0');

size_t written = simdutf::convert_latin1_to_utf8(
reinterpret_cast<const char*>(data), length, &result[0]);
mertcanaltin marked this conversation as resolved.
Show resolved Hide resolved

if (written == 0) {
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
env->isolate(), "The encoded data was not valid for encoding latin1");
}

result.resize(written);

Local<Object> buffer_result =
node::Buffer::Copy(env, result.c_str(), result.length()).ToLocalChecked();
mertcanaltin marked this conversation as resolved.
Show resolved Hide resolved
args.GetReturnValue().Set(buffer_result);
}

} // namespace encoding_binding
Expand Down
1 change: 1 addition & 0 deletions src/encoding_binding.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class BindingData : public SnapshotableObject {
static void EncodeInto(const v8::FunctionCallbackInfo<v8::Value>& args);
static void EncodeUtf8String(const v8::FunctionCallbackInfo<v8::Value>& args);
static void DecodeUTF8(const v8::FunctionCallbackInfo<v8::Value>& args);
// V8 callback exposed on the binding as `decodeLatin1`: converts the Latin-1
// bytes held by args[0] (ArrayBuffer, SharedArrayBuffer or ArrayBufferView)
// to UTF-8; other argument types throw ERR_INVALID_ARG_TYPE.
static void DecodeLatin1(const v8::FunctionCallbackInfo<v8::Value>& args);

static void ToASCII(const v8::FunctionCallbackInfo<v8::Value>& args);
static void ToUnicode(const v8::FunctionCallbackInfo<v8::Value>& args);
Expand Down
75 changes: 75 additions & 0 deletions test/cctest/test_encoding_binding.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#include "encoding_binding.h"
#include "env-inl.h"
#include "gtest/gtest.h"
#include "node_test_fixture.h"
#include "v8.h"

namespace node {
namespace encoding_binding {

// Test helper: calls BindingData::DecodeLatin1 with exactly one argument
// (args[0]).
//
// The callback is wrapped in a v8::Function and invoked through V8, because a
// FunctionCallbackInfo cannot be built directly from an argument array.
//
// Returns true and stores the callback's return value in *result when the
// call completes without throwing. Returns false, leaving *result untouched,
// when the wrapper cannot be created or the call throws.
// NOTE(review): assumes env->context() is the context the Environment was
// created with, so Environment::GetCurrent(args) resolves inside the callback.
bool RunDecodeLatin1(Environment* env,
                     Local<Value> args[],
                     Local<Value>* result) {
  Isolate* isolate = env->isolate();
  v8::Local<v8::Context> context = env->context();
  TryCatch try_catch(isolate);

  v8::Local<v8::Function> decode_fn;
  if (!v8::Function::New(context, BindingData::DecodeLatin1)
           .ToLocal(&decode_fn)) {
    return false;
  }

  Local<Value> returned;
  const bool completed =
      decode_fn->Call(context, v8::Undefined(isolate), 1, args)
          .ToLocal(&returned);
  if (!completed || try_catch.HasCaught()) {
    return false;
  }

  *result = returned;
  return true;
}

// Fixture for the encoding binding tests; it adds no members of its own on
// top of NodeTestFixture.
class EncodingBindingTest : public NodeTestFixture {};

// Decoding the Latin-1 bytes C1 E9 F3 from a Uint8Array must succeed and
// produce the text "Áéó".
TEST_F(EncodingBindingTest, DecodeLatin1_ValidInput) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  const uint8_t latin1_data[] = {0xC1, 0xE9, 0xF3};
  Local<ArrayBuffer> ab = ArrayBuffer::New(isolate, sizeof(latin1_data));
  memcpy(ab->GetBackingStore()->Data(), latin1_data, sizeof(latin1_data));

  Local<Uint8Array> array = Uint8Array::New(ab, 0, sizeof(latin1_data));
  Local<Value> args[] = {array};

  Local<Value> result;
  // Fatal assertion: when the helper reports failure `result` is never
  // assigned, so the checks below must not run on it.
  ASSERT_TRUE(RunDecodeLatin1(env, args, &result));

  String::Utf8Value utf8_result(isolate, result);
  EXPECT_STREQ(*utf8_result, "Áéó");
}

// Decoding a zero-length Uint8Array must succeed and produce the empty
// string.
TEST_F(EncodingBindingTest, DecodeLatin1_EmptyInput) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  Local<ArrayBuffer> ab = ArrayBuffer::New(isolate, 0);
  Local<Uint8Array> array = Uint8Array::New(ab, 0, 0);
  Local<Value> args[] = {array};

  Local<Value> result;
  // Fatal assertion: when the helper reports failure `result` is never
  // assigned, so the checks below must not run on it.
  ASSERT_TRUE(RunDecodeLatin1(env, args, &result));

  String::Utf8Value utf8_result(isolate, result);
  EXPECT_STREQ(*utf8_result, "");
}

// Passing a string where a buffer-like value is required: the helper is
// expected to report failure.
TEST_F(EncodingBindingTest, DecodeLatin1_InvalidInput) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  Local<Value> not_a_buffer =
      String::NewFromUtf8Literal(isolate, "Invalid input");
  Local<Value> args[] = {not_a_buffer};

  Local<Value> decoded;
  const bool succeeded = RunDecodeLatin1(env, args, &decoded);
  EXPECT_FALSE(succeeded);
}

} // namespace encoding_binding
} // namespace node
Loading