Skip to content

Commit

Permalink
Update util.cc
Browse files (browse the repository at this point in the history)
Co-authored-by: Daniel Lemire <daniel@lemire.me>
  • Loading branch information
anonrig and lemire committed Sep 12, 2024
1 parent 9ea7fc4 commit 435fa04
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 27 deletions.
40 changes: 22 additions & 18 deletions src/string_bytes.cc
Original file line number Diff line number Diff line change
Expand Up @@ -419,47 +419,47 @@ Maybe<size_t> StringBytes::StorageSize(Isolate* isolate,
Local<Value> val,
enum encoding encoding) {
HandleScope scope(isolate);
size_t data_size = 0;
bool is_buffer = Buffer::HasInstance(val);

if (is_buffer && (encoding == BUFFER || encoding == LATIN1)) {
if (Buffer::HasInstance(val) && (encoding == BUFFER || encoding == LATIN1)) {
return Just(Buffer::Length(val));
}

Local<String> str;
if (!val->ToString(isolate->GetCurrentContext()).ToLocal(&str))
return Nothing<size_t>();
String::ValueView view(isolate, str);
size_t data_size = 0;

switch (encoding) {
case ASCII:
case LATIN1:
data_size = str->Length();
data_size = view.length();
break;

case BUFFER:
case UTF8:
// A single UCS2 codepoint never takes up more than 3 utf8 bytes.
// It is an exercise for the caller to decide when a string is
// long enough to justify calling Size() instead of StorageSize()
data_size = 3 * str->Length();
data_size = 3 * view.length();
break;

case UCS2:
data_size = str->Length() * sizeof(uint16_t);
data_size = view.length() * sizeof(uint16_t);
break;

case BASE64URL:
data_size = simdutf::base64_length_from_binary(str->Length(),
data_size = simdutf::base64_length_from_binary(view.length(),
simdutf::base64_url);
break;

case BASE64:
data_size = simdutf::base64_length_from_binary(str->Length());
data_size = simdutf::base64_length_from_binary(view.length());
break;

case HEX:
CHECK(str->Length() % 2 == 0 && "invalid hex string length");
data_size = str->Length() / 2;
CHECK(view.length() % 2 == 0 && "invalid hex string length");
data_size = view.length() / 2;
break;

default:
Expand All @@ -480,32 +480,36 @@ Maybe<size_t> StringBytes::Size(Isolate* isolate,
Local<String> str;
if (!val->ToString(isolate->GetCurrentContext()).ToLocal(&str))
return Nothing<size_t>();
String::ValueView view(isolate, str);

switch (encoding) {
case ASCII:
case LATIN1:
return Just<size_t>(str->Length());
return Just<size_t>(view.length());

case BUFFER:
case UTF8:
return Just<size_t>(str->Utf8Length(isolate));
if (view.is_one_byte()) {
return Just<size_t>(simdutf::utf8_length_from_latin1(
reinterpret_cast<const char*>(view.data8()), view.length()));
}
return Just<size_t>(simdutf::utf8_length_from_utf16(
reinterpret_cast<const char16_t*>(view.data16()), view.length()));

case UCS2:
return Just(str->Length() * sizeof(uint16_t));
return Just(view.length() * sizeof(uint16_t));

case BASE64URL: {
String::Value value(isolate, str);
return Just(simdutf::base64_length_from_binary(value.length(),
return Just(simdutf::base64_length_from_binary(view.length(),
simdutf::base64_url));
}

case BASE64: {
String::Value value(isolate, str);
return Just(simdutf::base64_length_from_binary(value.length()));
return Just(simdutf::base64_length_from_binary(view.length()));
}

case HEX:
return Just<size_t>(str->Length() / 2);
return Just<size_t>(view.length() / 2);
}

UNREACHABLE();
Expand Down
24 changes: 15 additions & 9 deletions src/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,20 +104,26 @@ static void MakeUtf8String(Isolate* isolate,
if (!value->ToString(isolate->GetCurrentContext()).ToLocal(&string)) return;
String::ValueView value_view(isolate, string);

auto value_length = value_view.length();

if (value_view.is_one_byte()) {
target->AllocateSufficientStorage(value_view.length() + 1);
target->SetLengthAndZeroTerminate(value_view.length());
memcpy(target->out(),
reinterpret_cast<const char*>(value_view.data8()),
value_view.length());
auto const_char = reinterpret_cast<const char*>(value_view.data8());
auto expected_length =
target->capacity() > (static_cast<size_t>(value_length) * 2 + 1)
? simdutf::utf8_length_from_latin1(const_char, value_length)
: value_length * 2;

// Add +1 for null termination.
target->AllocateSufficientStorage(expected_length + 1);
target->SetLengthAndZeroTerminate(expected_length);
auto actual_length = simdutf::convert_latin1_to_utf8(
const_char, value_length, target->out());
target->SetLength(actual_length);
return;
}

// Add +1 for null termination.
auto storage = simdutf::utf8_length_from_utf16(
reinterpret_cast<const char16_t*>(value_view.data16()),
value_view.length()) +
1;
size_t storage = (3 * value_length) + 1;
target->AllocateSufficientStorage(storage);

// TODO(@anonrig): Use simdutf to speed up non-one-byte strings once it's
Expand Down

0 comments on commit 435fa04

Please sign in to comment.