Skip to content

Commit

Permalink
buffer: add SIMD Neon optimization for byteLength
Browse files Browse the repository at this point in the history
Co-authored-by: Keyhan Vakil <kvakil@sylph.kvakil.me>
Co-authored-by: Daniel Lemire <daniel@lemire.me>
  • Loading branch information
3 people committed May 17, 2023
1 parent c9ec72d commit a29a70d
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 8 deletions.
1 change: 1 addition & 0 deletions node.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@
'src/node_report_utils.cc',
'src/node_sea.cc',
'src/node_serdes.cc',
'src/node_simd.cc',
'src/node_shadow_realm.cc',
'src/node_snapshotable.cc',
'src/node_sockaddr.cc',
Expand Down
11 changes: 3 additions & 8 deletions src/node_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "node_external_reference.h"
#include "node_i18n.h"
#include "node_internals.h"
#include "node_simd.h"

#include "env-inl.h"
#include "simdutf.h"
Expand Down Expand Up @@ -743,14 +744,8 @@ void SlowByteLengthUtf8(const FunctionCallbackInfo<Value>& args) {

// Fast-call variant of the UTF-8 byte-length computation for one-byte strings.
//
// `receiver` is not used. `source` exposes the raw one-byte character data and
// its length; the counting itself is delegated to
// node::simd::utf8_byte_length(), which replaces the scalar loop that
// previously lived here (and which left the delegating return unreachable).
uint32_t FastByteLengthUtf8(Local<Value> receiver,
                            const v8::FastOneByteString& source) {
  return node::simd::utf8_byte_length(
      reinterpret_cast<const uint8_t*>(source.data), source.length);
}

static v8::CFunction fast_byte_length_utf8(
Expand Down
58 changes: 58 additions & 0 deletions src/node_simd.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#include "node_simd.h"

#if NODE_HAS_SIMD_NEON
#include <arm_neon.h>
#endif

namespace node {
namespace simd {

#if NODE_HAS_SIMD_NEON
// Returns `length` plus the number of bytes in [data, data + length) whose
// most significant bit is set, i.e. one extra output byte for every input
// byte >= 0x80. The sum is truncated to 32 bits on return.
//
// NEON implementation: bytes are processed 16 at a time. For every chunk,
// vsraq_n_u8(acc, chunk, 7) shifts each lane right by 7 (leaving 0 or 1) and
// adds it to the matching 8-bit lane of `acc`.
uint32_t utf8_byte_length(const uint8_t* data, size_t length) {
  uint64_t result{0};

  constexpr size_t lanes = sizeof(uint8x16_t);
  // Number of chunks accumulated before the lanes are reduced. Each lane grows
  // by at most 1 per chunk, so 16 chunks keep every 8-bit lane <= 16.
  constexpr size_t unrolls = 256 / lanes;
  constexpr size_t unrolled_lanes = lanes * unrolls;

  const uint8_t* unroll_end =
      data + (length / unrolled_lanes) * unrolled_lanes;
  const size_t length_after_unroll = length % unrolled_lanes;
  while (data < unroll_end) {
    uint8x16_t acc = {};
    for (size_t i = 0; i < unrolls; ++i, data += lanes) {
      const uint8x16_t chunk = vld1q_u8(data);
      acc = vsraq_n_u8(acc, chunk, 7);
    }
    // The horizontal sum can reach 16 lanes * 16 = 256, which does not fit in
    // the uint8_t returned by vaddvq_u8 (it would wrap to 0). Use the widening
    // reduction, which returns a uint16_t.
    result += vaddlvq_u8(acc);
  }

  // Remaining whole 16-byte chunks (fewer than `unrolls` of them).
  const uint8_t* simd_end = data + (length_after_unroll / lanes) * lanes;
  const size_t length_after_simd = length % lanes;
  uint8x16_t acc = {};
  for (; data < simd_end; data += lanes) {
    const uint8x16_t chunk = vld1q_u8(data);
    acc = vsraq_n_u8(acc, chunk, 7);
  }
  result += vaddlvq_u8(acc);

  // Scalar tail: fewer than 16 bytes left.
  const uint8_t* scalar_end = data + length_after_simd;
  for (; data < scalar_end; ++data) {
    result += *data >> 7;
  }

  return static_cast<uint32_t>(result + length);
}
#else
// Returns `length` plus the number of bytes in [data, data + length) whose
// most significant bit is set, i.e. one extra output byte for every input
// byte >= 0x80. The sum is truncated to 32 bits.
//
// Portable scalar implementation used when NEON is not available.
uint32_t utf8_byte_length(const uint8_t* data, size_t length) {
  uint32_t result = 0;
  // Index with size_t: a 32-bit index compared against a size_t length would
  // wrap around and never terminate for lengths above UINT32_MAX.
  for (size_t i = 0; i < length; ++i) {
    result += (data[i] >> 7);
  }
  result += static_cast<uint32_t>(length);
  return result;
}
#endif

}  // namespace simd
}  // namespace node
22 changes: 22 additions & 0 deletions src/node_simd.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#ifndef SRC_NODE_SIMD_H_
#define SRC_NODE_SIMD_H_

// NODE_HAS_SIMD_NEON is defined (to 1) only when compiling for 64-bit ARM;
// it is left undefined on every other target.
#if defined(__aarch64__) || defined(_M_ARM64)
#define NODE_HAS_SIMD_NEON 1
#endif

#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

// <cstddef>/<cstdint> provide size_t, uint8_t and uint32_t used below, so the
// header does not rely on being included after something that defines them.
#include <cstddef>
#include <cstdint>
#include <string_view>

namespace node {
namespace simd {

// Returns `length` plus the number of bytes in [input, input + length) that
// have their most significant bit set (each such byte costs one extra byte),
// as a 32-bit value.
uint32_t utf8_byte_length(const uint8_t* input, size_t length);

}  // namespace simd
}  // namespace node

#endif  // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

#endif  // SRC_NODE_SIMD_H_

0 comments on commit a29a70d

Please sign in to comment.