-
Notifications
You must be signed in to change notification settings - Fork 29.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
buffer: add SIMD Neon optimization for `byteLength`
Co-authored-by: Keyhan Vakil <kvakil@sylph.kvakil.me>
Co-authored-by: Daniel Lemire <daniel@lemire.me>
- Loading branch information
1 parent
c9ec72d
commit a29a70d
Showing 4 changed files with 84 additions and 8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
#include "node_simd.h" | ||
|
||
#if NODE_HAS_SIMD_NEON | ||
#include <arm_neon.h> | ||
#endif | ||
|
||
namespace node { | ||
namespace simd { | ||
|
||
#if NODE_HAS_SIMD_NEON
// Returns `length` plus the number of bytes in [data, data + length) whose
// most significant bit is set. This equals the UTF-8 encoded size of the
// input when every byte is treated as a single code point below U+0100
// (bytes >= 0x80 need two UTF-8 bytes, all others need one).
//
// The sum is computed in 64 bits and then truncated to the 32-bit return
// type.
//
// NEON strategy: `vsraq_n_u8(acc, chunk, 7)` adds `chunk >> 7` (0 or 1 per
// byte) to sixteen independent 8-bit lane counters, and the counters are
// summed horizontally once per group of chunks.
uint32_t utf8_byte_length(const uint8_t* data, size_t length) {
  constexpr size_t kLanes = sizeof(uint8x16_t);
  // Number of chunks accumulated before each horizontal reduction. Every
  // chunk adds at most 1 to each 8-bit lane, so lanes stay far below 255.
  constexpr size_t kUnrolls = 256 / kLanes;
  constexpr size_t kUnrolledBytes = kLanes * kUnrolls;

  uint64_t high_bit_count = 0;

  // Main loop: full groups of kUnrolls chunks.
  const uint8_t* const unroll_end =
      data + (length / kUnrolledBytes) * kUnrolledBytes;
  while (data < unroll_end) {
    uint8x16_t acc = vdupq_n_u8(0);
    for (size_t i = 0; i < kUnrolls; ++i, data += kLanes) {
      acc = vsraq_n_u8(acc, vld1q_u8(data), 7);
    }
    // The lane total can be exactly kUnrolls * kLanes == 256, which does not
    // fit in the uint8_t returned by vaddvq_u8 (it would wrap to 0). Use the
    // widening reduction, which returns uint16_t.
    high_bit_count += vaddlvq_u8(acc);
  }

  // Remaining whole chunks (fewer than kUnrolls of them).
  const size_t length_after_unroll = length % kUnrolledBytes;
  const uint8_t* const simd_end =
      data + (length_after_unroll / kLanes) * kLanes;
  uint8x16_t acc = vdupq_n_u8(0);
  for (; data < simd_end; data += kLanes) {
    acc = vsraq_n_u8(acc, vld1q_u8(data), 7);
  }
  high_bit_count += vaddlvq_u8(acc);

  // Trailing bytes that do not fill a whole chunk.
  const uint8_t* const scalar_end = data + (length % kLanes);
  for (; data < scalar_end; ++data) {
    high_bit_count += *data >> 7;
  }

  return static_cast<uint32_t>(high_bit_count + length);
}
#else
// Returns `length` plus the number of bytes in [data, data + length) whose
// most significant bit is set. This equals the UTF-8 encoded size of the
// input when every byte is treated as a single code point below U+0100
// (bytes >= 0x80 need two UTF-8 bytes, all others need one).
//
// The result is truncated to the 32-bit return type.
uint32_t utf8_byte_length(const uint8_t* data, size_t length) {
  // Start from `length` (one output byte per input byte) and add one more
  // for every byte with the top bit set.
  size_t total = length;
  // The index must be as wide as `length`; a 32-bit index would wrap around
  // and never terminate for inputs of 4 GiB or more.
  for (size_t i = 0; i < length; ++i) {
    total += static_cast<size_t>(data[i] >> 7);
  }
  return static_cast<uint32_t>(total);
}
#endif
|
||
} // namespace simd | ||
} // namespace node |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#ifndef SRC_NODE_SIMD_H_
#define SRC_NODE_SIMD_H_

// NODE_HAS_SIMD_NEON is defined to 1 when compiling for 64-bit ARM
// (detected through the GCC/Clang `__aarch64__` or MSVC `_M_ARM64` macros)
// and is left undefined otherwise, so `#if NODE_HAS_SIMD_NEON` is false on
// every other target.
#if defined(__aarch64__) || defined(_M_ARM64)
#define NODE_HAS_SIMD_NEON 1
#endif

#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

// <cstddef> and <cstdint> provide size_t, uint8_t and uint32_t used in the
// declaration below, so this header does not depend on transitive includes.
#include <cstddef>
#include <cstdint>
#include <string_view>

namespace node {
namespace simd {

// Returns `length` plus the number of bytes in [input, input + length) that
// have their most significant bit set, truncated to 32 bits. This is the
// UTF-8 encoded size of the input when each byte is treated as one code
// point below U+0100.
uint32_t utf8_byte_length(const uint8_t* input, size_t length);

}  // namespace simd
}  // namespace node

#endif  // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

#endif  // SRC_NODE_SIMD_H_