Skip to content

Commit

Permalink
simdutf8
Browse files Browse the repository at this point in the history
  • Loading branch information
ijl committed Aug 17, 2021
1 parent 2719fc7 commit fb1a3bb
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 12 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ unstable-simd = [
"bytecount/generic-simd",
"bytecount/runtime-dispatch-simd",
"encoding_rs/simd-accel",
"simdutf8/aarch64_neon",
"simdutf8/std",
]

[dependencies]
Expand All @@ -71,6 +73,7 @@ pyo3 = { version = "^0.14.2", default_features = false, features = ["extension-m
ryu = { version = "1", default_features = false }
serde = { version = "1", default_features = false }
serde_json = { version = "^1.0.66", default_features = false, features = ["std", "float_roundtrip"] }
simdutf8 = { version = "0.1", default_features = false, optional = true }
smallvec = { version = "^1.6", default_features = false, features = ["union", "write"] }

[profile.release]
Expand Down
16 changes: 6 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1162,18 +1162,14 @@ If someone implements it well.
To package orjson requires [Rust](https://www.rust-lang.org/) and the
[maturin](https://github.com/PyO3/maturin) build tool.

This is an example for the x86_64-unknown-linux-gnu target on the Rust
nightly channel:
This is an example for x86_64 on the Rust nightly channel:

```sh
RUSTFLAGS="-C target-cpu=k8" maturin build --no-sdist --release --strip --cargo-extra-args="--features=unstable-simd"
````
To build on the stable channel, do not specify `--features=unstable-simd`. It
is disabled by default. There is a performance benefit of something like
10% when building on nightly with `unstable-simd`.

The explicit `RUSTFLAGS` enables SSE2 on amd64. aarch64 does not need any
`target-feature` specified.
export RUSTFLAGS="-C target-cpu=k8"
maturin build --no-sdist --release --strip --cargo-extra-args="--features=unstable-simd"
```

To build on the stable channel, do not specify `--features=unstable-simd`.

The project's own CI tests against `nightly-2021-08-04` and stable 1.54. It
is prudent to pin the nightly version because that channel can introduce
Expand Down
2 changes: 1 addition & 1 deletion develop
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

rm -f target/wheels/*

export RUSTFLAGS="-C link-arg=-fuse-ld=lld -C target-cpu=k8"
export RUSTFLAGS="-C target-cpu=k8"

maturin build --no-sdist --compatibility off -i python3 --release "$@"

Expand Down
26 changes: 25 additions & 1 deletion src/deserialize/deserializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,30 @@ use std::fmt;
use std::os::raw::c_char;
use std::ptr::NonNull;

/// Reports whether `buf` consists entirely of valid UTF-8.
///
/// This variant is compiled for x86_64 when the `unstable-simd` feature is
/// enabled. SSE4.2 support is probed at runtime: when it is present the
/// buffer is validated with `simdutf8`, otherwise with `encoding_rs`.
#[cfg(all(target_arch = "x86_64", feature = "unstable-simd"))]
fn is_valid_utf8(buf: &[u8]) -> bool {
    if !std::is_x86_feature_detected!("sse4.2") {
        // No SSE4.2 on this CPU: the buffer is valid only when the valid
        // prefix reported by encoding_rs spans all of it.
        let valid_prefix_len = encoding_rs::Encoding::utf8_valid_up_to(buf);
        return valid_prefix_len == buf.len();
    }
    simdutf8::basic::from_utf8(buf).is_ok()
}

/// Reports whether `buf` consists entirely of valid UTF-8.
///
/// This variant is compiled for x86_64 when the `unstable-simd` feature is
/// disabled; validation is delegated to `encoding_rs`.
#[cfg(all(target_arch = "x86_64", not(feature = "unstable-simd")))]
fn is_valid_utf8(buf: &[u8]) -> bool {
    // The buffer is valid only when the valid prefix covers every byte.
    let valid_prefix_len = encoding_rs::Encoding::utf8_valid_up_to(buf);
    buf.len() == valid_prefix_len
}

/// Reports whether `buf` consists entirely of valid UTF-8.
///
/// This variant is compiled for aarch64. `simdutf8` is an optional
/// dependency that is only pulled in by the `unstable-simd` feature, so it
/// may only be referenced when that feature is enabled; otherwise validation
/// goes through `encoding_rs`, matching the x86_64 build without the feature.
#[cfg(target_arch = "aarch64")]
fn is_valid_utf8(buf: &[u8]) -> bool {
    #[cfg(feature = "unstable-simd")]
    {
        simdutf8::basic::from_utf8(buf).is_ok()
    }
    #[cfg(not(feature = "unstable-simd"))]
    {
        // Without the feature the simdutf8 crate is not linked at all, so
        // referencing it here would be an unresolved-crate build error.
        encoding_rs::Encoding::utf8_valid_up_to(buf) == buf.len()
    }
}

#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
/// Reports whether `buf` consists entirely of valid UTF-8.
///
/// Fallback for architectures other than x86_64 and aarch64. It uses the
/// standard library validator rather than `simdutf8`, because `simdutf8` is
/// an optional dependency that is absent unless the `unstable-simd` feature
/// is enabled, and this function must build either way.
fn is_valid_utf8(buf: &[u8]) -> bool {
    std::str::from_utf8(buf).is_ok()
}

pub fn deserialize(
ptr: *mut pyo3::ffi::PyObject,
) -> std::result::Result<NonNull<pyo3::ffi::PyObject>, DeserializeError<'static>> {
Expand Down Expand Up @@ -55,7 +79,7 @@ pub fn deserialize(
));
}
contents = unsafe { std::slice::from_raw_parts(buffer, length) };
if encoding_rs::Encoding::utf8_valid_up_to(contents) != length {
if !is_valid_utf8(contents) {
return Err(DeserializeError::new(Cow::Borrowed(INVALID_STR), 0, 0, ""));
}
}
Expand Down

0 comments on commit fb1a3bb

Please sign in to comment.