Skip to content

Commit

Permalink
Add support for AArch64 CRC32 instructions
Browse files Browse the repository at this point in the history
  • Loading branch information
valpackett committed Dec 8, 2018
1 parent 1bc367a commit 188eab0
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 0 deletions.
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,16 @@ keywords = ["checksum", "crc", "crc32", "simd", "fast"]

[dependencies]
cfg-if = "0.1"
stdsimd = { path = "../../rust-lang-nursery/stdsimd/crates/stdsimd" }

[dev-dependencies]
bencher = "0.1"
quickcheck = { version = "0.6", default-features = false }
rand = "0.4"

[features]
nightly = []

[[bench]]
name = "bench"
harness = false
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ This crate contains multiple CRC32 implementations:

- A fast baseline implementation which processes up to 16 bytes per iteration
- An optimized implementation for modern `x86` using `sse` and `pclmulqdq` instructions
- An optimized implementation for `aarch64` using `crc32` instructions

Calling the `Hasher::new` constructor performs feature detection at runtime to select the best
available implementation for the current CPU feature set.
Expand Down
7 changes: 7 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@
//! Calling the `Hasher::new` constructor performs feature detection at runtime to select the best
//! available implementation for the current CPU feature set.

#![cfg_attr(all(feature = "nightly", target_arch = "aarch64"), feature(aarch64_target_feature, stdsimd))]

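// XXX: temporary - remove this `extern crate` (presumably once the AArch64 CRC intrinsics can be
// imported from `std::arch` instead of the external `stdsimd` crate).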
#[cfg(all(feature = "nightly", target_arch = "aarch64"))]
#[macro_use]
extern crate stdsimd;

#[deny(missing_docs)]
#[cfg(test)]
#[macro_use]
Expand Down
133 changes: 133 additions & 0 deletions src/specialized/aarch64.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
//use std::arch::aarch64 as arch;
use stdsimd::arch::aarch64 as arch;

/// CRC-32 hasher state for the AArch64 implementation based on the `crc` CPU feature.
///
/// Instances are only handed out by `State::new`, which checks for the feature at runtime.
#[derive(Clone)]
pub struct State {
/// Running checksum: starts at 0 and is replaced by the result of `calculate` on every update.
state: u32,
}

impl State {
pub fn new() -> Option<Self> {
if is_aarch64_feature_detected!("crc") {
// SAFETY: The conditions above ensure that all
// required instructions are supported by the CPU.
Some(Self { state: 0 })
} else {
None
}
}

pub fn update(&mut self, buf: &[u8]) {
// SAFETY: The `State::new` constructor ensures that all
// required instructions are supported by the CPU.
self.state = unsafe { calculate(self.state, buf) }
}

pub fn finalize(self) -> u32 {
self.state
}

pub fn reset(&mut self) {
self.state = 0;
}

pub fn combine(&mut self, other: u32, amount: u64) {
self.state = ::combine::combine(self.state, other, amount);
}
}

/// Computes the CRC-32 of `data` using the AArch64 CRC32 instructions, continuing from the
/// running checksum `crc` (pass `0` to start a new checksum).
///
/// The input is split into an unaligned byte prefix, a run of 8-byte-aligned `u64` words and
/// a byte suffix. The prefix and suffix are fed to `crc32b` one byte at a time; the aligned
/// middle is fed to `crc32x` eight bytes at a time. Every byte is consumed exactly once, in
/// order, so the result does not depend on the alignment of `data`.
///
/// NOTE(review): like the original word-sized loads, the `u64` path assumes a little-endian
/// target - confirm if big-endian AArch64 ever needs to be supported.
///
/// # Safety
///
/// The caller must ensure that the CPU supports the `crc` target feature, e.g. by checking
/// `is_aarch64_feature_detected!("crc")` first (as `State::new` does).
#[target_feature(enable = "crc")]
pub unsafe fn calculate(crc: u32, data: &[u8]) -> u32 {
    // The checksum is bit-inverted on entry and again on exit.
    let mut c32 = !crc;

    // SAFETY: Every bit pattern is a valid `u64`, so reinterpreting initialized bytes as
    // `u64` is sound. `align_to` guarantees that `quads` is correctly aligned, which the
    // previous hand-rolled pointer arithmetic did not (it could dereference a `*const u64`
    // that was only 4-byte aligned).
    let (head, quads, tail) = data.align_to::<u64>();

    // Unaligned leading bytes. Correctness does not depend on how short `align_to` manages
    // to make this prefix.
    for &byte in head {
        c32 = arch::crc32b(c32, byte as _);
    }

    // Aligned body, manually unrolled to 32 bytes per iteration like the original loop.
    let mut blocks = quads.chunks_exact(4);
    for block in &mut blocks {
        c32 = arch::crc32x(c32, block[0]);
        c32 = arch::crc32x(c32, block[1]);
        c32 = arch::crc32x(c32, block[2]);
        c32 = arch::crc32x(c32, block[3]);
    }

    // Up to three aligned words that did not fill a whole unrolled block.
    for &quad in blocks.remainder() {
        c32 = arch::crc32x(c32, quad);
    }

    // Trailing bytes that do not make up a full word.
    for &byte in tail {
        c32 = arch::crc32b(c32, byte as _);
    }

    !c32
}

#[cfg(test)]
mod test {
    quickcheck! {
        /// Feeds the same randomly generated, randomly offset chunks to the baseline and the
        /// AArch64 implementations and requires both to end up with the same checksum.
        fn check_against_baseline(chunks: Vec<(Vec<u8>, usize)>) -> bool {
            let mut expected = super::super::super::baseline::State::new();
            let mut actual = super::State::new().expect("not supported");

            for (bytes, raw_offset) in chunks {
                // Simulate random alignments by skipping up to 15 leading bytes; chunks
                // that are not longer than the skip are hashed whole.
                let skip = raw_offset & 0xF;
                let input = if skip < bytes.len() {
                    &bytes[skip..]
                } else {
                    &bytes[..]
                };

                expected.update(input);
                actual.update(input);
            }

            actual.finalize() == expected.finalize()
        }
    }
}
3 changes: 3 additions & 0 deletions src/specialized/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ cfg_if! {
))] {
mod pclmulqdq;
pub use self::pclmulqdq::State;
} else if #[cfg(all(feature = "nightly", target_arch = "aarch64"))] {
mod aarch64;
pub use self::aarch64::State;
} else {
#[derive(Clone)]
pub enum State {}
Expand Down

0 comments on commit 188eab0

Please sign in to comment.