Skip to content

Commit

Permalink
Add support for AArch64 CRC32 instructions
Browse files Browse the repository at this point in the history
  • Loading branch information
valpackett committed Jan 17, 2019
1 parent 5371d23 commit 0c44594
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 0 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ rand = "0.4"
[features]
default = ["std"]
std = []
nightly = []

[[bench]]
name = "bench"
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ This crate contains multiple CRC32 implementations:

- A fast baseline implementation which processes up to 16 bytes per iteration
- An optimized implementation for modern `x86` using `sse` and `pclmulqdq` instructions
- An optimized implementation for `aarch64` using `crc32` instructions

Calling the `Hasher::new` constructor performs feature detection at runtime to select the best
implementation for the current CPU feature set.
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
//! optimal implementation for the current CPU feature set.

#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(all(feature = "nightly", target_arch = "aarch64"), feature(stdsimd, aarch64_target_feature))]

#[deny(missing_docs)]
#[cfg(test)]
Expand Down
85 changes: 85 additions & 0 deletions src/specialized/aarch64.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
use std::arch::aarch64 as arch;

/// CRC32 hasher state backed by the AArch64 `crc32` instructions.
///
/// Values are only handed out by [`State::new`], which checks at runtime that
/// the CPU supports the `crc` feature.
#[derive(Clone)]
pub struct State {
    // Running checksum, exactly as passed to and returned by `calculate`.
    state: u32,
}

impl State {
    /// Creates a state whose running checksum is zero.
    ///
    /// Returns `None` when runtime detection reports that the `crc` feature
    /// is not available on the current CPU.
    pub fn new() -> Option<Self> {
        if !is_aarch64_feature_detected!("crc") {
            return None;
        }
        Some(State { state: 0 })
    }

    /// Feeds `buf` into the running checksum.
    pub fn update(&mut self, buf: &[u8]) {
        // SAFETY: `State::new` only returns a value after the `crc` feature
        // has been detected, so the instructions used by `calculate` are
        // supported by the CPU.
        let next = unsafe { calculate(self.state, buf) };
        self.state = next;
    }

    /// Consumes the state and returns the checksum accumulated so far.
    pub fn finalize(self) -> u32 {
        let State { state } = self;
        state
    }

    /// Sets the running checksum back to zero.
    pub fn reset(&mut self) {
        *self = State { state: 0 };
    }

    /// Replaces the running checksum with the result of `::combine::combine`
    /// applied to it, `other` and `amount`.
    ///
    /// NOTE(review): presumably `other` is the checksum of data that follows
    /// this state's data and `amount` is that data's length in bytes; confirm
    /// against the `combine` module.
    pub fn combine(&mut self, other: u32, amount: u64) {
        let merged = ::combine::combine(self.state, other, amount);
        self.state = merged;
    }
}

/// Computes the CRC32 of `data`, continuing from the running checksum `crc`.
///
/// The buffer is split into an unaligned head, a middle section aligned for
/// `u64`, and an unaligned tail. Head and tail are hashed one byte at a time
/// with `__crc32b`; the middle is hashed eight bytes at a time with
/// `__crc32d`.
///
/// # Safety
///
/// The caller must ensure that the CPU supports the `crc` target feature,
/// e.g. by checking `is_aarch64_feature_detected!("crc")` beforehand.
// target_feature is necessary to allow rustc to inline the crc32* wrappers
#[target_feature(enable = "crc")]
pub unsafe fn calculate(crc: u32, data: &[u8]) -> u32 {
    let mut c32 = !crc;
    // Every bit pattern is a valid `u64`, so reinterpreting the aligned
    // middle of the byte slice is sound.
    let (pre_quad, quads, post_quad) = data.align_to::<u64>();

    c32 = pre_quad.iter().fold(c32, |acc, &b| arch::__crc32b(acc, b));

    // The 64-bit CRC instruction consumes the least-significant byte of its
    // operand first, so every word must hold the buffer bytes in little-endian
    // order. `u64::from_le` is a no-op on little-endian targets and swaps the
    // bytes on big-endian ones, keeping the result equal to a byte-wise hash.

    // unrolling increases performance by a lot
    let mut quad_iter = quads.chunks_exact(8);
    for chunk in &mut quad_iter {
        c32 = arch::__crc32d(c32, u64::from_le(chunk[0]));
        c32 = arch::__crc32d(c32, u64::from_le(chunk[1]));
        c32 = arch::__crc32d(c32, u64::from_le(chunk[2]));
        c32 = arch::__crc32d(c32, u64::from_le(chunk[3]));
        c32 = arch::__crc32d(c32, u64::from_le(chunk[4]));
        c32 = arch::__crc32d(c32, u64::from_le(chunk[5]));
        c32 = arch::__crc32d(c32, u64::from_le(chunk[6]));
        c32 = arch::__crc32d(c32, u64::from_le(chunk[7]));
    }
    // Fewer than eight words are left over after the unrolled loop.
    c32 = quad_iter
        .remainder()
        .iter()
        .fold(c32, |acc, &q| arch::__crc32d(acc, u64::from_le(q)));

    c32 = post_quad.iter().fold(c32, |acc, &b| arch::__crc32b(acc, b));

    !c32
}

#[cfg(test)]
mod test {
    quickcheck! {
        // Property: hashing the same sequence of slices with the baseline and
        // the aarch64 implementation must produce the same checksum.
        fn check_against_baseline(chunks: Vec<(Vec<u8>, usize)>) -> bool {
            let mut expected = super::super::super::baseline::State::new();
            let mut actual = super::State::new().expect("not supported");
            for (bytes, raw_offset) in chunks {
                // simulate random alignments by skipping up to 15 leading bytes;
                // a chunk that is not longer than the skip is hashed whole
                let skip = raw_offset & 0xF;
                let start = if bytes.len() <= skip { 0 } else { skip };
                let window = &bytes[start..];
                expected.update(window);
                actual.update(window);
            }
            actual.finalize() == expected.finalize()
        }
    }
}
3 changes: 3 additions & 0 deletions src/specialized/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ cfg_if! {
))] {
mod pclmulqdq;
pub use self::pclmulqdq::State;
} else if #[cfg(all(feature = "nightly", target_arch = "aarch64"))] {
mod aarch64;
pub use self::aarch64::State;
} else {
#[derive(Clone)]
pub enum State {}
Expand Down

0 comments on commit 0c44594

Please sign in to comment.