diff --git a/.appveyor.yml b/.appveyor.yml index 352b3bc3aa915..bd02240ad1060 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -1,4 +1,9 @@ environment: + # We don't want to do identical comdat folding as it messes up the ability to + # generate lossless backtraces in some cases. This is enabled by rustc by + # default so pass a flag to disable it to ensure our tests work ok. + RUSTFLAGS: -Clink-args=/OPT:NOICF + matrix: - TARGET: x86_64-pc-windows-msvc @@ -15,4 +20,5 @@ build: false test_script: - cargo test --target %TARGET% + - set RUST_BACKTRACE=1 - cargo test --target %TARGET% --release diff --git a/Cargo.toml b/Cargo.toml index 0da061e71c809..87cd5dd14ca44 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,7 @@ debug = true opt-level = 3 [profile.bench] -debug = 1 +debug = true opt-level = 3 [dev-dependencies] diff --git a/assert-instr/assert-instr-macro/src/lib.rs b/assert-instr/assert-instr-macro/src/lib.rs index 1c4126149097a..9d7093a523223 100644 --- a/assert-instr/assert-instr-macro/src/lib.rs +++ b/assert-instr/assert-instr-macro/src/lib.rs @@ -44,7 +44,9 @@ pub fn assert_instr(attr: TokenStream, item: TokenStream) -> TokenStream { #[allow(non_snake_case)] {ignore} fn assert_instr_{name}() {{ - ::assert_instr::assert({name} as usize, \"{instr}\"); + ::assert_instr::assert({name} as usize, + \"{name}\", + \"{instr}\"); }} ", name = name.as_str(), instr = instr.as_str(), ignore = ignore); let test: TokenStream = test.parse().unwrap(); diff --git a/assert-instr/src/lib.rs b/assert-instr/src/lib.rs index 596668a8f59b4..ada7b8bc3fa0b 100644 --- a/assert-instr/src/lib.rs +++ b/assert-instr/src/lib.rs @@ -221,29 +221,39 @@ fn normalize(symbol: &str) -> String { /// /// This asserts that the function at `fnptr` contains the instruction /// `expected` provided. 
-pub fn assert(fnptr: usize, expected: &str) { +pub fn assert(fnptr: usize, fnname: &str, expected: &str) { // Translate this function pointer to a symbolic name that we'd have found // in the disassembly. let mut sym = None; backtrace::resolve(fnptr as *mut _, |name| { sym = name.name().and_then(|s| s.as_str()).map(normalize); }); - let sym = match sym { + + let functions = match sym.as_ref().and_then(|s| DISASSEMBLY.get(s)) { Some(s) => s, - None => panic!("failed to get symbol of function pointer: {}", fnptr), + None => { + if let Some(sym) = sym { + println!("assumed symbol name: `{}`", sym); + } + println!("maybe related functions"); + for f in DISASSEMBLY.keys().filter(|k| k.contains(fnname)) { + println!("\t- {}", f); + } + panic!("failed to find disassembly of {:#x} ({})", fnptr, fnname); + } }; - // Find our function in the list of all disassembled functions - let functions = &DISASSEMBLY.get(&sym) - .expect(&format!("failed to find disassembly of {}", sym)); assert_eq!(functions.len(), 1); let function = &functions[0]; // Look for `expected` as the first part of any instruction in this // function, returning if we do indeed find it. for instr in function.instrs.iter() { + // Gets the first instruction, e.g. tzcntl in tzcntl %rax,%rax if let Some(part) = instr.parts.get(0) { - if part == expected { + // Truncates the instruction with the length of the expected + // instruction: tzcntl => tzcnt and compares that. + if part.starts_with(expected) { return } } @@ -251,7 +261,7 @@ pub fn assert(fnptr: usize, expected: &str) { // Help debug by printing out the found disassembly, and then panic as we // didn't find the instruction. 
- println!("disassembly for {}: ", sym); + println!("disassembly for {}: ", sym.as_ref().unwrap()); for (i, instr) in function.instrs.iter().enumerate() { print!("\t{:2}: ", i); for part in instr.parts.iter() { @@ -261,4 +271,3 @@ pub fn assert(fnptr: usize, expected: &str) { } panic!("failed to find instruction `{}` in the disassembly", expected); } - diff --git a/src/arm/mod.rs b/src/arm/mod.rs new file mode 100644 index 0000000000000..9472441ae4feb --- /dev/null +++ b/src/arm/mod.rs @@ -0,0 +1,10 @@ +//! ARM intrinsics. +pub use self::v6::*; +pub use self::v7::*; +#[cfg(target_arch = "aarch64")] +pub use self::v8::*; + +mod v6; +mod v7; +#[cfg(target_arch = "aarch64")] +mod v8; diff --git a/src/arm/v6.rs b/src/arm/v6.rs new file mode 100644 index 0000000000000..95442b374f8cf --- /dev/null +++ b/src/arm/v6.rs @@ -0,0 +1,25 @@ +//! ARMv6 intrinsics. +//! +//! The reference is [ARMv6-M Architecture Reference +//! Manual](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0419c/index.html). + +/// Reverse the order of the bytes. +#[inline(always)] +#[cfg_attr(test, assert_instr(rev))] +pub fn _rev_u8(x: u8) -> u8 { + x.swap_bytes() as u8 +} + +/// Reverse the order of the bytes. +#[inline(always)] +#[cfg_attr(test, assert_instr(rev))] +pub fn _rev_u16(x: u16) -> u16 { + x.swap_bytes() as u16 +} + +/// Reverse the order of the bytes. +#[inline(always)] +#[cfg_attr(test, assert_instr(rev))] +pub fn _rev_u32(x: u32) -> u32 { + x.swap_bytes() as u32 +} diff --git a/src/arm/v7.rs b/src/arm/v7.rs new file mode 100644 index 0000000000000..1052b8477a923 --- /dev/null +++ b/src/arm/v7.rs @@ -0,0 +1,40 @@ +//! ARMv7 intrinsics. +//! +//! The reference is [ARMv7-M Architecture Reference Manual (Issue +//! E.b)](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0403e.b/index.html). + +pub use super::v6::*; + +/// Count Leading Zeros. 
+#[inline(always)] +#[cfg_attr(test, assert_instr(clz))] +pub fn _clz_u8(x: u8) -> u8 { + x.leading_zeros() as u8 +} + +/// Count Leading Zeros. +#[inline(always)] +#[cfg_attr(test, assert_instr(clz))] +pub fn _clz_u16(x: u16) -> u16 { + x.leading_zeros() as u16 +} + +/// Count Leading Zeros. +#[inline(always)] +#[cfg_attr(test, assert_instr(clz))] +pub fn _clz_u32(x: u32) -> u32 { + x.leading_zeros() as u32 +} + +#[allow(dead_code)] +extern "C" { + #[link_name="llvm.bitreverse.i32"] + fn rbit_u32(i: i32) -> i32; +} + +/// Reverse the bit order. +#[inline(always)] +#[cfg_attr(test, assert_instr(rbit))] +pub fn _rbit_u32(x: u32) -> u32 { + unsafe { rbit_u32(x as i32) as u32 } +} diff --git a/src/arm/v8.rs b/src/arm/v8.rs new file mode 100644 index 0000000000000..e49ca4fe1f25e --- /dev/null +++ b/src/arm/v8.rs @@ -0,0 +1,54 @@ +//! ARMv8 intrinsics. +//! +//! The reference is [ARMv8-A Reference Manual](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0487a.k_10775/index.html). + +pub use super::v7::*; + +/// Reverse the order of the bytes. +#[inline(always)] +#[cfg_attr(test, assert_instr(rev))] +pub fn _rev_u64(x: u64) -> u64 { + x.swap_bytes() as u64 +} + +/// Count Leading Zeros. +#[inline(always)] +#[cfg_attr(test, assert_instr(clz))] +pub fn _clz_u64(x: u64) -> u64 { + x.leading_zeros() as u64 +} + +#[allow(dead_code)] +extern "C" { + #[link_name="llvm.bitreverse.i64"] + fn rbit_u64(i: i64) -> i64; +} + +/// Reverse the bit order. +#[inline(always)] +#[cfg_attr(test, assert_instr(rbit))] +pub fn _rbit_u64(x: u64) -> u64 { + unsafe { rbit_u64(x as i64) as u64 } +} + +/// Counts the leading most significant bits set. +/// +/// When all bits of the operand are set it returns the size of the operand in +/// bits. 
+#[inline(always)] +// LLVM Bug (should be cls): https://bugs.llvm.org/show_bug.cgi?id=31802 +#[cfg_attr(test, assert_instr(clz))] +pub fn _cls_u32(x: u32) -> u32 { + u32::leading_zeros(!x) as u32 +} + +/// Counts the leading most significant bits set. +/// +/// When all bits of the operand are set it returns the size of the operand in +/// bits. +#[inline(always)] +// LLVM Bug (should be cls): https://bugs.llvm.org/show_bug.cgi?id=31802 +#[cfg_attr(test, assert_instr(clz))] +pub fn _cls_u64(x: u64) -> u64 { + u64::leading_zeros(!x) as u64 +} diff --git a/src/lib.rs b/src/lib.rs index e2ec276884099..2e75c3e833d0f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,6 +20,9 @@ pub mod simd { pub mod vendor { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub use x86::*; + + #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + pub use arm::*; } #[macro_use] @@ -31,3 +34,6 @@ mod v512; mod v64; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] mod x86; + +#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +mod arm; diff --git a/src/x86/bmi2.rs b/src/x86/bmi2.rs index 321df40777f13..67f8740399e43 100644 --- a/src/x86/bmi2.rs +++ b/src/x86/bmi2.rs @@ -2,7 +2,7 @@ //! //! The reference is [Intel 64 and IA-32 Architectures Software Developer's //! Manual Volume 2: Instruction Set Reference, -//! A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf). +//! A-Z](http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf). //! //! [Wikipedia](https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2_.28Bit_Manipulation_Instruction_Set_2.29) //! provides a quick overview of the available instructions. 
@@ -15,6 +15,8 @@ use assert_instr::assert_instr; /// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with /// the low half and the high half of the result. #[inline(always)] +// LLVM BUG (should be mulxl): https://bugs.llvm.org/show_bug.cgi?id=34232 +#[cfg_attr(test, assert_instr(imul))] #[target_feature = "+bmi2"] pub fn _mulx_u32(a: u32, b: u32) -> (u32, u32) { let result: u64 = (a as u64) * (b as u64); @@ -27,6 +29,7 @@ pub fn _mulx_u32(a: u32, b: u32) -> (u32, u32) { /// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with /// the low half and the high half of the result. #[inline(always)] +#[cfg_attr(test, assert_instr(mulx))] #[target_feature = "+bmi2"] pub fn _mulx_u64(a: u64, b: u64) -> (u64, u64) { let result: u128 = (a as u128) * (b as u128);