From 5b06106ad7240b4a14d8a9ff20c8847f8162b7aa Mon Sep 17 00:00:00 2001 From: Jack O'Connor Date: Sun, 14 Jun 2020 16:29:25 -0400 Subject: [PATCH 1/3] use blake2b_simd and blake2s_simd internally Replace the internal implementation of BLAKE2b and BLAKE2s with calls to the blake2b_simd and blake2s_simd crates. Those crates contain optimized implementations for SSE4.1 and AVX2, and they use runtime CPU feature detection to select the best implementation. Running the long-input benchmarks on an Intel i9-9880H with AVX2 support, this change is a performance improvement of about 1.5x for BLAKE2b and 1.35x for BLAKE2s. This change deletes the undocumented `with_parameter_block` method, as the raw parameter block is not exposed by blake2b_simd or blake2s_simd. Callers who need BLAKE2 tree mode parameters can use the upstream crates directly. They provide a complete set of parameter methods. This change also deletes the `finalize_last_node` method. This method was arguably attached to the wrong types, `VarBlake2b` and `VarBlake2s`, where it would panic with a non-default output length. It's not very useful without the other tree parameters, so rather than moving it to the fixed-length `Blake2b` and `Blake2s` types where it belongs, we just delete it. This also simplifies the addition of BLAKE2bp and BLAKE2sp support in the following commit, as those algorithms use the last node flag internally and cannot expose it. 
--- Cargo.lock | 44 ++++- blake2/Cargo.toml | 4 +- blake2/src/as_bytes.rs | 44 ----- blake2/src/blake2.rs | 314 +++++------------------------- blake2/src/blake2b.rs | 13 +- blake2/src/blake2s.rs | 13 +- blake2/src/consts.rs | 47 ----- blake2/src/lib.rs | 5 - blake2/src/simd.rs | 138 ------------- blake2/src/simd/simd_opt.rs | 51 ----- blake2/src/simd/simd_opt/u32x4.rs | 67 ------- blake2/src/simd/simd_opt/u64x4.rs | 140 ------------- blake2/src/simd/simdint.rs | 22 --- blake2/src/simd/simdop.rs | 103 ---------- blake2/src/simd/simdty.rs | 77 -------- 15 files changed, 103 insertions(+), 979 deletions(-) delete mode 100644 blake2/src/as_bytes.rs delete mode 100644 blake2/src/consts.rs delete mode 100644 blake2/src/simd.rs delete mode 100644 blake2/src/simd/simd_opt.rs delete mode 100644 blake2/src/simd/simd_opt/u32x4.rs delete mode 100644 blake2/src/simd/simd_opt/u64x4.rs delete mode 100644 blake2/src/simd/simdint.rs delete mode 100644 blake2/src/simd/simdop.rs delete mode 100644 blake2/src/simd/simdty.rs diff --git a/Cargo.lock b/Cargo.lock index d95171a9b..0e0124dc8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,15 +1,51 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
+[[package]] +name = "arrayref" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" + +[[package]] +name = "arrayvec" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cff77d8686867eceff3105329d4698d96c2391c176d5d03adc90c7389162b5b8" + [[package]] name = "blake2" version = "0.9.0" dependencies = [ + "blake2b_simd", + "blake2s_simd", "crypto-mac", "digest", "hex-literal", "opaque-debug", ] +[[package]] +name = "blake2b_simd" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8fb2d74254a3a0b5cac33ac9f8ed0e44aa50378d9dbb2e5d83bd21ed1dc2c8a" +dependencies = [ + "arrayref", + "arrayvec", + "constant_time_eq", +] + +[[package]] +name = "blake2s_simd" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab9e07352b829279624ceb7c64adb4f585dacdb81d35cafae81139ccd617cf44" +dependencies = [ + "arrayref", + "arrayvec", + "constant_time_eq", +] + [[package]] name = "blobby" version = "0.1.2" @@ -47,6 +83,12 @@ version = "1.0.54" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7bbb73db36c1246e9034e307d0fba23f9a2e251faa47ade70c1bd252220c8311" +[[package]] +name = "constant_time_eq" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" + [[package]] name = "crypto-mac" version = "0.8.0" @@ -79,7 +121,7 @@ dependencies = [ [[package]] name = "gost94" -version = "0.8.0" +version = "0.9.0" dependencies = [ "block-buffer", "digest", diff --git a/blake2/Cargo.toml b/blake2/Cargo.toml index 6e2d7cb70..d614b0843 100644 --- a/blake2/Cargo.toml +++ b/blake2/Cargo.toml @@ -12,6 +12,8 @@ keywords = ["crypto", "blake2", "hash", "digest"] categories = ["cryptography", "no-std"] [dependencies] +blake2b_simd = 
{ version = "0.5.10", default-features = false } +blake2s_simd = { version = "0.5.10", default-features = false } digest = "0.9" crypto-mac = "0.8" opaque-debug = "0.3" @@ -23,7 +25,7 @@ hex-literal = "0.2" [features] default = ["std"] -std = ["digest/std", "crypto-mac/std"] +std = ["digest/std", "crypto-mac/std", "blake2b_simd/std", "blake2s_simd/std"] simd = [] simd_opt = ["simd"] simd_asm = ["simd_opt"] diff --git a/blake2/src/as_bytes.rs b/blake2/src/as_bytes.rs deleted file mode 100644 index 02cca6bba..000000000 --- a/blake2/src/as_bytes.rs +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2016 blake2-rfc Developers -// -// Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be -// copied, modified, or distributed except according to those terms. - -use core::mem; -use core::slice; - -pub unsafe trait Safe {} - -pub trait AsBytes { - fn as_bytes(&self) -> &[u8]; - fn as_mut_bytes(&mut self) -> &mut [u8]; -} - -impl AsBytes for [T] { - #[inline] - fn as_bytes(&self) -> &[u8] { - unsafe { - slice::from_raw_parts(self.as_ptr() as *const u8, self.len() * mem::size_of::()) - } - } - - #[inline] - fn as_mut_bytes(&mut self) -> &mut [u8] { - unsafe { - slice::from_raw_parts_mut( - self.as_mut_ptr() as *mut u8, - self.len() * mem::size_of::(), - ) - } - } -} - -unsafe impl Safe for u8 {} -unsafe impl Safe for u16 {} -unsafe impl Safe for u32 {} -unsafe impl Safe for u64 {} -unsafe impl Safe for i8 {} -unsafe impl Safe for i16 {} -unsafe impl Safe for i32 {} -unsafe impl Safe for i64 {} diff --git a/blake2/src/blake2.rs b/blake2/src/blake2.rs index 6469448aa..568c57623 100644 --- a/blake2/src/blake2.rs +++ b/blake2/src/blake2.rs @@ -1,75 +1,24 @@ macro_rules! 
blake2_impl { ( - $state:ident, $fix_state:ident, $word:ident, $vec:ident, $bytes:ident, - $block_size:ident, $R1:expr, $R2:expr, $R3:expr, $R4:expr, $IV:expr, - $vardoc:expr, $doc:expr, + $state:ident, $fix_state:ident, $upstream_state:ty, + $upstream_params:ty, $block_bytes_typenum:ty, $key_bytes_typenum:ty, + $out_bytes_typenum:ty, $vardoc:expr, $doc:expr, ) => { - use $crate::as_bytes::AsBytes; - use $crate::simd::{Vector4, $vec}; - use digest::{Update, BlockInput, FixedOutputDirty, VariableOutputDirty, Reset}; use digest::InvalidOutputSize; use digest::generic_array::GenericArray; - use digest::generic_array::typenum::{U4, Unsigned}; - use core::{cmp, convert::TryInto, ops::Div}; + use digest::generic_array::typenum::Unsigned; use crypto_mac::{InvalidKeyLength, Mac, NewMac}; - type Output = GenericArray; + type Output = GenericArray; #[derive(Clone)] #[doc=$vardoc] pub struct $state { - m: [$word; 16], - h: [$vec; 2], - t: u64, - n: usize, - - h0: [$vec; 2], - m0: [$word; 16], - t0: u64, - } - - #[inline(always)] - fn iv0() -> $vec { $vec::new($IV[0], $IV[1], $IV[2], $IV[3]) } - #[inline(always)] - fn iv1() -> $vec { $vec::new($IV[4], $IV[5], $IV[6], $IV[7]) } - - #[inline(always)] - fn quarter_round(v: &mut [$vec; 4], rd: u32, rb: u32, m: $vec) { - v[0] = v[0].wrapping_add(v[1]).wrapping_add(m.from_le()); - v[3] = (v[3] ^ v[0]).rotate_right_const(rd); - v[2] = v[2].wrapping_add(v[3]); - v[1] = (v[1] ^ v[2]).rotate_right_const(rb); - } - - #[inline(always)] - fn shuffle(v: &mut [$vec; 4]) { - v[1] = v[1].shuffle_left_1(); - v[2] = v[2].shuffle_left_2(); - v[3] = v[3].shuffle_left_3(); - } - - #[inline(always)] - fn unshuffle(v: &mut [$vec; 4]) { - v[1] = v[1].shuffle_right_1(); - v[2] = v[2].shuffle_right_2(); - v[3] = v[3].shuffle_right_3(); - } - - #[inline(always)] - fn round(v: &mut [$vec; 4], m: &[$word; 16], s: &[usize; 16]) { - quarter_round(v, $R1, $R2, $vec::gather(m, - s[ 0], s[ 2], s[ 4], s[ 6])); - quarter_round(v, $R3, $R4, $vec::gather(m, - s[ 
1], s[ 3], s[ 5], s[ 7])); - - shuffle(v); - quarter_round(v, $R1, $R2, $vec::gather(m, - s[ 8], s[10], s[12], s[14])); - quarter_round(v, $R3, $R4, $vec::gather(m, - s[ 9], s[11], s[13], s[15])); - unshuffle(v); + upstream_params: $upstream_params, + upstream_state: $upstream_state, + output_size: usize, } impl $state { @@ -84,188 +33,30 @@ macro_rules! blake2_impl { /// Creates a new hashing context with the full set of sequential-mode parameters. pub fn with_params(key: &[u8], salt: &[u8], persona: &[u8], output_size: usize) -> Self { - let kk = key.len(); - assert!(kk <= $bytes::to_usize()); - assert!(output_size <= $bytes::to_usize()); - - // The number of bytes needed to express two words. - let length = $bytes::to_usize()/4; - assert!(salt.len() <= length); - assert!(persona.len() <= length); - - // Build a parameter block - let mut p = [0 as $word; 8]; - p[0] = 0x0101_0000 ^ ((kk as $word) << 8) ^ - (output_size as $word); - - // salt is two words long - if salt.len() < length { - let mut padded_salt = GenericArray::>::Output>::default(); - for i in 0..salt.len() { - padded_salt[i] = salt[i]; - } - p[4] = $word::from_le_bytes(padded_salt[0 .. length/2].try_into().unwrap()); - p[5] = $word::from_le_bytes(padded_salt[length/2 .. padded_salt.len()].try_into().unwrap()); - } else { - p[4] = $word::from_le_bytes(salt[0 .. salt.len()/2].try_into().unwrap()); - p[5] = $word::from_le_bytes(salt[salt.len()/2 .. salt.len()].try_into().unwrap()); - } + let mut upstream_params = <$upstream_params>::new(); + upstream_params + .key(key) + .salt(salt) + .personal(persona) + .hash_length(output_size); - // persona is also two words long - if persona.len() < length { - let mut padded_persona = GenericArray::>::Output>::default(); - for i in 0..persona.len() { - padded_persona[i] = persona[i]; - } - p[6] = $word::from_le_bytes(padded_persona[0 .. length/2].try_into().unwrap()); - p[7] = $word::from_le_bytes(padded_persona[length/2 .. 
padded_persona.len()].try_into().unwrap()); - } else { - p[6] = $word::from_le_bytes(persona[0 .. length/2].try_into().unwrap()); - p[7] = $word::from_le_bytes(persona[length/2 .. persona.len()].try_into().unwrap()); - } + let upstream_state = upstream_params.to_state(); - let mut state = Self::with_parameter_block(&p); - - if kk > 0 { - copy(key, state.m.as_mut_bytes()); - state.t = 2 * $bytes::to_u64(); - } - - state.t0 = state.t; - state.m0 = state.m; - state - } - - #[doc(hidden)] - pub fn with_parameter_block(p: &[$word; 8]) -> Self { - let nn = p[0] as u8 as usize; - let kk = (p[0] >> 8) as u8 as usize; - assert!(nn >= 1 && nn <= $bytes::to_usize()); - assert!(kk <= $bytes::to_usize()); - - let h0 = [ - iv0() ^ $vec::new(p[0], p[1], p[2], p[3]), - iv1() ^ $vec::new(p[4], p[5], p[6], p[7]), - ]; - - $state { - m: [0; 16], - h: h0, - t: 0, - n: nn, - - t0: 0, - m0: [0; 16], - h0: h0, - } + Self { upstream_params, upstream_state, output_size } } /// Updates the hashing context with more data. 
fn update(&mut self, data: &[u8]) { - let mut rest = data; - - let block = 2 * $bytes::to_usize(); - - let off = self.t as usize % block; - if off != 0 || self.t == 0 { - let len = cmp::min(block - off, rest.len()); - - let part = &rest[..len]; - rest = &rest[part.len()..]; - - copy(part, &mut self.m.as_mut_bytes()[off..]); - self.t = self.t.checked_add(part.len() as u64) - .expect("hash data length overflow"); - } - - while rest.len() >= block { - self.compress(0, 0); - - let part = &rest[..block]; - rest = &rest[part.len()..]; - - copy(part, &mut self.m.as_mut_bytes()); - self.t = self.t.checked_add(part.len() as u64) - .expect("hash data length overflow"); - } - - let n = rest.len(); - if n > 0 { - self.compress(0, 0); - - copy(rest, &mut self.m.as_mut_bytes()); - self.t = self.t.checked_add(rest.len() as u64) - .expect("hash data length overflow"); - } - } - - #[doc(hidden)] - pub fn finalize_last_node(mut self) -> Output { - self.finalize_with_flag(!0) - } - - - fn finalize_with_flag(&mut self, f1: $word) -> Output { - let off = self.t as usize % (2 * $bytes::to_usize()); - if off != 0 { - self.m.as_mut_bytes()[off..].iter_mut().for_each(|b| *b = 0); - } - - self.compress(!0, f1); - - let buf = [self.h[0].to_le(), self.h[1].to_le()]; - - let mut out = GenericArray::default(); - copy(buf.as_bytes(), &mut out); - out - } - - fn compress(&mut self, f0: $word, f1: $word) { - use $crate::consts::SIGMA; - - let m = &self.m; - let h = &mut self.h; - - let t0 = self.t as $word; - let t1 = match $bytes::to_u8() { - 64 => 0, - 32 => (self.t >> 32) as $word, - _ => unreachable!(), - }; - - let mut v = [ - h[0], - h[1], - iv0(), - iv1() ^ $vec::new(t0, t1, f0, f1), - ]; - - round(&mut v, m, &SIGMA[0]); - round(&mut v, m, &SIGMA[1]); - round(&mut v, m, &SIGMA[2]); - round(&mut v, m, &SIGMA[3]); - round(&mut v, m, &SIGMA[4]); - round(&mut v, m, &SIGMA[5]); - round(&mut v, m, &SIGMA[6]); - round(&mut v, m, &SIGMA[7]); - round(&mut v, m, &SIGMA[8]); - round(&mut v, m, 
&SIGMA[9]); - if $bytes::to_u8() == 64 { - round(&mut v, m, &SIGMA[0]); - round(&mut v, m, &SIGMA[1]); - } - - h[0] = h[0] ^ (v[0] ^ v[2]); - h[1] = h[1] ^ (v[1] ^ v[3]); + self.upstream_state.update(data); } } impl Default for $state { - fn default() -> Self { Self::new_keyed(&[], $bytes::to_usize()) } + fn default() -> Self { Self::new_keyed(&[], <$out_bytes_typenum>::to_usize()) } } impl BlockInput for $state { - type BlockSize = $block_size; + type BlockSize = $block_bytes_typenum; } impl Update for $state { @@ -276,120 +67,113 @@ macro_rules! blake2_impl { impl VariableOutputDirty for $state { fn new(output_size: usize) -> Result { - if output_size == 0 || output_size > $bytes::to_usize() { + if output_size == 0 || output_size > <$out_bytes_typenum>::to_usize() { return Err(InvalidOutputSize); } Ok(Self::new_keyed(&[], output_size)) } fn output_size(&self) -> usize { - self.n + self.output_size } fn finalize_variable_dirty(&mut self, f: impl FnOnce(&[u8])) { - let n = self.n; - let res = self.finalize_with_flag(0); - f(&res[..n]); + f(self.upstream_state.finalize().as_bytes()); } } impl Reset for $state { fn reset(&mut self) { - self.t = self.t0; - self.m = self.m0; - self.h = self.h0; + self.upstream_state = self.upstream_params.to_state(); } } opaque_debug::implement!($state); digest::impl_write!($state); - #[derive(Clone)] #[doc=$doc] pub struct $fix_state { - state: $state, + upstream_params: $upstream_params, + upstream_state: $upstream_state, } impl $fix_state { /// Creates a new hashing context with the full set of sequential-mode parameters. 
pub fn with_params(key: &[u8], salt: &[u8], persona: &[u8]) -> Self { - let state = $state::with_params(key, salt, persona, $bytes::to_usize()); - Self { state } + let mut upstream_params = <$upstream_params>::new(); + upstream_params + .key(key) + .salt(salt) + .personal(persona); + + let upstream_state = upstream_params.to_state(); + + Self { upstream_params, upstream_state } } } impl Default for $fix_state { fn default() -> Self { - let state = $state::new_keyed(&[], $bytes::to_usize()); - Self { state } + Self::with_params(&[], &[], &[]) } } impl BlockInput for $fix_state { - type BlockSize = $block_size; + type BlockSize = $block_bytes_typenum; } impl Update for $fix_state { fn update(&mut self, data: impl AsRef<[u8]>) { - self.state.update(data.as_ref()); + self.upstream_state.update(data.as_ref()); } } impl FixedOutputDirty for $fix_state { - type OutputSize = $bytes; + type OutputSize = $out_bytes_typenum; fn finalize_into_dirty(&mut self, out: &mut Output) { - out.copy_from_slice(&self.state.finalize_with_flag(0)); + out.copy_from_slice(self.upstream_state.finalize().as_bytes()); } } impl Reset for $fix_state { fn reset(&mut self) { - self.state.reset() + self.upstream_state = self.upstream_params.to_state(); } } impl NewMac for $fix_state { - type KeySize = $bytes; + type KeySize = $key_bytes_typenum; - fn new(key: &GenericArray) -> Self { - let state = $state::new_keyed(key, $bytes::to_usize()); - Self { state } + fn new(key: &GenericArray) -> Self { + Self::with_params(&key[..], &[], &[]) } fn new_varkey(key: &[u8]) -> Result { - if key.len() > $bytes::to_usize() { + if key.len() > <$key_bytes_typenum>::to_usize() { Err(InvalidKeyLength) } else { - let state = $state::new_keyed(key, $bytes::to_usize()); - Ok(Self { state }) + Ok(Self::with_params(key, &[], &[])) } } } impl Mac for $fix_state { - type OutputSize = $bytes; + type OutputSize = $out_bytes_typenum; - fn update(&mut self, data: &[u8]) { self.state.update(data); } + fn update(&mut self, data: 
&[u8]) { self.upstream_state.update(data); } fn reset(&mut self) { ::reset(self) } - fn finalize(mut self) -> crypto_mac::Output { - crypto_mac::Output::new(self.state.finalize_with_flag(0)) + fn finalize(self) -> crypto_mac::Output { + crypto_mac::Output::new(GenericArray::clone_from_slice(self.upstream_state.finalize().as_bytes())) } } opaque_debug::implement!($fix_state); digest::impl_write!($fix_state); - - fn copy(src: &[u8], dst: &mut [u8]) { - assert!(dst.len() >= src.len()); - unsafe { - core::ptr::copy_nonoverlapping(src.as_ptr(), dst.as_mut_ptr(), src.len()); - } - } } } diff --git a/blake2/src/blake2b.rs b/blake2/src/blake2b.rs index 3afc18d84..24220a82a 100644 --- a/blake2/src/blake2b.rs +++ b/blake2/src/blake2b.rs @@ -1,18 +1,13 @@ -use crate::consts::BLAKE2B_IV; use digest::generic_array::typenum::{U128, U64}; blake2_impl!( VarBlake2b, Blake2b, - u64, - u64x4, - U64, + blake2b_simd::State, + blake2b_simd::Params, U128, - 32, - 24, - 16, - 63, - BLAKE2B_IV, + U64, + U64, "Blake2b instance with a variable output.", "Blake2b instance with a fixed output.", ); diff --git a/blake2/src/blake2s.rs b/blake2/src/blake2s.rs index 5306ed992..bd8735640 100644 --- a/blake2/src/blake2s.rs +++ b/blake2/src/blake2s.rs @@ -1,18 +1,13 @@ -use crate::consts::BLAKE2S_IV; use digest::generic_array::typenum::{U32, U64}; blake2_impl!( VarBlake2s, Blake2s, - u32, - u32x4, - U32, + blake2s_simd::State, + blake2s_simd::Params, U64, - 16, - 12, - 8, - 7, - BLAKE2S_IV, + U32, + U32, "Blake2s instance with a variable output.", "Blake2s instance with a fixed output.", ); diff --git a/blake2/src/consts.rs b/blake2/src/consts.rs deleted file mode 100644 index ab76c0e17..000000000 --- a/blake2/src/consts.rs +++ /dev/null @@ -1,47 +0,0 @@ -#![allow(clippy::unreadable_literal)] - -pub static SIGMA: [[usize; 16]; 12] = [ - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], - [14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3], - [11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 
9, 4], - [7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8], - [9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13], - [2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9], - [12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11], - [13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10], - [6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5], - [10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0], - [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], - [14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3], -]; - -pub static BLAKE2B_IV: [u64; 8] = [ - 0x6a09e667f3bcc908, - 0xbb67ae8584caa73b, - 0x3c6ef372fe94f82b, - 0xa54ff53a5f1d36f1, - 0x510e527fade682d1, - 0x9b05688c2b3e6c1f, - 0x1f83d9abfb41bd6b, - 0x5be0cd19137e2179, -]; - -/* -pub const BLAKE2B_BLOCKBYTES : usize = 128; -pub const BLAKE2B_OUTBYTES : usize = 64; -pub const BLAKE2B_KEYBYTES : usize = 64; -pub const BLAKE2B_SALTBYTES : usize = 16; -pub const BLAKE2B_PERSONALBYTES : usize = 16; -*/ - -pub static BLAKE2S_IV: [u32; 8] = [ - 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19, -]; - -/* -pub const BLAKE2S_BLOCKBYTES : usize = 64; -pub const BLAKE2S_OUTBYTES : usize = 32; -pub const BLAKE2S_KEYBYTES : usize = 32; -pub const BLAKE2S_SALTBYTES : usize = 8; -pub const BLAKE2S_PERSONALBYTES : usize = 8; -*/ diff --git a/blake2/src/lib.rs b/blake2/src/lib.rs index a58d6ec2f..356cea614 100644 --- a/blake2/src/lib.rs +++ b/blake2/src/lib.rs @@ -90,11 +90,6 @@ #[cfg(feature = "std")] extern crate std; -mod as_bytes; -mod consts; - -mod simd; - #[macro_use] mod blake2; diff --git a/blake2/src/simd.rs b/blake2/src/simd.rs deleted file mode 100644 index 0d3dfd346..000000000 --- a/blake2/src/simd.rs +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright 2015 blake2-rfc Developers -// -// Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be -// copied, modified, or distributed except according to those terms. 
- -mod simd_opt; -mod simdint; -mod simdop; -mod simdty; - -pub use self::simdty::{u32x4, u64x4}; - -pub trait Vector4: Copy { - fn gather(src: &[T], i0: usize, i1: usize, i2: usize, i3: usize) -> Self; - - fn from_le(self) -> Self; - fn to_le(self) -> Self; - - fn wrapping_add(self, rhs: Self) -> Self; - - fn rotate_right_const(self, n: u32) -> Self; - - fn shuffle_left_1(self) -> Self; - fn shuffle_left_2(self) -> Self; - fn shuffle_left_3(self) -> Self; - - #[inline(always)] - fn shuffle_right_1(self) -> Self { - self.shuffle_left_3() - } - #[inline(always)] - fn shuffle_right_2(self) -> Self { - self.shuffle_left_2() - } - #[inline(always)] - fn shuffle_right_3(self) -> Self { - self.shuffle_left_1() - } -} - -macro_rules! impl_vector4 { - ($vec:ident, $word:ident) => { - impl Vector4<$word> for $vec { - #[inline(always)] - fn gather(src: &[$word], i0: usize, i1: usize, i2: usize, i3: usize) -> Self { - $vec::new(src[i0], src[i1], src[i2], src[i3]) - } - - #[cfg(target_endian = "little")] - #[inline(always)] - fn from_le(self) -> Self { - self - } - - #[cfg(not(target_endian = "little"))] - #[inline(always)] - fn from_le(self) -> Self { - $vec::new( - $word::from_le(self.0), - $word::from_le(self.1), - $word::from_le(self.2), - $word::from_le(self.3), - ) - } - - #[cfg(target_endian = "little")] - #[inline(always)] - fn to_le(self) -> Self { - self - } - - #[cfg(not(target_endian = "little"))] - #[inline(always)] - fn to_le(self) -> Self { - $vec::new( - self.0.to_le(), - self.1.to_le(), - self.2.to_le(), - self.3.to_le(), - ) - } - - #[inline(always)] - fn wrapping_add(self, rhs: Self) -> Self { - self + rhs - } - - #[inline(always)] - fn rotate_right_const(self, n: u32) -> Self { - simd_opt::$vec::rotate_right_const(self, n) - } - - #[cfg(feature = "simd")] - #[inline(always)] - fn shuffle_left_1(self) -> Self { - use crate::simd::simdint::simd_shuffle4; - unsafe { simd_shuffle4(self, self, [1, 2, 3, 0]) } - } - - #[cfg(not(feature = "simd"))] - 
#[inline(always)] - fn shuffle_left_1(self) -> Self { - $vec::new(self.1, self.2, self.3, self.0) - } - - #[cfg(feature = "simd")] - #[inline(always)] - fn shuffle_left_2(self) -> Self { - use crate::simd::simdint::simd_shuffle4; - unsafe { simd_shuffle4(self, self, [2, 3, 0, 1]) } - } - - #[cfg(not(feature = "simd"))] - #[inline(always)] - fn shuffle_left_2(self) -> Self { - $vec::new(self.2, self.3, self.0, self.1) - } - - #[cfg(feature = "simd")] - #[inline(always)] - fn shuffle_left_3(self) -> Self { - use crate::simd::simdint::simd_shuffle4; - unsafe { simd_shuffle4(self, self, [3, 0, 1, 2]) } - } - - #[cfg(not(feature = "simd"))] - #[inline(always)] - fn shuffle_left_3(self) -> Self { - $vec::new(self.3, self.0, self.1, self.2) - } - } - }; -} - -impl_vector4!(u32x4, u32); -impl_vector4!(u64x4, u64); diff --git a/blake2/src/simd/simd_opt.rs b/blake2/src/simd/simd_opt.rs deleted file mode 100644 index e143ba4ec..000000000 --- a/blake2/src/simd/simd_opt.rs +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2015 blake2-rfc Developers -// -// Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be -// copied, modified, or distributed except according to those terms. - -#[allow(unused_macros)] -#[cfg(feature = "simd")] -macro_rules! transmute_shuffle { - ($tmp:ident, $shuffle:ident, $vec:expr, $idx:expr) => { - unsafe { - use crate::simd::simdint::$shuffle; - use crate::simd::simdty::$tmp; - use core::mem::transmute; - - let tmp_i: $tmp = transmute($vec); - let tmp_o: $tmp = $shuffle(tmp_i, tmp_i, $idx); - transmute(tmp_o) - } - }; -} - -#[cfg(feature = "simd")] -pub mod u32x4; -#[cfg(feature = "simd")] -pub mod u64x4; - -#[cfg(not(feature = "simd"))] -macro_rules! 
simd_opt { - ($vec:ident) => { - pub mod $vec { - use crate::simd::simdty::$vec; - - #[inline(always)] - pub fn rotate_right_const(vec: $vec, n: u32) -> $vec { - $vec::new( - vec.0.rotate_right(n), - vec.1.rotate_right(n), - vec.2.rotate_right(n), - vec.3.rotate_right(n), - ) - } - } - }; -} - -#[cfg(not(feature = "simd"))] -simd_opt!(u32x4); -#[cfg(not(feature = "simd"))] -simd_opt!(u64x4); diff --git a/blake2/src/simd/simd_opt/u32x4.rs b/blake2/src/simd/simd_opt/u32x4.rs deleted file mode 100644 index a3d8e3d6d..000000000 --- a/blake2/src/simd/simd_opt/u32x4.rs +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2015 blake2-rfc Developers -// -// Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be -// copied, modified, or distributed except according to those terms. - -use crate::simd::simdty::u32x4; - -#[cfg(feature = "simd_opt")] -#[inline(always)] -pub fn rotate_right_const(vec: u32x4, n: u32) -> u32x4 { - match n { - 16 => rotate_right_16(vec), - 8 => rotate_right_8(vec), - _ => rotate_right_any(vec, n), - } -} - -#[cfg(not(feature = "simd_opt"))] -#[inline(always)] -pub fn rotate_right_const(vec: u32x4, n: u32) -> u32x4 { - rotate_right_any(vec, n) -} - -#[inline(always)] -fn rotate_right_any(vec: u32x4, n: u32) -> u32x4 { - let r = n as u32; - let l = 32 - r; - - (vec >> u32x4::new(r, r, r, r)) ^ (vec << u32x4::new(l, l, l, l)) -} - -#[cfg(feature = "simd_opt")] -#[inline(always)] -fn rotate_right_16(vec: u32x4) -> u32x4 { - if cfg!(target_feature = "ssse3") { - // pshufb (SSSE3) / vpshufb (AVX2) - transmute_shuffle!( - u8x16, - simd_shuffle16, - vec, - [2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13] - ) - } else if cfg!(any(target_feature = "sse2", target_feature = "neon")) { - // pshuflw+pshufhw (SSE2) / vrev (NEON) - transmute_shuffle!(u16x8, simd_shuffle8, vec, [1, 0, 3, 2, 5, 4, 7, 6]) - } else { - rotate_right_any(vec, 16) - } -} - -#[cfg(feature = "simd_opt")] -#[inline(always)] -fn 
rotate_right_8(vec: u32x4) -> u32x4 { - if cfg!(target_feature = "ssse3") { - // pshufb (SSSE3) / vpshufb (AVX2) - transmute_shuffle!( - u8x16, - simd_shuffle16, - vec, - [1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12] - ) - } else { - rotate_right_any(vec, 8) - } -} diff --git a/blake2/src/simd/simd_opt/u64x4.rs b/blake2/src/simd/simd_opt/u64x4.rs deleted file mode 100644 index 0a6972a87..000000000 --- a/blake2/src/simd/simd_opt/u64x4.rs +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright 2015 blake2-rfc Developers -// -// Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be -// copied, modified, or distributed except according to those terms. - -use crate::simd::simdty::u64x4; - -#[cfg(feature = "simd_opt")] -#[inline(always)] -pub fn rotate_right_const(vec: u64x4, n: u32) -> u64x4 { - match n { - 32 => rotate_right_32(vec), - 24 => rotate_right_24(vec), - 16 => rotate_right_16(vec), - _ => rotate_right_any(vec, n), - } -} - -#[cfg(not(feature = "simd_opt"))] -#[inline(always)] -pub fn rotate_right_const(vec: u64x4, n: u32) -> u64x4 { - rotate_right_any(vec, n) -} - -#[inline(always)] -fn rotate_right_any(vec: u64x4, n: u32) -> u64x4 { - let r = n as u64; - let l = 64 - r; - - (vec >> u64x4::new(r, r, r, r)) ^ (vec << u64x4::new(l, l, l, l)) -} - -#[cfg(feature = "simd_opt")] -#[inline(always)] -fn rotate_right_32(vec: u64x4) -> u64x4 { - if cfg!(any(target_feature = "sse2", target_feature = "neon")) { - // 2 x pshufd (SSE2) / vpshufd (AVX2) / 2 x vrev (NEON) - transmute_shuffle!(u32x8, simd_shuffle8, vec, [1, 0, 3, 2, 5, 4, 7, 6]) - } else { - rotate_right_any(vec, 32) - } -} - -#[cfg(feature = "simd_opt")] -#[inline(always)] -fn rotate_right_24(vec: u64x4) -> u64x4 { - if cfg!(all( - feature = "simd_asm", - target_feature = "neon", - target_arch = "arm" - )) { - // 4 x vext (NEON) - rotate_right_vext(vec, 3) - } else if cfg!(target_feature = "ssse3") { - // 2 x pshufb (SSSE3) / vpshufb (AVX2) - 
transmute_shuffle!( - u8x32, - simd_shuffle32, - vec, - [ - 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 19, 20, 21, 22, 23, 16, 17, - 18, 27, 28, 29, 30, 31, 24, 25, 26 - ] - ) - } else { - rotate_right_any(vec, 24) - } -} - -#[cfg(feature = "simd_opt")] -#[inline(always)] -fn rotate_right_16(vec: u64x4) -> u64x4 { - if cfg!(all( - feature = "simd_asm", - target_feature = "neon", - target_arch = "arm" - )) { - // 4 x vext (NEON) - rotate_right_vext(vec, 2) - } else if cfg!(target_feature = "ssse3") { - // 2 x pshufb (SSSE3) / vpshufb (AVX2) - transmute_shuffle!( - u8x32, - simd_shuffle32, - vec, - [ - 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 18, 19, 20, 21, 22, 23, 16, - 17, 26, 27, 28, 29, 30, 31, 24, 25 - ] - ) - } else if cfg!(target_feature = "sse2") { - // 2 x pshuflw+pshufhw (SSE2) - transmute_shuffle!( - u16x16, - simd_shuffle16, - vec, - [1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12] - ) - } else { - rotate_right_any(vec, 16) - } -} - -#[cfg(all(feature = "simd_asm", target_feature = "neon", target_arch = "arm"))] -mod simd_asm_neon_arm { - use crate::simd::simdty::{u64x2, u64x4}; - - #[inline(always)] - fn vext_u64(vec: u64x2, b: u8) -> u64x2 { - unsafe { - let result: u64x2; - asm!("vext.8 ${0:e}, ${1:e}, ${1:e}, $2\nvext.8 ${0:f}, ${1:f}, ${1:f}, $2" - : "=w" (result) - : "w" (vec), "n" (b)); - result - } - } - - #[inline(always)] - pub fn rotate_right_vext(vec: u64x4, b: u8) -> u64x4 { - use crate::simd::simdint::{simd_shuffle2, simd_shuffle4}; - - unsafe { - let tmp0 = vext_u64(simd_shuffle2(vec, vec, [0, 1]), b); - let tmp1 = vext_u64(simd_shuffle2(vec, vec, [2, 3]), b); - simd_shuffle4(tmp0, tmp1, [0, 1, 2, 3]) - } - } -} - -#[cfg(all(feature = "simd_asm", target_feature = "neon", target_arch = "arm"))] -use self::simd_asm_neon_arm::rotate_right_vext; - -#[cfg(feature = "simd_opt")] -#[cfg(not(all(feature = "simd_asm", target_feature = "neon", target_arch = "arm")))] -fn rotate_right_vext(_vec: u64x4, _n: u8) -> u64x4 { 
- unreachable!() -} diff --git a/blake2/src/simd/simdint.rs b/blake2/src/simd/simdint.rs deleted file mode 100644 index d876d5538..000000000 --- a/blake2/src/simd/simdint.rs +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2015 blake2-rfc Developers -// -// Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be -// copied, modified, or distributed except according to those terms. - -#![allow(dead_code)] - -#[cfg(feature = "simd")] -extern "platform-intrinsic" { - pub fn simd_add(x: T, y: T) -> T; - pub fn simd_shl(x: T, y: T) -> T; - pub fn simd_shr(x: T, y: T) -> T; - pub fn simd_xor(x: T, y: T) -> T; - - pub fn simd_shuffle2(v: T, w: T, idx: [u32; 2]) -> U; - pub fn simd_shuffle4(v: T, w: T, idx: [u32; 4]) -> U; - pub fn simd_shuffle8(v: T, w: T, idx: [u32; 8]) -> U; - pub fn simd_shuffle16(v: T, w: T, idx: [u32; 16]) -> U; - pub fn simd_shuffle32(v: T, w: T, idx: [u32; 32]) -> U; -} diff --git a/blake2/src/simd/simdop.rs b/blake2/src/simd/simdop.rs deleted file mode 100644 index 891456d9c..000000000 --- a/blake2/src/simd/simdop.rs +++ /dev/null @@ -1,103 +0,0 @@ -// Copyright 2015 blake2-rfc Developers -// -// Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be -// copied, modified, or distributed except according to those terms. - -#[cfg(feature = "simd")] -use crate::simd::simdint; -use crate::simd::simdty::{u32x4, u64x4}; - -use core::ops::{Add, BitXor, Shl, Shr}; - -macro_rules! 
impl_ops { - ($vec:ident) => { - impl Add for $vec { - type Output = Self; - - #[cfg(feature = "simd")] - #[inline(always)] - fn add(self, rhs: Self) -> Self::Output { - unsafe { simdint::simd_add(self, rhs) } - } - - #[cfg(not(feature = "simd"))] - #[inline(always)] - fn add(self, rhs: Self) -> Self::Output { - $vec::new( - self.0.wrapping_add(rhs.0), - self.1.wrapping_add(rhs.1), - self.2.wrapping_add(rhs.2), - self.3.wrapping_add(rhs.3), - ) - } - } - - impl BitXor for $vec { - type Output = Self; - - #[cfg(feature = "simd")] - #[inline(always)] - fn bitxor(self, rhs: Self) -> Self::Output { - unsafe { simdint::simd_xor(self, rhs) } - } - - #[cfg(not(feature = "simd"))] - #[inline(always)] - fn bitxor(self, rhs: Self) -> Self::Output { - $vec::new( - self.0 ^ rhs.0, - self.1 ^ rhs.1, - self.2 ^ rhs.2, - self.3 ^ rhs.3, - ) - } - } - - impl Shl<$vec> for $vec { - type Output = Self; - - #[cfg(feature = "simd")] - #[inline(always)] - fn shl(self, rhs: Self) -> Self::Output { - unsafe { simdint::simd_shl(self, rhs) } - } - - #[cfg(not(feature = "simd"))] - #[inline(always)] - fn shl(self, rhs: Self) -> Self::Output { - $vec::new( - self.0 << rhs.0, - self.1 << rhs.1, - self.2 << rhs.2, - self.3 << rhs.3, - ) - } - } - - impl Shr<$vec> for $vec { - type Output = Self; - - #[cfg(feature = "simd")] - #[inline(always)] - fn shr(self, rhs: Self) -> Self::Output { - unsafe { simdint::simd_shr(self, rhs) } - } - - #[cfg(not(feature = "simd"))] - #[inline(always)] - fn shr(self, rhs: Self) -> Self::Output { - $vec::new( - self.0 >> rhs.0, - self.1 >> rhs.1, - self.2 >> rhs.2, - self.3 >> rhs.3, - ) - } - } - }; -} - -impl_ops!(u32x4); -impl_ops!(u64x4); diff --git a/blake2/src/simd/simdty.rs b/blake2/src/simd/simdty.rs deleted file mode 100644 index 008b8b48c..000000000 --- a/blake2/src/simd/simdty.rs +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright 2016 blake2-rfc Developers -// -// Licensed under the Apache License, Version 2.0, or the MIT license , at your option. 
This file may not be -// copied, modified, or distributed except according to those terms. - -#![allow(dead_code, non_camel_case_types)] - -use crate::as_bytes::Safe; - -#[cfg(feature = "simd")] -macro_rules! decl_simd { - ($($decl:item)*) => { - $( - #[derive(Clone, Copy, Debug)] - #[repr(simd)] - $decl - )* - } -} - -#[cfg(not(feature = "simd"))] -macro_rules! decl_simd { - ($($decl:item)*) => { - $( - #[derive(Clone, Copy, Debug)] - #[repr(C)] - $decl - )* - } -} - -decl_simd! { - pub struct Simd2(pub T, pub T); - pub struct Simd4(pub T, pub T, pub T, pub T); - pub struct Simd8(pub T, pub T, pub T, pub T, - pub T, pub T, pub T, pub T); - pub struct Simd16(pub T, pub T, pub T, pub T, - pub T, pub T, pub T, pub T, - pub T, pub T, pub T, pub T, - pub T, pub T, pub T, pub T); - pub struct Simd32(pub T, pub T, pub T, pub T, - pub T, pub T, pub T, pub T, - pub T, pub T, pub T, pub T, - pub T, pub T, pub T, pub T, - pub T, pub T, pub T, pub T, - pub T, pub T, pub T, pub T, - pub T, pub T, pub T, pub T, - pub T, pub T, pub T, pub T); -} - -pub type u64x2 = Simd2; - -pub type u32x4 = Simd4; -pub type u64x4 = Simd4; - -pub type u16x8 = Simd8; -pub type u32x8 = Simd8; - -pub type u8x16 = Simd16; -pub type u16x16 = Simd16; - -pub type u8x32 = Simd32; - -impl Simd4 { - #[inline(always)] - pub fn new(e0: T, e1: T, e2: T, e3: T) -> Simd4 { - Simd4(e0, e1, e2, e3) - } -} - -unsafe impl Safe for Simd2 {} -unsafe impl Safe for Simd4 {} -unsafe impl Safe for Simd8 {} -unsafe impl Safe for Simd16 {} -unsafe impl Safe for Simd32 {} From 69eff42dbcade6944a51be48f69f5f622281aa6e Mon Sep 17 00:00:00 2001 From: Jack O'Connor Date: Sun, 14 Jun 2020 19:57:58 -0400 Subject: [PATCH 2/3] add support for BLAKE2bp and BLAKE2sp On an Intel i9-9880H with AVX2 support, both BLAKE2bp and BLAKE2sp are about 1.75x faster than BLAKE2b. 
Note that while these algorithms can be implemented with multi-threading, these implementations from blake2b_simd and blake2s_simd are single-threaded, using only SIMD parallelism. The blake2b_simd and blake2s_simd crates don't support salting or personalization for BLAKE2bp and BLAKE2sp, so the `with_params` methods are moved out into blake2b.rs and blake2s.rs. --- blake2/benches/blake2bp.rs | 4 ++++ blake2/benches/blake2sp.rs | 4 ++++ blake2/src/blake2.rs | 43 ++++++++++++-------------------------- blake2/src/blake2b.rs | 31 +++++++++++++++++++++++++++ blake2/src/blake2bp.rs | 13 ++++++++++++ blake2/src/blake2s.rs | 31 +++++++++++++++++++++++++++ blake2/src/blake2sp.rs | 13 ++++++++++++ blake2/src/lib.rs | 4 ++++ 8 files changed, 113 insertions(+), 30 deletions(-) create mode 100644 blake2/benches/blake2bp.rs create mode 100644 blake2/benches/blake2sp.rs create mode 100644 blake2/src/blake2bp.rs create mode 100644 blake2/src/blake2sp.rs diff --git a/blake2/benches/blake2bp.rs b/blake2/benches/blake2bp.rs new file mode 100644 index 000000000..40da1d382 --- /dev/null +++ b/blake2/benches/blake2bp.rs @@ -0,0 +1,4 @@ +#![no_std] +#![feature(test)] + +digest::bench!(blake2::Blake2bp); diff --git a/blake2/benches/blake2sp.rs b/blake2/benches/blake2sp.rs new file mode 100644 index 000000000..f4c8528af --- /dev/null +++ b/blake2/benches/blake2sp.rs @@ -0,0 +1,4 @@ +#![no_std] +#![feature(test)] + +digest::bench!(blake2::Blake2sp); diff --git a/blake2/src/blake2.rs b/blake2/src/blake2.rs index 568c57623..7562eab40 100644 --- a/blake2/src/blake2.rs +++ b/blake2/src/blake2.rs @@ -28,20 +28,10 @@ macro_rules! blake2_impl { /// make sure to compare codes in constant time! It can be done /// for example by using `subtle` crate. pub fn new_keyed(key: &[u8], output_size: usize) -> Self { - Self::with_params(key, &[], &[], output_size) - } - - /// Creates a new hashing context with the full set of sequential-mode parameters. 
- pub fn with_params(key: &[u8], salt: &[u8], persona: &[u8], output_size: usize) -> Self { let mut upstream_params = <$upstream_params>::new(); - upstream_params - .key(key) - .salt(salt) - .personal(persona) - .hash_length(output_size); - + upstream_params.key(key); + upstream_params.hash_length(output_size); let upstream_state = upstream_params.to_state(); - Self { upstream_params, upstream_state, output_size } } @@ -98,24 +88,11 @@ macro_rules! blake2_impl { upstream_state: $upstream_state, } - impl $fix_state { - /// Creates a new hashing context with the full set of sequential-mode parameters. - pub fn with_params(key: &[u8], salt: &[u8], persona: &[u8]) -> Self { - let mut upstream_params = <$upstream_params>::new(); - upstream_params - .key(key) - .salt(salt) - .personal(persona); - - let upstream_state = upstream_params.to_state(); - - Self { upstream_params, upstream_state } - } - } - impl Default for $fix_state { fn default() -> Self { - Self::with_params(&[], &[], &[]) + let upstream_params = <$upstream_params>::new(); + let upstream_state = upstream_params.to_state(); + Self { upstream_params, upstream_state } } } @@ -147,14 +124,20 @@ macro_rules! 
blake2_impl { type KeySize = $key_bytes_typenum; fn new(key: &GenericArray) -> Self { - Self::with_params(&key[..], &[], &[]) + let mut upstream_params = <$upstream_params>::new(); + upstream_params.key(&key[..]); + let upstream_state = upstream_params.to_state(); + Self { upstream_params, upstream_state } } fn new_varkey(key: &[u8]) -> Result { if key.len() > <$key_bytes_typenum>::to_usize() { Err(InvalidKeyLength) } else { - Ok(Self::with_params(key, &[], &[])) + let mut upstream_params = <$upstream_params>::new(); + upstream_params.key(key); + let upstream_state = upstream_params.to_state(); + Ok(Self { upstream_params, upstream_state }) } } } diff --git a/blake2/src/blake2b.rs b/blake2/src/blake2b.rs index 24220a82a..fd63215cc 100644 --- a/blake2/src/blake2b.rs +++ b/blake2/src/blake2b.rs @@ -11,3 +11,34 @@ blake2_impl!( "Blake2b instance with a variable output.", "Blake2b instance with a fixed output.", ); + +impl VarBlake2b { + /// Creates a new hashing context with the full set of sequential-mode parameters. + pub fn with_params(key: &[u8], salt: &[u8], persona: &[u8], output_size: usize) -> Self { + let mut upstream_params = blake2b_simd::Params::new(); + upstream_params + .key(key) + .salt(salt) + .personal(persona) + .hash_length(output_size); + let upstream_state = upstream_params.to_state(); + Self { + upstream_params, + upstream_state, + output_size, + } + } +} + +impl Blake2b { + /// Creates a new hashing context with the full set of sequential-mode parameters. 
+ pub fn with_params(key: &[u8], salt: &[u8], persona: &[u8]) -> Self { + let mut upstream_params = blake2b_simd::Params::new(); + upstream_params.key(key).salt(salt).personal(persona); + let upstream_state = upstream_params.to_state(); + Self { + upstream_params, + upstream_state, + } + } +} diff --git a/blake2/src/blake2bp.rs b/blake2/src/blake2bp.rs new file mode 100644 index 000000000..7d5606aca --- /dev/null +++ b/blake2/src/blake2bp.rs @@ -0,0 +1,13 @@ +use digest::generic_array::typenum::{U128, U64}; + +blake2_impl!( + VarBlake2bp, + Blake2bp, + blake2b_simd::blake2bp::State, + blake2b_simd::blake2bp::Params, + U128, + U64, + U64, + "Blake2bp instance with a variable output.", + "Blake2bp instance with a fixed output.", +); diff --git a/blake2/src/blake2s.rs b/blake2/src/blake2s.rs index bd8735640..0627352ee 100644 --- a/blake2/src/blake2s.rs +++ b/blake2/src/blake2s.rs @@ -11,3 +11,34 @@ blake2_impl!( "Blake2s instance with a variable output.", "Blake2s instance with a fixed output.", ); + +impl VarBlake2s { + /// Creates a new hashing context with the full set of sequential-mode parameters. + pub fn with_params(key: &[u8], salt: &[u8], persona: &[u8], output_size: usize) -> Self { + let mut upstream_params = blake2s_simd::Params::new(); + upstream_params + .key(key) + .salt(salt) + .personal(persona) + .hash_length(output_size); + let upstream_state = upstream_params.to_state(); + Self { + upstream_params, + upstream_state, + output_size, + } + } +} + +impl Blake2s { + /// Creates a new hashing context with the full set of sequential-mode parameters. 
+ pub fn with_params(key: &[u8], salt: &[u8], persona: &[u8]) -> Self { + let mut upstream_params = blake2s_simd::Params::new(); + upstream_params.key(key).salt(salt).personal(persona); + let upstream_state = upstream_params.to_state(); + Self { + upstream_params, + upstream_state, + } + } +} diff --git a/blake2/src/blake2sp.rs b/blake2/src/blake2sp.rs new file mode 100644 index 000000000..16b1ac0bb --- /dev/null +++ b/blake2/src/blake2sp.rs @@ -0,0 +1,13 @@ +use digest::generic_array::typenum::{U32, U64}; + +blake2_impl!( + VarBlake2sp, + Blake2sp, + blake2s_simd::blake2sp::State, + blake2s_simd::blake2sp::Params, + U64, + U32, + U32, + "Blake2sp instance with a variable output.", + "Blake2sp instance with a fixed output.", +); diff --git a/blake2/src/lib.rs b/blake2/src/lib.rs index 356cea614..f4b420c38 100644 --- a/blake2/src/lib.rs +++ b/blake2/src/lib.rs @@ -94,10 +94,14 @@ extern crate std; mod blake2; mod blake2b; +mod blake2bp; mod blake2s; +mod blake2sp; pub use crypto_mac; pub use digest::{self, Digest}; pub use crate::blake2b::{Blake2b, VarBlake2b}; +pub use crate::blake2bp::{Blake2bp, VarBlake2bp}; pub use crate::blake2s::{Blake2s, VarBlake2s}; +pub use crate::blake2sp::{Blake2sp, VarBlake2sp}; From 8ca8053f5929fff3ec3aa63d7617b3e7b8eb4ad9 Mon Sep 17 00:00:00 2001 From: Jack O'Connor Date: Sun, 14 Jun 2020 23:23:29 -0400 Subject: [PATCH 3/3] remove the simd/simd_opt/simd_asm Cargo features for BLAKE2 On x86 targets, SSE4.1 and AVX2 implementations are always compiled. With the `std` feature enabled, runtime CPU feature detection is used to select between them. With `std` disabled (e.g. 
--no-default-features), the only way to activate SIMD is something like export RUSTFLAGS="-C target-cpu=native" --- .github/workflows/blake2.yml | 12 ------------ blake2/Cargo.toml | 3 --- blake2/src/lib.rs | 2 -- 3 files changed, 17 deletions(-) diff --git a/.github/workflows/blake2.yml b/.github/workflows/blake2.yml index 2f94239c6..48af5a175 100644 --- a/.github/workflows/blake2.yml +++ b/.github/workflows/blake2.yml @@ -53,15 +53,3 @@ jobs: override: true - run: cargo test --no-default-features - run: cargo test - simd: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v1 - - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: nightly - override: true - - run: cargo test --features simd - - run: cargo test --features simd_opt - - run: cargo test --features simd_asm diff --git a/blake2/Cargo.toml b/blake2/Cargo.toml index d614b0843..8de9eac91 100644 --- a/blake2/Cargo.toml +++ b/blake2/Cargo.toml @@ -26,6 +26,3 @@ hex-literal = "0.2" [features] default = ["std"] std = ["digest/std", "crypto-mac/std", "blake2b_simd/std", "blake2s_simd/std"] -simd = [] -simd_opt = ["simd"] -simd_asm = ["simd_opt"] diff --git a/blake2/src/lib.rs b/blake2/src/lib.rs index f4b420c38..a7d78e980 100644 --- a/blake2/src/lib.rs +++ b/blake2/src/lib.rs @@ -84,8 +84,6 @@ #![no_std] #![doc(html_logo_url = "https://raw.githubusercontent.com/RustCrypto/meta/master/logo_small.png")] #![warn(missing_docs, rust_2018_idioms)] -#![cfg_attr(feature = "simd", feature(platform_intrinsics, repr_simd))] -#![cfg_attr(feature = "simd_asm", feature(asm))] #[cfg(feature = "std")] extern crate std;