Patch: md5 - split the block compression function into selectable backends.

Summary of the changes below:
  * Cargo.lock, md5/Cargo.toml: add the `cfg-if` dependency and two new
    features, `loongarch64_asm` and `force-soft`.
  * md5/src/compress.rs: the implementation is replaced by a `cfg_if!`
    dispatcher that re-exports `compress` from `soft`, from `md5_asm`
    (feature `asm` on x86/x86_64) or from `loongarch64_asm` (feature
    `loongarch64_asm` on loongarch64). `force-soft` is tested first, so it
    wins over both assembly backends.
  * md5/src/compress/consts.rs: the 64-entry `RC` table, pulled in by both
    in-tree backends through `#[path = "consts.rs"]`.
  * md5/src/compress/soft.rs: the previous portable implementation, moved
    here unchanged apart from importing `RC` from `consts`.
  * md5/src/compress/loongarch64_asm.rs: new inline-assembly backend.
  * md5/src/lib.rs: `mod compress` is now declared unconditionally and the
    `extern crate md5_asm as compress` alias is removed.

Review notes:
  * The line structure of this patch was restored from a copy in which all
    newlines and indentation had been collapsed. Hunk headers, hashes and
    tokens are untouched; indentation follows rustfmt/Cargo conventions and
    the blank context lines were placed so that every hunk matches its
    `@@` line counts. Whitespace inside the assembly string literals is as
    found in that copy.
  * The `unsafe` block in loongarch64_asm.rs has no `// SAFETY:` comment.
    Its soundness rests on the `blocks.is_empty()` early return (the loop
    decrements `$a2` before testing it), on every `ld.w` offset from `$a1`
    staying inside one 64-byte block (`$k` is at most 15) and on every
    offset from `$a3` staying inside `RC` (`$i` is at most 63).

diff --git a/Cargo.lock b/Cargo.lock
index 6f8aa6fa7..361eac726 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -151,6 +151,7 @@ checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
 name = "md-5"
 version = "0.10.5"
 dependencies = [
+ "cfg-if",
  "digest",
  "hex-literal",
  "md5-asm",
diff --git a/md5/Cargo.toml b/md5/Cargo.toml
index 847530889..e8f89ad1d 100644
--- a/md5/Cargo.toml
+++ b/md5/Cargo.toml
@@ -16,6 +16,7 @@ name = "md5"
 
 [dependencies]
 digest = "0.10.7"
+cfg-if = "1.0"
 
 [target.'cfg(any(target_arch = "x86", target_arch = "x86_64"))'.dependencies]
 md5-asm = { version = "0.5", optional = true }
@@ -28,4 +29,8 @@ hex-literal = "0.2.2"
 default = ["std"]
 std = ["digest/std"]
 asm = ["md5-asm"] # WARNING: this feature SHOULD NOT be enabled by library crates
+# Use assembly backend for LoongArch64 targets
+# WARNING: Bumps MSRV to 1.72. This feature SHOULD NOT be enabled by library crates
+loongarch64_asm = []
 oid = ["digest/oid"] # Enable OID support. WARNING: Bumps MSRV to 1.57
+force-soft = [] # Force software implementation
diff --git a/md5/src/compress.rs b/md5/src/compress.rs
index 46857038e..c0bcd816a 100644
--- a/md5/src/compress.rs
+++ b/md5/src/compress.rs
@@ -1,165 +1,14 @@
-#![allow(clippy::many_single_char_names, clippy::unreadable_literal)]
-use core::convert::TryInto;
-
-const RC: [u32; 64] = [
-    // round 1
-    0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
-    0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
-    // round 2
-    0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
-    0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
-    // round 3
-    0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
-    0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
-    // round 4
-    0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
-    0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
-];
-
-#[inline(always)]
-fn op_f(w: u32, x: u32, y: u32, z: u32, m: u32, c: u32, s: u32) -> u32 {
-    ((x & y) | (!x & z))
-        .wrapping_add(w)
-        .wrapping_add(m)
-        .wrapping_add(c)
-        .rotate_left(s)
-        .wrapping_add(x)
-}
-#[inline(always)]
-fn op_g(w: u32, x: u32, y: u32, z: u32, m: u32, c: u32, s: u32) -> u32 {
-    ((x & z) | (y & !z))
-        .wrapping_add(w)
-        .wrapping_add(m)
-        .wrapping_add(c)
-        .rotate_left(s)
-        .wrapping_add(x)
-}
-
-#[inline(always)]
-fn op_h(w: u32, x: u32, y: u32, z: u32, m: u32, c: u32, s: u32) -> u32 {
-    (x ^ y ^ z)
-        .wrapping_add(w)
-        .wrapping_add(m)
-        .wrapping_add(c)
-        .rotate_left(s)
-        .wrapping_add(x)
-}
-
-#[inline(always)]
-fn op_i(w: u32, x: u32, y: u32, z: u32, m: u32, c: u32, s: u32) -> u32 {
-    (y ^ (x | !z))
-        .wrapping_add(w)
-        .wrapping_add(m)
-        .wrapping_add(c)
-        .rotate_left(s)
-        .wrapping_add(x)
-}
-
-#[inline]
-pub fn compress_block(state: &mut [u32; 4], input: &[u8; 64]) {
-    let mut a = state[0];
-    let mut b = state[1];
-    let mut c = state[2];
-    let mut d = state[3];
-
-    let mut data = [0u32; 16];
-    for (o, chunk) in data.iter_mut().zip(input.chunks_exact(4)) {
-        *o = u32::from_le_bytes(chunk.try_into().unwrap());
-    }
-
-    // round 1
-    a = op_f(a, b, c, d, data[0], RC[0], 7);
-    d = op_f(d, a, b, c, data[1], RC[1], 12);
-    c = op_f(c, d, a, b, data[2], RC[2], 17);
-    b = op_f(b, c, d, a, data[3], RC[3], 22);
-
-    a = op_f(a, b, c, d, data[4], RC[4], 7);
-    d = op_f(d, a, b, c, data[5], RC[5], 12);
-    c = op_f(c, d, a, b, data[6], RC[6], 17);
-    b = op_f(b, c, d, a, data[7], RC[7], 22);
-
-    a = op_f(a, b, c, d, data[8], RC[8], 7);
-    d = op_f(d, a, b, c, data[9], RC[9], 12);
-    c = op_f(c, d, a, b, data[10], RC[10], 17);
-    b = op_f(b, c, d, a, data[11], RC[11], 22);
-
-    a = op_f(a, b, c, d, data[12], RC[12], 7);
-    d = op_f(d, a, b, c, data[13], RC[13], 12);
-    c = op_f(c, d, a, b, data[14], RC[14], 17);
-    b = op_f(b, c, d, a, data[15], RC[15], 22);
-
-    // round 2
-    a = op_g(a, b, c, d, data[1], RC[16], 5);
-    d = op_g(d, a, b, c, data[6], RC[17], 9);
-    c = op_g(c, d, a, b, data[11], RC[18], 14);
-    b = op_g(b, c, d, a, data[0], RC[19], 20);
-
-    a = op_g(a, b, c, d, data[5], RC[20], 5);
-    d = op_g(d, a, b, c, data[10], RC[21], 9);
-    c = op_g(c, d, a, b, data[15], RC[22], 14);
-    b = op_g(b, c, d, a, data[4], RC[23], 20);
-
-    a = op_g(a, b, c, d, data[9], RC[24], 5);
-    d = op_g(d, a, b, c, data[14], RC[25], 9);
-    c = op_g(c, d, a, b, data[3], RC[26], 14);
-    b = op_g(b, c, d, a, data[8], RC[27], 20);
-
-    a = op_g(a, b, c, d, data[13], RC[28], 5);
-    d = op_g(d, a, b, c, data[2], RC[29], 9);
-    c = op_g(c, d, a, b, data[7], RC[30], 14);
-    b = op_g(b, c, d, a, data[12], RC[31], 20);
-
-    // round 3
-    a = op_h(a, b, c, d, data[5], RC[32], 4);
-    d = op_h(d, a, b, c, data[8], RC[33], 11);
-    c = op_h(c, d, a, b, data[11], RC[34], 16);
-    b = op_h(b, c, d, a, data[14], RC[35], 23);
-
-    a = op_h(a, b, c, d, data[1], RC[36], 4);
-    d = op_h(d, a, b, c, data[4], RC[37], 11);
-    c = op_h(c, d, a, b, data[7], RC[38], 16);
-    b = op_h(b, c, d, a, data[10], RC[39], 23);
-
-    a = op_h(a, b, c, d, data[13], RC[40], 4);
-    d = op_h(d, a, b, c, data[0], RC[41], 11);
-    c = op_h(c, d, a, b, data[3], RC[42], 16);
-    b = op_h(b, c, d, a, data[6], RC[43], 23);
-
-    a = op_h(a, b, c, d, data[9], RC[44], 4);
-    d = op_h(d, a, b, c, data[12], RC[45], 11);
-    c = op_h(c, d, a, b, data[15], RC[46], 16);
-    b = op_h(b, c, d, a, data[2], RC[47], 23);
-
-    // round 4
-    a = op_i(a, b, c, d, data[0], RC[48], 6);
-    d = op_i(d, a, b, c, data[7], RC[49], 10);
-    c = op_i(c, d, a, b, data[14], RC[50], 15);
-    b = op_i(b, c, d, a, data[5], RC[51], 21);
-
-    a = op_i(a, b, c, d, data[12], RC[52], 6);
-    d = op_i(d, a, b, c, data[3], RC[53], 10);
-    c = op_i(c, d, a, b, data[10], RC[54], 15);
-    b = op_i(b, c, d, a, data[1], RC[55], 21);
-
-    a = op_i(a, b, c, d, data[8], RC[56], 6);
-    d = op_i(d, a, b, c, data[15], RC[57], 10);
-    c = op_i(c, d, a, b, data[6], RC[58], 15);
-    b = op_i(b, c, d, a, data[13], RC[59], 21);
-
-    a = op_i(a, b, c, d, data[4], RC[60], 6);
-    d = op_i(d, a, b, c, data[11], RC[61], 10);
-    c = op_i(c, d, a, b, data[2], RC[62], 15);
-    b = op_i(b, c, d, a, data[9], RC[63], 21);
-
-    state[0] = state[0].wrapping_add(a);
-    state[1] = state[1].wrapping_add(b);
-    state[2] = state[2].wrapping_add(c);
-    state[3] = state[3].wrapping_add(d);
-}
-
-#[inline]
-pub fn compress(state: &mut [u32; 4], blocks: &[[u8; 64]]) {
-    for block in blocks {
-        compress_block(state, block)
+cfg_if::cfg_if! {
+    if #[cfg(feature = "force-soft")] {
+        mod soft;
+        pub use soft::compress;
+    } else if #[cfg(all(feature = "asm", any(target_arch = "x86", target_arch = "x86_64")))] {
+        pub use md5_asm::compress;
+    } else if #[cfg(all(feature = "loongarch64_asm", target_arch = "loongarch64"))] {
+        mod loongarch64_asm;
+        pub use loongarch64_asm::compress;
+    } else {
+        mod soft;
+        pub use soft::compress;
     }
 }
diff --git a/md5/src/compress/consts.rs b/md5/src/compress/consts.rs
new file mode 100644
index 000000000..2b6d13042
--- /dev/null
+++ b/md5/src/compress/consts.rs
@@ -0,0 +1,14 @@
+pub const RC: [u32; 64] = [
+    // round 1
+    0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
+    0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
+    // round 2
+    0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
+    0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
+    // round 3
+    0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
+    0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
+    // round 4
+    0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
+    0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
+];
diff --git a/md5/src/compress/loongarch64_asm.rs b/md5/src/compress/loongarch64_asm.rs
new file mode 100644
index 000000000..2af53af35
--- /dev/null
+++ b/md5/src/compress/loongarch64_asm.rs
@@ -0,0 +1,182 @@
+//! LoongArch64 assembly backend
+
+use core::arch::asm;
+
+#[path = "consts.rs"]
+mod consts;
+use consts::*;
+
+macro_rules! c {
+    ($($l:expr)*) => {
+        concat!($($l ,)*)
+    };
+}
+
+macro_rules! round0 {
+    ($a:literal, $b:literal, $c:literal, $d:literal, $k:literal, $s:literal, $i:literal) => {
+        c!(
+            "xor $t4," $c "," $d ";"
+            "and $t4, $t4," $b ";"
+            "xor $t4, $t4," $d ";"
+            roundtail!($a, $b, $k, $s, $i)
+        )
+    }
+}
+
+macro_rules! round1 {
+    ($a:literal, $b:literal, $c:literal, $d:literal, $k:literal, $s:literal, $i:literal) => {
+        c!(
+            "andn $t4," $c "," $d ";"
+            "and $t5," $d "," $b ";"
+            "or $t4, $t4, $t5;"
+            roundtail!($a, $b, $k, $s, $i)
+        )
+    }
+}
+
+macro_rules! round2 {
+    ($a:literal, $b:literal, $c:literal, $d:literal, $k:literal, $s:literal, $i:literal) => {
+        c!(
+            "xor $t4," $c "," $d ";"
+            "xor $t4, $t4," $b ";"
+            roundtail!($a, $b, $k, $s, $i)
+        )
+    }
+}
+
+macro_rules! round3 {
+    ($a:literal, $b:literal, $c:literal, $d:literal, $k:literal, $s:literal, $i:literal) => {
+        c!(
+            "orn $t4," $b "," $d ";"
+            "xor $t4, $t4," $c ";"
+            roundtail!($a, $b, $k, $s, $i)
+        )
+    }
+}
+
+macro_rules! roundtail {
+    ($a:literal, $b:literal, $k:literal, $s:literal, $i:literal) => {
+        c!(
+            "ld.w $t5, $a3," $i " * 4;"
+            "ld.w $t6, $a1," $k " * 4;"
+            "add.w " $a "," $a ", $t5;"
+            "add.w " $a "," $a ", $t6;"
+            "add.w " $a "," $a ", $t4;"
+            "rotri.w " $a "," $a ", 32 -" $s ";"
+            "add.w " $a "," $a "," $b ";"
+        )
+    }
+}
+
+pub fn compress(state: &mut [u32; 4], blocks: &[[u8; 64]]) {
+    if blocks.is_empty() {
+        return;
+    }
+
+    unsafe {
+        asm!(
+            "42:",
+
+            "move $t0, $a4",
+            "move $t1, $a5",
+            "move $t2, $a6",
+            "move $t3, $a7",
+
+            /* 64 rounds of hashing */
+            round0!("$t0", "$t1", "$t2", "$t3", 0, 7, 0),
+            round0!("$t3", "$t0", "$t1", "$t2", 1, 12, 1),
+            round0!("$t2", "$t3", "$t0", "$t1", 2, 17, 2),
+            round0!("$t1", "$t2", "$t3", "$t0", 3, 22, 3),
+            round0!("$t0", "$t1", "$t2", "$t3", 4, 7, 4),
+            round0!("$t3", "$t0", "$t1", "$t2", 5, 12, 5),
+            round0!("$t2", "$t3", "$t0", "$t1", 6, 17, 6),
+            round0!("$t1", "$t2", "$t3", "$t0", 7, 22, 7),
+            round0!("$t0", "$t1", "$t2", "$t3", 8, 7, 8),
+            round0!("$t3", "$t0", "$t1", "$t2", 9, 12, 9),
+            round0!("$t2", "$t3", "$t0", "$t1", 10, 17, 10),
+            round0!("$t1", "$t2", "$t3", "$t0", 11, 22, 11),
+            round0!("$t0", "$t1", "$t2", "$t3", 12, 7, 12),
+            round0!("$t3", "$t0", "$t1", "$t2", 13, 12, 13),
+            round0!("$t2", "$t3", "$t0", "$t1", 14, 17, 14),
+            round0!("$t1", "$t2", "$t3", "$t0", 15, 22, 15),
+            round1!("$t0", "$t1", "$t2", "$t3", 1, 5, 16),
+            round1!("$t3", "$t0", "$t1", "$t2", 6, 9, 17),
+            round1!("$t2", "$t3", "$t0", "$t1", 11, 14, 18),
+            round1!("$t1", "$t2", "$t3", "$t0", 0, 20, 19),
+            round1!("$t0", "$t1", "$t2", "$t3", 5, 5, 20),
+            round1!("$t3", "$t0", "$t1", "$t2", 10, 9, 21),
+            round1!("$t2", "$t3", "$t0", "$t1", 15, 14, 22),
+            round1!("$t1", "$t2", "$t3", "$t0", 4, 20, 23),
+            round1!("$t0", "$t1", "$t2", "$t3", 9, 5, 24),
+            round1!("$t3", "$t0", "$t1", "$t2", 14, 9, 25),
+            round1!("$t2", "$t3", "$t0", "$t1", 3, 14, 26),
+            round1!("$t1", "$t2", "$t3", "$t0", 8, 20, 27),
+            round1!("$t0", "$t1", "$t2", "$t3", 13, 5, 28),
+            round1!("$t3", "$t0", "$t1", "$t2", 2, 9, 29),
+            round1!("$t2", "$t3", "$t0", "$t1", 7, 14, 30),
+            round1!("$t1", "$t2", "$t3", "$t0", 12, 20, 31),
+            round2!("$t0", "$t1", "$t2", "$t3", 5, 4, 32),
+            round2!("$t3", "$t0", "$t1", "$t2", 8, 11, 33),
+            round2!("$t2", "$t3", "$t0", "$t1", 11, 16, 34),
+            round2!("$t1", "$t2", "$t3", "$t0", 14, 23, 35),
+            round2!("$t0", "$t1", "$t2", "$t3", 1, 4, 36),
+            round2!("$t3", "$t0", "$t1", "$t2", 4, 11, 37),
+            round2!("$t2", "$t3", "$t0", "$t1", 7, 16, 38),
+            round2!("$t1", "$t2", "$t3", "$t0", 10, 23, 39),
+            round2!("$t0", "$t1", "$t2", "$t3", 13, 4, 40),
+            round2!("$t3", "$t0", "$t1", "$t2", 0, 11, 41),
+            round2!("$t2", "$t3", "$t0", "$t1", 3, 16, 42),
+            round2!("$t1", "$t2", "$t3", "$t0", 6, 23, 43),
+            round2!("$t0", "$t1", "$t2", "$t3", 9, 4, 44),
+            round2!("$t3", "$t0", "$t1", "$t2", 12, 11, 45),
+            round2!("$t2", "$t3", "$t0", "$t1", 15, 16, 46),
+            round2!("$t1", "$t2", "$t3", "$t0", 2, 23, 47),
+            round3!("$t0", "$t1", "$t2", "$t3", 0, 6, 48),
+            round3!("$t3", "$t0", "$t1", "$t2", 7, 10, 49),
+            round3!("$t2", "$t3", "$t0", "$t1", 14, 15, 50),
+            round3!("$t1", "$t2", "$t3", "$t0", 5, 21, 51),
+            round3!("$t0", "$t1", "$t2", "$t3", 12, 6, 52),
+            round3!("$t3", "$t0", "$t1", "$t2", 3, 10, 53),
+            round3!("$t2", "$t3", "$t0", "$t1", 10, 15, 54),
+            round3!("$t1", "$t2", "$t3", "$t0", 1, 21, 55),
+            round3!("$t0", "$t1", "$t2", "$t3", 8, 6, 56),
+            round3!("$t3", "$t0", "$t1", "$t2", 15, 10, 57),
+            round3!("$t2", "$t3", "$t0", "$t1", 6, 15, 58),
+            round3!("$t1", "$t2", "$t3", "$t0", 13, 21, 59),
+            round3!("$t0", "$t1", "$t2", "$t3", 4, 6, 60),
+            round3!("$t3", "$t0", "$t1", "$t2", 11, 10, 61),
+            round3!("$t2", "$t3", "$t0", "$t1", 2, 15, 62),
+            round3!("$t1", "$t2", "$t3", "$t0", 9, 21, 63),
+
+            "add.w $a4, $a4, $t0",
+            "add.w $a5, $a5, $t1",
+            "add.w $a6, $a6, $t2",
+            "add.w $a7, $a7, $t3",
+
+            // Looping over blocks
+            "addi.d $a1, $a1, 64",
+            "addi.d $a2, $a2, -1",
+            "bnez $a2, 42b",
+
+            inout("$a4") state[0],
+            inout("$a5") state[1],
+            inout("$a6") state[2],
+            inout("$a7") state[3],
+            inout("$a1") blocks.as_ptr() => _,
+            inout("$a2") blocks.len() => _,
+
+            in("$a3") RC.as_ptr(),
+
+            // Clobbers
+            out("$t0") _,
+            out("$t1") _,
+            out("$t2") _,
+            out("$t3") _,
+            out("$t4") _,
+            out("$t5") _,
+            out("$t6") _,
+
+            options(preserves_flags, readonly, pure, nostack),
+        );
+    }
+}
diff --git a/md5/src/compress/soft.rs b/md5/src/compress/soft.rs
new file mode 100644
index 000000000..c41f2bb41
--- /dev/null
+++ b/md5/src/compress/soft.rs
@@ -0,0 +1,154 @@
+#![allow(clippy::many_single_char_names, clippy::unreadable_literal)]
+use core::convert::TryInto;
+
+#[path = "consts.rs"]
+mod consts;
+use consts::*;
+
+#[inline(always)]
+fn op_f(w: u32, x: u32, y: u32, z: u32, m: u32, c: u32, s: u32) -> u32 {
+    ((x & y) | (!x & z))
+        .wrapping_add(w)
+        .wrapping_add(m)
+        .wrapping_add(c)
+        .rotate_left(s)
+        .wrapping_add(x)
+}
+#[inline(always)]
+fn op_g(w: u32, x: u32, y: u32, z: u32, m: u32, c: u32, s: u32) -> u32 {
+    ((x & z) | (y & !z))
+        .wrapping_add(w)
+        .wrapping_add(m)
+        .wrapping_add(c)
+        .rotate_left(s)
+        .wrapping_add(x)
+}
+
+#[inline(always)]
+fn op_h(w: u32, x: u32, y: u32, z: u32, m: u32, c: u32, s: u32) -> u32 {
+    (x ^ y ^ z)
+        .wrapping_add(w)
+        .wrapping_add(m)
+        .wrapping_add(c)
+        .rotate_left(s)
+        .wrapping_add(x)
+}
+
+#[inline(always)]
+fn op_i(w: u32, x: u32, y: u32, z: u32, m: u32, c: u32, s: u32) -> u32 {
+    (y ^ (x | !z))
+        .wrapping_add(w)
+        .wrapping_add(m)
+        .wrapping_add(c)
+        .rotate_left(s)
+        .wrapping_add(x)
+}
+
+#[inline]
+pub fn compress_block(state: &mut [u32; 4], input: &[u8; 64]) {
+    let mut a = state[0];
+    let mut b = state[1];
+    let mut c = state[2];
+    let mut d = state[3];
+
+    let mut data = [0u32; 16];
+    for (o, chunk) in data.iter_mut().zip(input.chunks_exact(4)) {
+        *o = u32::from_le_bytes(chunk.try_into().unwrap());
+    }
+
+    // round 1
+    a = op_f(a, b, c, d, data[0], RC[0], 7);
+    d = op_f(d, a, b, c, data[1], RC[1], 12);
+    c = op_f(c, d, a, b, data[2], RC[2], 17);
+    b = op_f(b, c, d, a, data[3], RC[3], 22);
+
+    a = op_f(a, b, c, d, data[4], RC[4], 7);
+    d = op_f(d, a, b, c, data[5], RC[5], 12);
+    c = op_f(c, d, a, b, data[6], RC[6], 17);
+    b = op_f(b, c, d, a, data[7], RC[7], 22);
+
+    a = op_f(a, b, c, d, data[8], RC[8], 7);
+    d = op_f(d, a, b, c, data[9], RC[9], 12);
+    c = op_f(c, d, a, b, data[10], RC[10], 17);
+    b = op_f(b, c, d, a, data[11], RC[11], 22);
+
+    a = op_f(a, b, c, d, data[12], RC[12], 7);
+    d = op_f(d, a, b, c, data[13], RC[13], 12);
+    c = op_f(c, d, a, b, data[14], RC[14], 17);
+    b = op_f(b, c, d, a, data[15], RC[15], 22);
+
+    // round 2
+    a = op_g(a, b, c, d, data[1], RC[16], 5);
+    d = op_g(d, a, b, c, data[6], RC[17], 9);
+    c = op_g(c, d, a, b, data[11], RC[18], 14);
+    b = op_g(b, c, d, a, data[0], RC[19], 20);
+
+    a = op_g(a, b, c, d, data[5], RC[20], 5);
+    d = op_g(d, a, b, c, data[10], RC[21], 9);
+    c = op_g(c, d, a, b, data[15], RC[22], 14);
+    b = op_g(b, c, d, a, data[4], RC[23], 20);
+
+    a = op_g(a, b, c, d, data[9], RC[24], 5);
+    d = op_g(d, a, b, c, data[14], RC[25], 9);
+    c = op_g(c, d, a, b, data[3], RC[26], 14);
+    b = op_g(b, c, d, a, data[8], RC[27], 20);
+
+    a = op_g(a, b, c, d, data[13], RC[28], 5);
+    d = op_g(d, a, b, c, data[2], RC[29], 9);
+    c = op_g(c, d, a, b, data[7], RC[30], 14);
+    b = op_g(b, c, d, a, data[12], RC[31], 20);
+
+    // round 3
+    a = op_h(a, b, c, d, data[5], RC[32], 4);
+    d = op_h(d, a, b, c, data[8], RC[33], 11);
+    c = op_h(c, d, a, b, data[11], RC[34], 16);
+    b = op_h(b, c, d, a, data[14], RC[35], 23);
+
+    a = op_h(a, b, c, d, data[1], RC[36], 4);
+    d = op_h(d, a, b, c, data[4], RC[37], 11);
+    c = op_h(c, d, a, b, data[7], RC[38], 16);
+    b = op_h(b, c, d, a, data[10], RC[39], 23);
+
+    a = op_h(a, b, c, d, data[13], RC[40], 4);
+    d = op_h(d, a, b, c, data[0], RC[41], 11);
+    c = op_h(c, d, a, b, data[3], RC[42], 16);
+    b = op_h(b, c, d, a, data[6], RC[43], 23);
+
+    a = op_h(a, b, c, d, data[9], RC[44], 4);
+    d = op_h(d, a, b, c, data[12], RC[45], 11);
+    c = op_h(c, d, a, b, data[15], RC[46], 16);
+    b = op_h(b, c, d, a, data[2], RC[47], 23);
+
+    // round 4
+    a = op_i(a, b, c, d, data[0], RC[48], 6);
+    d = op_i(d, a, b, c, data[7], RC[49], 10);
+    c = op_i(c, d, a, b, data[14], RC[50], 15);
+    b = op_i(b, c, d, a, data[5], RC[51], 21);
+
+    a = op_i(a, b, c, d, data[12], RC[52], 6);
+    d = op_i(d, a, b, c, data[3], RC[53], 10);
+    c = op_i(c, d, a, b, data[10], RC[54], 15);
+    b = op_i(b, c, d, a, data[1], RC[55], 21);
+
+    a = op_i(a, b, c, d, data[8], RC[56], 6);
+    d = op_i(d, a, b, c, data[15], RC[57], 10);
+    c = op_i(c, d, a, b, data[6], RC[58], 15);
+    b = op_i(b, c, d, a, data[13], RC[59], 21);
+
+    a = op_i(a, b, c, d, data[4], RC[60], 6);
+    d = op_i(d, a, b, c, data[11], RC[61], 10);
+    c = op_i(c, d, a, b, data[2], RC[62], 15);
+    b = op_i(b, c, d, a, data[9], RC[63], 21);
+
+    state[0] = state[0].wrapping_add(a);
+    state[1] = state[1].wrapping_add(b);
+    state[2] = state[2].wrapping_add(c);
+    state[3] = state[3].wrapping_add(d);
+}
+
+#[inline]
+pub fn compress(state: &mut [u32; 4], blocks: &[[u8; 64]]) {
+    for block in blocks {
+        compress_block(state, block)
+    }
+}
diff --git a/md5/src/lib.rs b/md5/src/lib.rs
index 87fe9134f..085275ba9 100644
--- a/md5/src/lib.rs
+++ b/md5/src/lib.rs
@@ -30,14 +30,9 @@
 )]
 #![warn(missing_docs, rust_2018_idioms)]
 
-#[cfg(all(feature = "asm", any(target_arch = "x86", target_arch = "x86_64")))]
-extern crate md5_asm as compress;
-
-#[cfg(not(all(feature = "asm", any(target_arch = "x86", target_arch = "x86_64"))))]
-mod compress;
-
 pub use digest::{self, Digest};
 
+mod compress;
 use compress::compress;
 
 use core::{fmt, slice::from_ref};