Skip to content

Commit

Permalink
fix(deflate): work around upstream rust change causing performance re…
Browse files Browse the repository at this point in the history
…gression

mostly fixes #163
  • Loading branch information
oyvindln committed Feb 10, 2025
1 parent 921bc2c commit 7014124
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 10 deletions.
7 changes: 5 additions & 2 deletions miniz_oxide/src/deflate/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
//! to avoid stack copies. Box::new() doesn't at the moment, and using a vec means we would lose
//! static length info.
use crate::deflate::core::{LZ_DICT_SIZE, MAX_MATCH_LEN};
use alloc::boxed::Box;
use alloc::vec;
use crate::deflate::core::{LZ_DICT_SIZE, MAX_MATCH_LEN};

/// Size of the buffer of lz77 encoded data.
pub const LZ_CODE_BUF_SIZE: usize = 64 * 1024;
Expand Down Expand Up @@ -42,7 +42,10 @@ impl HashBuffers {
impl Default for HashBuffers {
fn default() -> HashBuffers {
HashBuffers {
dict: vec![0; LZ_DICT_FULL_SIZE].into_boxed_slice().try_into().unwrap(),
dict: vec![0; LZ_DICT_FULL_SIZE]
.into_boxed_slice()
.try_into()
.unwrap(),
next: vec![0; LZ_DICT_SIZE].into_boxed_slice().try_into().unwrap(),
hash: vec![0; LZ_DICT_SIZE].into_boxed_slice().try_into().unwrap(),
}
Expand Down
21 changes: 13 additions & 8 deletions miniz_oxide/src/deflate/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1140,6 +1140,7 @@ pub(crate) struct DictOxide {
pub lookahead_size: usize,
pub lookahead_pos: usize,
pub size: usize,
loop_len: u8,
}

const fn probes_from_flags(flags: u32) -> [u32; 2] {
Expand All @@ -1158,6 +1159,7 @@ impl DictOxide {
lookahead_size: 0,
lookahead_pos: 0,
size: 0,
loop_len: 32,
}
}

Expand Down Expand Up @@ -1203,7 +1205,7 @@ impl DictOxide {

/// Do an unaligned read of the two bytes at `pos` in the dictionary and return them as a
/// `u16` (decoded by `read_u16_le`).
#[inline]
#[inline(always)]
fn read_as_u16(&self, pos: usize) -> u16 {
read_u16_le(&self.b.dict[..], pos)
}
Expand All @@ -1228,16 +1230,16 @@ impl DictOxide {
let max_match_len = cmp::min(MAX_MATCH_LEN as u32, max_match_len);
match_len = cmp::max(match_len, 1);

let pos = lookahead_pos & LZ_DICT_SIZE_MASK;
let mut probe_pos = pos;
// Number of probes into the hash chains.
let mut num_probes_left = self.max_probes[(match_len >= 32) as usize];

// If we already have a match of the full length don't bother searching for another one.
if max_match_len <= match_len {
return (match_dist, match_len);
}

let pos = lookahead_pos & LZ_DICT_SIZE_MASK;
let mut probe_pos = pos;
// Number of probes into the hash chains.
let mut num_probes_left = self.max_probes[(match_len >= 32) as usize];

// Read the last byte of the current match, and the next one, used to compare matches.
let mut c01: u16 = self.read_as_u16(pos + match_len as usize - 1);
// Read the two bytes at the end position of the current match.
Expand Down Expand Up @@ -1289,7 +1291,10 @@ impl DictOxide {
let mut p = pos + 2;
let mut q = probe_pos + 2;
// The first two bytes matched, so check the full length of the match.
for _ in 0..32 {
// TODO: This is a workaround for an upstream issue introduced by an LLVM upgrade in Rust 1.82.
// The compiler is too smart and ends up unrolling the loop, which makes performance worse.
// Using a variable instead of a constant for the loop bound, to prevent the unrolling, seems to
// get back at least some of the performance loss.
for _ in 0..self.loop_len as i32 {
let p_data: u64 = self.read_unaligned_u64(p);
let q_data: u64 = self.read_unaligned_u64(q);
// Compare of 8 bytes at a time by using unaligned loads of 64-bit integers.
Expand All @@ -1312,7 +1317,7 @@ impl DictOxide {
}
// We found a better match, so save the last two bytes for further match
// comparisons.
c01 = self.read_as_u16(pos + match_len as usize - 1)
c01 = read_u16_le(&self.b.dict[..], pos + match_len as usize - 1);
}
continue 'outer;
}
Expand Down

0 comments on commit 7014124

Please sign in to comment.