From 36b495f3cf23a1f235482ce7f81f0f4be614bb85 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 10 Feb 2022 00:47:48 +1100 Subject: [PATCH] Introduce `ChunkedBitSet` and use it for some dataflow analyses. This reduces peak memory usage significantly for some programs with very large functions, such as: - `keccak`, `unicode_normalization`, and `match-stress-enum`, from the `rustc-perf` benchmark suite; - `http-0.2.6` from crates.io. The new type is used in the analyses where the bitsets can get huge (e.g. 10s of thousands of bits): `MaybeInitializedPlaces`, `MaybeUninitializedPlaces`, and `EverInitializedPlaces`. Some refactoring was required in `rustc_mir_dataflow`. All existing analysis domains are either `BitSet` or a trivial wrapper around `BitSet`, and access in a few places is done via `Borrow` or `BorrowMut`. Now that some of these domains are `ClusterBitSet`, that no longer works. So this commit replaces the `Borrow`/`BorrowMut` usage with a new trait `BitSetExt` containing the needed bitset operations. The impls just forward these to the underlying bitset type. This required fiddling with trait bounds in a few places. The commit also: - Moves `static_assert_size` from `rustc_data_structures` to `rustc_index` so it can be used in the latter; the former now re-exports it so existing users are unaffected. - Factors out some common "clear excess bits in the final word" functionality in `bit_set.rs`. - Uses `fill` in a few places instead of loops. --- compiler/rustc_borrowck/src/lib.rs | 4 +- compiler/rustc_data_structures/src/lib.rs | 2 + compiler/rustc_data_structures/src/macros.rs | 8 - compiler/rustc_index/src/bit_set.rs | 498 +++++++++++++++++- compiler/rustc_index/src/bit_set/tests.rs | 195 +++++++ compiler/rustc_index/src/lib.rs | 12 +- .../src/framework/cursor.rs | 12 +- .../src/framework/engine.rs | 9 +- .../rustc_mir_dataflow/src/framework/fmt.rs | 15 +- .../src/framework/lattice.rs | 35 +- .../rustc_mir_dataflow/src/framework/mod.rs | 62 ++- compiler/rustc_mir_dataflow/src/impls/mod.rs | 14 +- compiler/rustc_mir_dataflow/src/rustc_peek.rs | 7 +- .../src/remove_uninit_drops.rs | 4 +- 14 files changed, 804 insertions(+), 73 deletions(-) diff --git a/compiler/rustc_borrowck/src/lib.rs b/compiler/rustc_borrowck/src/lib.rs index 719862e67c873..753eed5306807 100644 --- a/compiler/rustc_borrowck/src/lib.rs +++ b/compiler/rustc_borrowck/src/lib.rs @@ -22,7 +22,7 @@ use rustc_errors::{Applicability, Diagnostic, DiagnosticBuilder, ErrorReported}; use rustc_hir as hir; use rustc_hir::def_id::LocalDefId; use rustc_hir::Node; -use rustc_index::bit_set::BitSet; +use rustc_index::bit_set::ChunkedBitSet; use rustc_index::vec::IndexVec; use rustc_infer::infer::{InferCtxt, TyCtxtInferExt}; use rustc_middle::mir::{ @@ -1667,7 +1667,7 @@ impl<'cx, 'tcx> MirBorrowckCtxt<'cx, 'tcx> { location: Location, desired_action: InitializationRequiringAction, place_span: (PlaceRef<'tcx>, Span), - maybe_uninits: &BitSet, + maybe_uninits: &ChunkedBitSet, from: u64, to: u64, ) { diff --git a/compiler/rustc_data_structures/src/lib.rs b/compiler/rustc_data_structures/src/lib.rs index ea02a73c42204..3e93fe9a11f72 100644 --- a/compiler/rustc_data_structures/src/lib.rs +++ b/compiler/rustc_data_structures/src/lib.rs @@ -37,6 +37,8 @@ extern crate cfg_if; #[macro_use] extern crate rustc_macros; +pub use rustc_index::static_assert_size; + #[inline(never)] #[cold] pub fn cold_path R, R>(f: F) -> R { diff --git a/compiler/rustc_data_structures/src/macros.rs b/compiler/rustc_data_structures/src/macros.rs index 48dfbba7504ef..e05491f6ff6e4 100644 --- a/compiler/rustc_data_structures/src/macros.rs +++ b/compiler/rustc_data_structures/src/macros.rs @@ -1,11 +1,3 @@ -/// Type size assertion. The first argument is a type and the second argument is its expected size. -#[macro_export] -macro_rules! static_assert_size { - ($ty:ty, $size:expr) => { - const _: [(); $size] = [(); ::std::mem::size_of::<$ty>()]; - }; -} - #[macro_export] macro_rules! enum_from_u32 { ($(#[$attr:meta])* pub enum $name:ident { diff --git a/compiler/rustc_index/src/bit_set.rs b/compiler/rustc_index/src/bit_set.rs index 7f376c5fbe54b..12bde02949452 100644 --- a/compiler/rustc_index/src/bit_set.rs +++ b/compiler/rustc_index/src/bit_set.rs @@ -5,16 +5,37 @@ use std::iter; use std::marker::PhantomData; use std::mem; use std::ops::{BitAnd, BitAndAssign, BitOrAssign, Bound, Not, Range, RangeBounds, Shl}; +use std::rc::Rc; use std::slice; use rustc_macros::{Decodable, Encodable}; +use Chunk::*; + #[cfg(test)] mod tests; -pub type Word = u64; -pub const WORD_BYTES: usize = mem::size_of::(); -pub const WORD_BITS: usize = WORD_BYTES * 8; +type Word = u64; +const WORD_BYTES: usize = mem::size_of::(); +const WORD_BITS: usize = WORD_BYTES * 8; + +// The choice of chunk size has some trade-offs. +// +// A big chunk size tends to favour cases where many large `ChunkedBitSet`s are +// present, because they require fewer `Chunk`s, reducing the number of +// allocations and reducing peak memory usage. Also, fewer chunk operations are +// required, though more of them might be `Mixed`. +// +// A small chunk size tends to favour cases where many small `ChunkedBitSet`s +// are present, because less space is wasted at the end of the final chunk (if +// it's not full). +const CHUNK_WORDS: usize = 32; +const CHUNK_BITS: usize = CHUNK_WORDS * WORD_BITS; // 2048 bits + +/// ChunkSize is small to keep `Chunk` small. The static assertion ensures it's +/// not too small. +type ChunkSize = u16; +const _: () = assert!(CHUNK_BITS <= ChunkSize::MAX as usize); pub trait BitRelations { fn union(&mut self, other: &Rhs) -> bool; @@ -121,19 +142,12 @@ impl BitSet { /// Clear all elements. #[inline] pub fn clear(&mut self) { - for word in &mut self.words { - *word = 0; - } + self.words.fill(0); } /// Clear excess bits in the final word. fn clear_excess_bits(&mut self) { - let num_bits_in_final_word = self.domain_size % WORD_BITS; - if num_bits_in_final_word > 0 { - let mask = (1 << num_bits_in_final_word) - 1; - let final_word_idx = self.words.len() - 1; - self.words[final_word_idx] &= mask; - } + clear_excess_bits_in_final_word(self.domain_size, &mut self.words); } /// Count the number of set bits in the set. @@ -203,9 +217,7 @@ impl BitSet { /// Sets all bits to true. pub fn insert_all(&mut self) { - for word in &mut self.words { - *word = !0; - } + self.words.fill(!0); self.clear_excess_bits(); } @@ -328,6 +340,407 @@ impl BitRelations> for BitSet { } } +/// A fixed-size bitset type with a partially dense, partially sparse +/// representation. The bitset is broken into chunks, and chunks that are all +/// zeros or all ones are represented and handled very efficiently. +/// +/// This type is especially efficient for sets that typically have a large +/// `domain_size` with significant stretches of all zeros or all ones, and also +/// some stretches with lots of 0s and 1s mixed in a way that causes trouble +/// for `IntervalSet`. +/// +/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also +/// just be `usize`. +/// +/// All operations that involve an element will panic if the element is equal +/// to or greater than the domain size. All operations that involve two bitsets +/// will panic if the bitsets have differing domain sizes. +#[derive(Debug, PartialEq, Eq)] +pub struct ChunkedBitSet { + domain_size: usize, + + /// The chunks. Each one contains exactly CHUNK_BITS values, except the + /// last one which contains 1..=CHUNK_BITS values. + chunks: Box<[Chunk]>, + + marker: PhantomData, +} + +// Note: the chunk domain size is duplicated in each variant. This is a bit +// inconvenient, but it allows the type size to be smaller than if we had an +// outer struct containing a chunk domain size plus the `Chunk`, because the +// compiler can place the chunk domain size after the tag. +#[derive(Clone, Debug, PartialEq, Eq)] +enum Chunk { + /// A chunk that is all zeros; we don't represent the zeros explicitly. + Zeros(ChunkSize), + + /// A chunk that is all ones; we don't represent the ones explicitly. + Ones(ChunkSize), + + /// A chunk that has a mix of zeros and ones, which are represented + /// explicitly and densely. It never has all zeros or all ones. + /// + /// If this is the final chunk there may be excess, unused words. This + /// turns out to be both simpler and have better performance than + /// allocating the minimum number of words, largely because we avoid having + /// to store the length, which would make this type larger. These excess + /// words are always be zero, as are any excess bits in the final in-use + /// word. + /// + /// The second field is the count of 1s set in the chunk, and must satisfy + /// `0 < count < chunk_domain_size`. + /// + /// The words are within an `Rc` because it's surprisingly common to + /// duplicate an entire chunk, e.g. in `ChunkedBitSet::clone_from()`, or + /// when a `Mixed` chunk is union'd into a `Zeros` chunk. When we do need + /// to modify a chunk we use `Rc::make_mut`. + Mixed(ChunkSize, ChunkSize, Rc<[Word; CHUNK_WORDS]>), +} + +// This type is used a lot. Make sure it doesn't unintentionally get bigger. +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +crate::static_assert_size!(Chunk, 16); + +impl ChunkedBitSet { + pub fn domain_size(&self) -> usize { + self.domain_size + } + + #[cfg(test)] + fn assert_valid(&self) { + if self.domain_size == 0 { + assert!(self.chunks.is_empty()); + return; + } + + assert!((self.chunks.len() - 1) * CHUNK_BITS <= self.domain_size); + assert!(self.chunks.len() * CHUNK_BITS >= self.domain_size); + for chunk in self.chunks.iter() { + chunk.assert_valid(); + } + } +} + +impl ChunkedBitSet { + /// Creates a new bitset with a given `domain_size` and chunk kind. + fn new(domain_size: usize, is_empty: bool) -> Self { + let chunks = if domain_size == 0 { + Box::new([]) + } else { + // All the chunks have a chunk_domain_size of `CHUNK_BITS` except + // the final one. + let final_chunk_domain_size = { + let n = domain_size % CHUNK_BITS; + if n == 0 { CHUNK_BITS } else { n } + }; + let mut chunks = + vec![Chunk::new(CHUNK_BITS, is_empty); num_chunks(domain_size)].into_boxed_slice(); + *chunks.last_mut().unwrap() = Chunk::new(final_chunk_domain_size, is_empty); + chunks + }; + ChunkedBitSet { domain_size, chunks, marker: PhantomData } + } + + /// Creates a new, empty bitset with a given `domain_size`. + #[inline] + pub fn new_empty(domain_size: usize) -> Self { + ChunkedBitSet::new(domain_size, /* is_empty */ true) + } + + /// Creates a new, filled bitset with a given `domain_size`. + #[inline] + pub fn new_filled(domain_size: usize) -> Self { + ChunkedBitSet::new(domain_size, /* is_empty */ false) + } + + #[cfg(test)] + fn chunks(&self) -> &[Chunk] { + &self.chunks + } + + /// Count the number of bits in the set. + pub fn count(&self) -> usize { + self.chunks.iter().map(|chunk| chunk.count()).sum() + } + + /// Returns `true` if `self` contains `elem`. + #[inline] + pub fn contains(&self, elem: T) -> bool { + assert!(elem.index() < self.domain_size); + let chunk = &self.chunks[chunk_index(elem)]; + match &chunk { + Zeros(_) => false, + Ones(_) => true, + Mixed(_, _, words) => { + let (word_index, mask) = chunk_word_index_and_mask(elem); + (words[word_index] & mask) != 0 + } + } + } + + /// Insert `elem`. Returns whether the set has changed. + pub fn insert(&mut self, elem: T) -> bool { + assert!(elem.index() < self.domain_size); + let chunk_index = chunk_index(elem); + let chunk = &mut self.chunks[chunk_index]; + match *chunk { + Zeros(chunk_domain_size) => { + if chunk_domain_size > 1 { + // We take some effort to avoid copying the words. + let words = Rc::<[Word; CHUNK_WORDS]>::new_zeroed(); + // SAFETY: `words` can safely be all zeroes. + let mut words = unsafe { words.assume_init() }; + let words_ref = Rc::get_mut(&mut words).unwrap(); + + let (word_index, mask) = chunk_word_index_and_mask(elem); + words_ref[word_index] |= mask; + *chunk = Mixed(chunk_domain_size, 1, words); + } else { + *chunk = Ones(chunk_domain_size); + } + true + } + Ones(_) => false, + Mixed(chunk_domain_size, ref mut count, ref mut words) => { + // We skip all the work if the bit is already set. + let (word_index, mask) = chunk_word_index_and_mask(elem); + if (words[word_index] & mask) == 0 { + *count += 1; + if *count < chunk_domain_size { + let words = Rc::make_mut(words); + words[word_index] |= mask; + } else { + *chunk = Ones(chunk_domain_size); + } + true + } else { + false + } + } + } + } + + /// Sets all bits to true. + pub fn insert_all(&mut self) { + for chunk in self.chunks.iter_mut() { + *chunk = match *chunk { + Zeros(chunk_domain_size) + | Ones(chunk_domain_size) + | Mixed(chunk_domain_size, ..) => Ones(chunk_domain_size), + } + } + } + + /// Returns `true` if the set has changed. + pub fn remove(&mut self, elem: T) -> bool { + assert!(elem.index() < self.domain_size); + let chunk_index = chunk_index(elem); + let chunk = &mut self.chunks[chunk_index]; + match *chunk { + Zeros(_) => false, + Ones(chunk_domain_size) => { + if chunk_domain_size > 1 { + // We take some effort to avoid copying the words. + let words = Rc::<[Word; CHUNK_WORDS]>::new_zeroed(); + // SAFETY: `words` can safely be all zeroes. + let mut words = unsafe { words.assume_init() }; + let words_ref = Rc::get_mut(&mut words).unwrap(); + + // Set only the bits in use. + let num_words = num_words(chunk_domain_size as usize); + words_ref[..num_words].fill(!0); + clear_excess_bits_in_final_word( + chunk_domain_size as usize, + &mut words_ref[..num_words], + ); + let (word_index, mask) = chunk_word_index_and_mask(elem); + words_ref[word_index] &= !mask; + *chunk = Mixed(chunk_domain_size, chunk_domain_size - 1, words); + } else { + *chunk = Zeros(chunk_domain_size); + } + true + } + Mixed(chunk_domain_size, ref mut count, ref mut words) => { + // We skip all the work if the bit is already clear. + let (word_index, mask) = chunk_word_index_and_mask(elem); + if (words[word_index] & mask) != 0 { + *count -= 1; + if *count > 0 { + let words = Rc::make_mut(words); + words[word_index] &= !mask; + } else { + *chunk = Zeros(chunk_domain_size); + } + true + } else { + false + } + } + } + } + + bit_relations_inherent_impls! {} +} + +impl BitRelations> for ChunkedBitSet { + fn union(&mut self, other: &ChunkedBitSet) -> bool { + assert_eq!(self.domain_size, other.domain_size); + debug_assert_eq!(self.chunks.len(), other.chunks.len()); + + let mut changed = false; + for (mut self_chunk, other_chunk) in self.chunks.iter_mut().zip(other.chunks.iter()) { + match (&mut self_chunk, &other_chunk) { + (_, Zeros(_)) | (Ones(_), _) => {} + (Zeros(self_chunk_domain_size), Ones(other_chunk_domain_size)) + | (Mixed(self_chunk_domain_size, ..), Ones(other_chunk_domain_size)) + | (Zeros(self_chunk_domain_size), Mixed(other_chunk_domain_size, ..)) => { + // `other_chunk` fully overwrites `self_chunk` + debug_assert_eq!(self_chunk_domain_size, other_chunk_domain_size); + *self_chunk = other_chunk.clone(); + changed = true; + } + ( + Mixed( + self_chunk_domain_size, + ref mut self_chunk_count, + ref mut self_chunk_words, + ), + Mixed(_other_chunk_domain_size, _other_chunk_count, other_chunk_words), + ) => { + // First check if the operation would change + // `self_chunk.words`. If not, we can avoid allocating some + // words, and this happens often enough that it's a + // performance win. Also, we only need to operate on the + // in-use words, hence the slicing. + let op = |a, b| a | b; + let num_words = num_words(*self_chunk_domain_size as usize); + if bitwise_changes( + &self_chunk_words[0..num_words], + &other_chunk_words[0..num_words], + op, + ) { + let self_chunk_words = Rc::make_mut(self_chunk_words); + let has_changed = bitwise( + &mut self_chunk_words[0..num_words], + &other_chunk_words[0..num_words], + op, + ); + debug_assert!(has_changed); + *self_chunk_count = self_chunk_words[0..num_words] + .iter() + .map(|w| w.count_ones() as ChunkSize) + .sum(); + if *self_chunk_count == *self_chunk_domain_size { + *self_chunk = Ones(*self_chunk_domain_size); + } + changed = true; + } + } + } + } + changed + } + + fn subtract(&mut self, _other: &ChunkedBitSet) -> bool { + unimplemented!("implement if/when necessary"); + } + + fn intersect(&mut self, _other: &ChunkedBitSet) -> bool { + unimplemented!("implement if/when necessary"); + } +} + +impl BitRelations> for ChunkedBitSet { + fn union(&mut self, other: &HybridBitSet) -> bool { + // FIXME: this is slow if `other` is dense, and could easily be + // improved, but it hasn't been a problem in practice so far. + assert_eq!(self.domain_size, other.domain_size()); + sequential_update(|elem| self.insert(elem), other.iter()) + } + + fn subtract(&mut self, other: &HybridBitSet) -> bool { + // FIXME: this is slow if `other` is dense, and could easily be + // improved, but it hasn't been a problem in practice so far. + assert_eq!(self.domain_size, other.domain_size()); + sequential_update(|elem| self.remove(elem), other.iter()) + } + + fn intersect(&mut self, _other: &HybridBitSet) -> bool { + unimplemented!("implement if/when necessary"); + } +} + +impl Clone for ChunkedBitSet { + fn clone(&self) -> Self { + ChunkedBitSet { + domain_size: self.domain_size, + chunks: self.chunks.clone(), + marker: PhantomData, + } + } + + /// WARNING: this implementation of clone_from will panic if the two + /// bitsets have different domain sizes. This constraint is not inherent to + /// `clone_from`, but it works with the existing call sites and allows a + /// faster implementation, which is important because this function is hot. + fn clone_from(&mut self, from: &Self) { + assert_eq!(self.domain_size, from.domain_size); + debug_assert_eq!(self.chunks.len(), from.chunks.len()); + + self.chunks.clone_from(&from.chunks) + } +} + +impl Chunk { + #[cfg(test)] + fn assert_valid(&self) { + match *self { + Zeros(chunk_domain_size) | Ones(chunk_domain_size) => { + assert!(chunk_domain_size as usize <= CHUNK_BITS); + } + Mixed(chunk_domain_size, count, ref words) => { + assert!(chunk_domain_size as usize <= CHUNK_BITS); + assert!(0 < count && count < chunk_domain_size); + + // Check the number of set bits matches `count`. + assert_eq!( + words.iter().map(|w| w.count_ones() as ChunkSize).sum::(), + count + ); + + // Check the not-in-use words are all zeroed. + let num_words = num_words(chunk_domain_size as usize); + if num_words < CHUNK_WORDS { + assert_eq!( + words[num_words..] + .iter() + .map(|w| w.count_ones() as ChunkSize) + .sum::(), + 0 + ); + } + } + } + } + + fn new(chunk_domain_size: usize, is_empty: bool) -> Self { + debug_assert!(chunk_domain_size <= CHUNK_BITS); + let chunk_domain_size = chunk_domain_size as ChunkSize; + if is_empty { Zeros(chunk_domain_size) } else { Ones(chunk_domain_size) } + } + + /// Count the number of 1s in the chunk. + fn count(&self) -> usize { + match *self { + Zeros(_) => 0, + Ones(chunk_domain_size) => chunk_domain_size as usize, + Mixed(_, count, _) => count as usize, + } + } +} + // Applies a function to mutate a bitset, and returns true if any // of the applications return true fn sequential_update( @@ -642,6 +1055,23 @@ where changed != 0 } +/// Does this bitwise operation change `out_vec`? +#[inline] +fn bitwise_changes(out_vec: &[Word], in_vec: &[Word], op: Op) -> bool +where + Op: Fn(Word, Word) -> Word, +{ + assert_eq!(out_vec.len(), in_vec.len()); + for (out_elem, in_elem) in iter::zip(out_vec, in_vec) { + let old_val = *out_elem; + let new_val = op(old_val, *in_elem); + if old_val != new_val { + return true; + } + } + false +} + const SPARSE_MAX: usize = 8; /// A fixed-size bitset type with a sparse representation and a maximum of @@ -1136,18 +1566,7 @@ impl BitMatrix { for index in start..end { words[index] = !0; } - self.clear_excess_bits(row); - } - - /// Clear excess bits in the final word of the row. - fn clear_excess_bits(&mut self, row: R) { - let num_bits_in_final_word = self.num_columns % WORD_BITS; - if num_bits_in_final_word > 0 { - let mask = (1 << num_bits_in_final_word) - 1; - let (_, end) = self.range(row); - let final_word_idx = end - 1; - self.words[final_word_idx] &= mask; - } + clear_excess_bits_in_final_word(self.num_columns, &mut self.words[..end]); } /// Gets a slice of the underlying words. @@ -1339,6 +1758,12 @@ fn num_words(domain_size: T) -> usize { (domain_size.index() + WORD_BITS - 1) / WORD_BITS } +#[inline] +fn num_chunks(domain_size: T) -> usize { + assert!(domain_size.index() > 0); + (domain_size.index() + CHUNK_BITS - 1) / CHUNK_BITS +} + #[inline] fn word_index_and_mask(elem: T) -> (usize, Word) { let elem = elem.index(); @@ -1347,6 +1772,25 @@ fn word_index_and_mask(elem: T) -> (usize, Word) { (word_index, mask) } +#[inline] +fn chunk_index(elem: T) -> usize { + elem.index() / CHUNK_BITS +} + +#[inline] +fn chunk_word_index_and_mask(elem: T) -> (usize, Word) { + let chunk_elem = elem.index() % CHUNK_BITS; + word_index_and_mask(chunk_elem) +} + +fn clear_excess_bits_in_final_word(domain_size: usize, words: &mut [Word]) { + let num_bits_in_final_word = domain_size % WORD_BITS; + if num_bits_in_final_word > 0 { + let mask = (1 << num_bits_in_final_word) - 1; + words[words.len() - 1] &= mask; + } +} + #[inline] fn max_bit(word: Word) -> usize { WORD_BITS - 1 - word.leading_zeros() as usize diff --git a/compiler/rustc_index/src/bit_set/tests.rs b/compiler/rustc_index/src/bit_set/tests.rs index e2b07305c9642..eec7dab5189a6 100644 --- a/compiler/rustc_index/src/bit_set/tests.rs +++ b/compiler/rustc_index/src/bit_set/tests.rs @@ -147,6 +147,201 @@ fn hybrid_bitset() { assert!(dense0.is_empty()); } +#[test] +fn chunked_bitset() { + let mut b0 = ChunkedBitSet::::new_empty(0); + let b0b = b0.clone(); + assert_eq!(b0, ChunkedBitSet { domain_size: 0, chunks: Box::new([]), marker: PhantomData }); + + // There are no valid insert/remove/contains operations on a 0-domain + // bitset, but we can test `union`. + b0.assert_valid(); + assert!(!b0.union(&b0b)); + assert_eq!(b0.chunks(), vec![]); + assert_eq!(b0.count(), 0); + b0.assert_valid(); + + //----------------------------------------------------------------------- + + let mut b1 = ChunkedBitSet::::new_empty(1); + assert_eq!( + b1, + ChunkedBitSet { domain_size: 1, chunks: Box::new([Zeros(1)]), marker: PhantomData } + ); + + b1.assert_valid(); + assert!(!b1.contains(0)); + assert_eq!(b1.count(), 0); + assert!(b1.insert(0)); + assert!(b1.contains(0)); + assert_eq!(b1.count(), 1); + assert_eq!(b1.chunks(), [Ones(1)]); + assert!(!b1.insert(0)); + assert!(b1.remove(0)); + assert!(!b1.contains(0)); + assert_eq!(b1.count(), 0); + assert_eq!(b1.chunks(), [Zeros(1)]); + b1.assert_valid(); + + //----------------------------------------------------------------------- + + let mut b100 = ChunkedBitSet::::new_filled(100); + assert_eq!( + b100, + ChunkedBitSet { domain_size: 100, chunks: Box::new([Ones(100)]), marker: PhantomData } + ); + + b100.assert_valid(); + for i in 0..100 { + assert!(b100.contains(i)); + } + assert_eq!(b100.count(), 100); + assert!(b100.remove(3)); + assert!(b100.insert(3)); + assert_eq!(b100.chunks(), vec![Ones(100)]); + assert!( + b100.remove(20) && b100.remove(30) && b100.remove(40) && b100.remove(99) && b100.insert(30) + ); + assert_eq!(b100.count(), 97); + assert!(!b100.contains(20) && b100.contains(30) && !b100.contains(99) && b100.contains(50)); + assert_eq!( + b100.chunks(), + vec![Mixed( + 100, + 97, + #[rustfmt::skip] + Rc::new([ + 0b11111111_11111111_11111110_11111111_11111111_11101111_11111111_11111111, + 0b00000000_00000000_00000000_00000111_11111111_11111111_11111111_11111111, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + ]) + )], + ); + b100.assert_valid(); + let mut num_removed = 0; + for i in 0..100 { + if b100.remove(i) { + num_removed += 1; + } + } + assert_eq!(num_removed, 97); + assert_eq!(b100.chunks(), vec![Zeros(100)]); + b100.assert_valid(); + + //----------------------------------------------------------------------- + + let mut b2548 = ChunkedBitSet::::new_empty(2548); + assert_eq!( + b2548, + ChunkedBitSet { + domain_size: 2548, + chunks: Box::new([Zeros(2048), Zeros(500)]), + marker: PhantomData, + } + ); + + b2548.assert_valid(); + b2548.insert(14); + b2548.remove(14); + assert_eq!(b2548.chunks(), vec![Zeros(2048), Zeros(500)]); + b2548.insert_all(); + for i in 0..2548 { + assert!(b2548.contains(i)); + } + assert_eq!(b2548.count(), 2548); + assert_eq!(b2548.chunks(), vec![Ones(2048), Ones(500)]); + b2548.assert_valid(); + + //----------------------------------------------------------------------- + + let mut b4096 = ChunkedBitSet::::new_empty(4096); + assert_eq!( + b4096, + ChunkedBitSet { + domain_size: 4096, + chunks: Box::new([Zeros(2048), Zeros(2048)]), + marker: PhantomData, + } + ); + + b4096.assert_valid(); + for i in 0..4096 { + assert!(!b4096.contains(i)); + } + assert!(b4096.insert(0) && b4096.insert(4095) && !b4096.insert(4095)); + assert!( + b4096.contains(0) && !b4096.contains(2047) && !b4096.contains(2048) && b4096.contains(4095) + ); + assert_eq!( + b4096.chunks(), + #[rustfmt::skip] + vec![ + Mixed(2048, 1, Rc::new([ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + ])), + Mixed(2048, 1, Rc::new([ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x8000_0000_0000_0000 + ])), + ], + ); + assert_eq!(b4096.count(), 2); + b4096.assert_valid(); + + //----------------------------------------------------------------------- + + let mut b10000 = ChunkedBitSet::::new_empty(10000); + assert_eq!( + b10000, + ChunkedBitSet { + domain_size: 10000, + chunks: Box::new([Zeros(2048), Zeros(2048), Zeros(2048), Zeros(2048), Zeros(1808),]), + marker: PhantomData, + } + ); + + b10000.assert_valid(); + assert!(b10000.insert(3000) && b10000.insert(5000)); + assert_eq!( + b10000.chunks(), + #[rustfmt::skip] + vec![ + Zeros(2048), + Mixed(2048, 1, Rc::new([ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0100_0000_0000_0000, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ])), + Mixed(2048, 1, Rc::new([ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x0100, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ])), + Zeros(2048), + Zeros(1808), + ], + ); + let mut b10000b = ChunkedBitSet::::new_empty(10000); + b10000b.clone_from(&b10000); + assert_eq!(b10000, b10000b); + for i in 6000..7000 { + b10000b.insert(i); + } + assert_eq!(b10000b.count(), 1002); + b10000b.assert_valid(); + b10000b.clone_from(&b10000); + assert_eq!(b10000b.count(), 2); + for i in 2000..8000 { + b10000b.insert(i); + } + b10000.union(&b10000b); + assert_eq!(b10000.count(), 6000); + b10000.union(&b10000b); + assert_eq!(b10000.count(), 6000); + b10000.assert_valid(); + b10000b.assert_valid(); +} + #[test] fn grow() { let mut set: GrowableBitSet = GrowableBitSet::with_capacity(65); diff --git a/compiler/rustc_index/src/lib.rs b/compiler/rustc_index/src/lib.rs index 7919e40925392..0cfb7bd110602 100644 --- a/compiler/rustc_index/src/lib.rs +++ b/compiler/rustc_index/src/lib.rs @@ -1,11 +1,21 @@ #![feature(allow_internal_unstable)] #![feature(bench_black_box)] #![feature(extend_one)] +#![feature(let_else)] #![feature(min_specialization)] +#![feature(new_uninit)] #![feature(step_trait)] +#![feature(stmt_expr_attributes)] #![feature(test)] -#![feature(let_else)] pub mod bit_set; pub mod interval; pub mod vec; + +/// Type size assertion. The first argument is a type and the second argument is its expected size. +#[macro_export] +macro_rules! static_assert_size { + ($ty:ty, $size:expr) => { + const _: [(); $size] = [(); ::std::mem::size_of::<$ty>()]; + }; +} diff --git a/compiler/rustc_mir_dataflow/src/framework/cursor.rs b/compiler/rustc_mir_dataflow/src/framework/cursor.rs index ba6b566a304a5..2de5a9d6991fe 100644 --- a/compiler/rustc_mir_dataflow/src/framework/cursor.rs +++ b/compiler/rustc_mir_dataflow/src/framework/cursor.rs @@ -1,10 +1,12 @@ //! Random access inspection of the results of a dataflow analysis. +use crate::framework::BitSetExt; + use std::borrow::Borrow; use std::cmp::Ordering; +#[cfg(debug_assertions)] use rustc_index::bit_set::BitSet; -use rustc_index::vec::Idx; use rustc_middle::mir::{self, BasicBlock, Location}; use super::{Analysis, Direction, Effect, EffectIndex, Results}; @@ -209,13 +211,13 @@ where } } -impl<'mir, 'tcx, A, R, T> ResultsCursor<'mir, 'tcx, A, R> +impl<'mir, 'tcx, A, R> ResultsCursor<'mir, 'tcx, A, R> where - A: Analysis<'tcx, Domain = BitSet>, - T: Idx, + A: crate::GenKillAnalysis<'tcx>, + A::Domain: BitSetExt, R: Borrow>, { - pub fn contains(&self, elem: T) -> bool { + pub fn contains(&self, elem: A::Idx) -> bool { self.get().contains(elem) } } diff --git a/compiler/rustc_mir_dataflow/src/framework/engine.rs b/compiler/rustc_mir_dataflow/src/framework/engine.rs index e20358ba9896d..88ed0128a1fd3 100644 --- a/compiler/rustc_mir_dataflow/src/framework/engine.rs +++ b/compiler/rustc_mir_dataflow/src/framework/engine.rs @@ -1,6 +1,7 @@ //! A solver for dataflow problems. -use std::borrow::BorrowMut; +use crate::framework::BitSetExt; + use std::ffi::OsString; use std::path::PathBuf; @@ -91,7 +92,7 @@ where impl<'a, 'tcx, A, D, T> Engine<'a, 'tcx, A> where A: GenKillAnalysis<'tcx, Idx = T, Domain = D>, - D: Clone + JoinSemiLattice + GenKill + BorrowMut>, + D: Clone + JoinSemiLattice + GenKill + BitSetExt, T: Idx, { /// Creates a new `Engine` to solve a gen-kill dataflow problem. @@ -106,7 +107,7 @@ where // Otherwise, compute and store the cumulative transfer function for each block. - let identity = GenKillSet::identity(analysis.bottom_value(body).borrow().domain_size()); + let identity = GenKillSet::identity(analysis.bottom_value(body).domain_size()); let mut trans_for_block = IndexVec::from_elem(identity, body.basic_blocks()); for (block, block_data) in body.basic_blocks().iter_enumerated() { @@ -115,7 +116,7 @@ where } let apply_trans = Box::new(move |bb: BasicBlock, state: &mut A::Domain| { - trans_for_block[bb].apply(state.borrow_mut()); + trans_for_block[bb].apply(state); }); Self::new(tcx, body, analysis, Some(apply_trans as Box<_>)) diff --git a/compiler/rustc_mir_dataflow/src/framework/fmt.rs b/compiler/rustc_mir_dataflow/src/framework/fmt.rs index 1d1553bbbd984..99735673f4d58 100644 --- a/compiler/rustc_mir_dataflow/src/framework/fmt.rs +++ b/compiler/rustc_mir_dataflow/src/framework/fmt.rs @@ -1,7 +1,7 @@ //! Custom formatting traits used when outputting Graphviz diagrams with the results of a dataflow //! analysis. -use rustc_index::bit_set::{BitSet, HybridBitSet}; +use rustc_index::bit_set::{BitSet, ChunkedBitSet, HybridBitSet}; use rustc_index::vec::Idx; use std::fmt; @@ -133,6 +133,19 @@ where } } +impl DebugWithContext for ChunkedBitSet +where + T: Idx + DebugWithContext, +{ + fn fmt_with(&self, _ctxt: &C, _f: &mut fmt::Formatter<'_>) -> fmt::Result { + unimplemented!("implement when/if needed"); + } + + fn fmt_diff_with(&self, _old: &Self, _ctxt: &C, _f: &mut fmt::Formatter<'_>) -> fmt::Result { + unimplemented!("implement when/if needed"); + } +} + impl DebugWithContext for &'_ T where T: DebugWithContext, diff --git a/compiler/rustc_mir_dataflow/src/framework/lattice.rs b/compiler/rustc_mir_dataflow/src/framework/lattice.rs index f937b31f4cfc9..9a462f6e1a41a 100644 --- a/compiler/rustc_mir_dataflow/src/framework/lattice.rs +++ b/compiler/rustc_mir_dataflow/src/framework/lattice.rs @@ -38,7 +38,8 @@ //! [Hasse diagram]: https://en.wikipedia.org/wiki/Hasse_diagram //! [poset]: https://en.wikipedia.org/wiki/Partially_ordered_set -use rustc_index::bit_set::BitSet; +use crate::framework::BitSetExt; +use rustc_index::bit_set::{BitSet, ChunkedBitSet, HybridBitSet}; use rustc_index::vec::{Idx, IndexVec}; use std::iter; @@ -145,6 +146,18 @@ impl MeetSemiLattice for BitSet { } } +impl JoinSemiLattice for ChunkedBitSet { + fn join(&mut self, other: &Self) -> bool { + self.union(other) + } +} + +impl MeetSemiLattice for ChunkedBitSet { + fn meet(&mut self, other: &Self) -> bool { + self.intersect(other) + } +} + /// The counterpart of a given semilattice `T` using the [inverse order]. /// /// The dual of a join-semilattice is a meet-semilattice and vice versa. For example, the dual of a @@ -155,15 +168,21 @@ impl MeetSemiLattice for BitSet { #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct Dual(pub T); -impl std::borrow::Borrow for Dual { - fn borrow(&self) -> &T { - &self.0 +impl BitSetExt for Dual> { + fn domain_size(&self) -> usize { + self.0.domain_size() + } + + fn contains(&self, elem: T) -> bool { + self.0.contains(elem) + } + + fn union(&mut self, other: &HybridBitSet) { + self.0.union(other); } -} -impl std::borrow::BorrowMut for Dual { - fn borrow_mut(&mut self) -> &mut T { - &mut self.0 + fn subtract(&mut self, other: &HybridBitSet) { + self.0.subtract(other); } } diff --git a/compiler/rustc_mir_dataflow/src/framework/mod.rs b/compiler/rustc_mir_dataflow/src/framework/mod.rs index dac6720a6e543..c51dd06de251d 100644 --- a/compiler/rustc_mir_dataflow/src/framework/mod.rs +++ b/compiler/rustc_mir_dataflow/src/framework/mod.rs @@ -30,10 +30,9 @@ //! //! [gen-kill]: https://en.wikipedia.org/wiki/Data-flow_analysis#Bit_vector_problems -use std::borrow::BorrowMut; use std::cmp::Ordering; -use rustc_index::bit_set::{BitSet, HybridBitSet}; +use rustc_index::bit_set::{BitSet, ChunkedBitSet, HybridBitSet}; use rustc_index::vec::Idx; use rustc_middle::mir::{self, BasicBlock, Location}; use rustc_middle::ty::TyCtxt; @@ -52,6 +51,51 @@ pub use self::engine::{Engine, Results}; pub use self::lattice::{JoinSemiLattice, MeetSemiLattice}; pub use self::visitor::{visit_results, ResultsVisitable, ResultsVisitor}; +/// Analysis domains are all bitsets of various kinds. This trait holds +/// operations needed by all of them. +pub trait BitSetExt { + fn domain_size(&self) -> usize; + fn contains(&self, elem: T) -> bool; + fn union(&mut self, other: &HybridBitSet); + fn subtract(&mut self, other: &HybridBitSet); +} + +impl BitSetExt for BitSet { + fn domain_size(&self) -> usize { + self.domain_size() + } + + fn contains(&self, elem: T) -> bool { + self.contains(elem) + } + + fn union(&mut self, other: &HybridBitSet) { + self.union(other); + } + + fn subtract(&mut self, other: &HybridBitSet) { + self.subtract(other); + } +} + +impl BitSetExt for ChunkedBitSet { + fn domain_size(&self) -> usize { + self.domain_size() + } + + fn contains(&self, elem: T) -> bool { + self.contains(elem) + } + + fn union(&mut self, other: &HybridBitSet) { + self.union(other); + } + + fn subtract(&mut self, other: &HybridBitSet) { + self.subtract(other); + } +} + /// Define the domain of a dataflow problem. /// /// This trait specifies the lattice on which this analysis operates (the domain) as well as its @@ -303,7 +347,7 @@ pub trait GenKillAnalysis<'tcx>: Analysis<'tcx> { impl<'tcx, A> Analysis<'tcx> for A where A: GenKillAnalysis<'tcx>, - A::Domain: GenKill + BorrowMut>, + A::Domain: GenKill + BitSetExt, { fn apply_statement_effect( &self, @@ -435,7 +479,7 @@ impl GenKillSet { } } - pub fn apply(&self, state: &mut BitSet) { + pub fn apply(&self, state: &mut impl BitSetExt) { state.union(&self.gen); state.subtract(&self.kill); } @@ -463,6 +507,16 @@ impl GenKill for BitSet { } } +impl GenKill for ChunkedBitSet { + fn gen(&mut self, elem: T) { + self.insert(elem); + } + + fn kill(&mut self, elem: T) { + self.remove(elem); + } +} + impl GenKill for lattice::Dual> { fn gen(&mut self, elem: T) { self.0.insert(elem); diff --git a/compiler/rustc_mir_dataflow/src/impls/mod.rs b/compiler/rustc_mir_dataflow/src/impls/mod.rs index 63d935db8ca13..9502c5d57d688 100644 --- a/compiler/rustc_mir_dataflow/src/impls/mod.rs +++ b/compiler/rustc_mir_dataflow/src/impls/mod.rs @@ -2,7 +2,7 @@ //! bitvectors attached to each basic block, represented via a //! zero-sized structure. -use rustc_index::bit_set::BitSet; +use rustc_index::bit_set::{BitSet, ChunkedBitSet}; use rustc_index::vec::Idx; use rustc_middle::mir::visit::{MirVisitable, Visitor}; use rustc_middle::mir::{self, Body, Location}; @@ -286,12 +286,12 @@ impl<'a, 'tcx> DefinitelyInitializedPlaces<'a, 'tcx> { } impl<'tcx> AnalysisDomain<'tcx> for MaybeInitializedPlaces<'_, 'tcx> { - type Domain = BitSet; + type Domain = ChunkedBitSet; const NAME: &'static str = "maybe_init"; fn bottom_value(&self, _: &mir::Body<'tcx>) -> Self::Domain { // bottom = uninitialized - BitSet::new_empty(self.move_data().move_paths.len()) + ChunkedBitSet::new_empty(self.move_data().move_paths.len()) } fn initialize_start_block(&self, _: &mir::Body<'tcx>, state: &mut Self::Domain) { @@ -417,13 +417,13 @@ impl<'tcx> GenKillAnalysis<'tcx> for MaybeInitializedPlaces<'_, 'tcx> { } impl<'tcx> AnalysisDomain<'tcx> for MaybeUninitializedPlaces<'_, 'tcx> { - type Domain = BitSet; + type Domain = ChunkedBitSet; const NAME: &'static str = "maybe_uninit"; fn bottom_value(&self, _: &mir::Body<'tcx>) -> Self::Domain { // bottom = initialized (start_block_effect counters this at outset) - BitSet::new_empty(self.move_data().move_paths.len()) + ChunkedBitSet::new_empty(self.move_data().move_paths.len()) } // sets on_entry bits for Arg places @@ -606,13 +606,13 @@ impl<'tcx> GenKillAnalysis<'tcx> for DefinitelyInitializedPlaces<'_, 'tcx> { } impl<'tcx> AnalysisDomain<'tcx> for EverInitializedPlaces<'_, 'tcx> { - type Domain = BitSet; + type Domain = ChunkedBitSet; const NAME: &'static str = "ever_init"; fn bottom_value(&self, _: &mir::Body<'tcx>) -> Self::Domain { // bottom = no initialized variables by default - BitSet::new_empty(self.move_data().inits.len()) + ChunkedBitSet::new_empty(self.move_data().inits.len()) } fn initialize_start_block(&self, body: &mir::Body<'tcx>, state: &mut Self::Domain) { diff --git a/compiler/rustc_mir_dataflow/src/rustc_peek.rs b/compiler/rustc_mir_dataflow/src/rustc_peek.rs index 1746d5ee38b92..51ab5b43bff0a 100644 --- a/compiler/rustc_mir_dataflow/src/rustc_peek.rs +++ b/compiler/rustc_mir_dataflow/src/rustc_peek.rs @@ -1,5 +1,3 @@ -use std::borrow::Borrow; - use rustc_ast::ast; use rustc_span::symbol::sym; use rustc_span::Span; @@ -10,6 +8,7 @@ use rustc_middle::mir::MirPass; use rustc_middle::mir::{self, Body, Local, Location}; use rustc_middle::ty::{self, Ty, TyCtxt}; +use crate::framework::BitSetExt; use crate::impls::{ DefinitelyInitializedPlaces, MaybeInitializedPlaces, MaybeLiveLocals, MaybeUninitializedPlaces, }; @@ -248,7 +247,7 @@ pub trait RustcPeekAt<'tcx>: Analysis<'tcx> { impl<'tcx, A, D> RustcPeekAt<'tcx> for A where A: Analysis<'tcx, Domain = D> + HasMoveData<'tcx>, - D: JoinSemiLattice + Clone + Borrow>, + D: JoinSemiLattice + Clone + BitSetExt, { fn peek_at( &self, @@ -259,7 +258,7 @@ where ) { match self.move_data().rev_lookup.find(place.as_ref()) { LookupResult::Exact(peek_mpi) => { - let bit_state = flow_state.borrow().contains(peek_mpi); + let bit_state = flow_state.contains(peek_mpi); debug!("rustc_peek({:?} = &{:?}) bit_state: {}", call.arg, place, bit_state); if !bit_state { tcx.sess.span_err(call.span, "rustc_peek: bit not set"); diff --git a/compiler/rustc_mir_transform/src/remove_uninit_drops.rs b/compiler/rustc_mir_transform/src/remove_uninit_drops.rs index fc5ac97e3e1dc..dc1b76aba1613 100644 --- a/compiler/rustc_mir_transform/src/remove_uninit_drops.rs +++ b/compiler/rustc_mir_transform/src/remove_uninit_drops.rs @@ -1,4 +1,4 @@ -use rustc_index::bit_set::BitSet; +use rustc_index::bit_set::ChunkedBitSet; use rustc_middle::mir::{Body, Field, Rvalue, Statement, StatementKind, TerminatorKind}; use rustc_middle::ty::subst::SubstsRef; use rustc_middle::ty::{self, ParamEnv, Ty, TyCtxt, VariantDef}; @@ -89,7 +89,7 @@ impl<'tcx> MirPass<'tcx> for RemoveUninitDrops { fn is_needs_drop_and_init<'tcx>( tcx: TyCtxt<'tcx>, param_env: ParamEnv<'tcx>, - maybe_inits: &BitSet, + maybe_inits: &ChunkedBitSet, move_data: &MoveData<'tcx>, ty: Ty<'tcx>, mpi: MovePathIndex,