diff --git a/Cargo.toml b/Cargo.toml
index 3a66dad5..9ebaf817 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,8 +17,7 @@ repository = "https://github.com/bytecodealliance/regalloc2"
 log = { version = "0.4.8", default-features = false }
 smallvec = { version = "1.6.1", features = ["union"] }
 rustc-hash = { version = "2.0.0", default-features = false }
-slice-group-by = { version = "0.3.0", default-features = false }
-hashbrown = { version = "0.14", features = ["ahash"], default-features = false }
+hashbrown = { version = "0.14", default-features = false, features = [] }
 
 # Optional serde support, enabled by feature below.
 serde = { version = "1.0.136", features = [
@@ -28,6 +27,8 @@ serde = { version = "1.0.136", features = [
 
 # The below are only needed for fuzzing.
 libfuzzer-sys = { version = "0.4.2", optional = true }
+bumpalo = { version = "3.16.0", features = ["allocator-api2"] }
+allocator-api2 = { version = "0.2.18", default-features = false, features = ["alloc"] }
 
 # When testing regalloc2 by itself, enable debug assertions and overflow checks
 [profile.release]
diff --git a/fuzz/fuzz_targets/domtree.rs b/fuzz/fuzz_targets/domtree.rs
index 5957ccf7..72c9f12d 100644
--- a/fuzz/fuzz_targets/domtree.rs
+++ b/fuzz/fuzz_targets/domtree.rs
@@ -116,13 +116,21 @@ impl Arbitrary<'_> for TestCase {
 }
 
 fuzz_target!(|testcase: TestCase| {
-    let postord = postorder::calculate(testcase.cfg.num_blocks, Block::new(0), |block| {
-        &testcase.cfg.succs[block.index()]
-    });
-    let idom = domtree::calculate(
+    let mut postorder = vec![];
+    postorder::calculate(
+        testcase.cfg.num_blocks,
+        Block::new(0),
+        &mut vec![],
+        &mut postorder,
+        |block| &testcase.cfg.succs[block.index()],
+    );
+    let mut idom = vec![];
+    domtree::calculate(
         testcase.cfg.num_blocks,
         |block| &testcase.cfg.preds[block.index()],
-        &postord[..],
+        &postorder[..],
+        &mut vec![],
+        &mut idom,
         Block::new(0),
     );
     check_idom_violations(&idom[..], &testcase.path);
diff --git a/fuzz/fuzz_targets/ion.rs b/fuzz/fuzz_targets/ion.rs
index aa754c71..b48417bb 100644
--- a/fuzz/fuzz_targets/ion.rs
+++ b/fuzz/fuzz_targets/ion.rs
@@ -11,6 +11,14 @@ fuzz_target!(|func: Func| {
     let _ = env_logger::try_init();
     log::trace!("func:\n{:?}", func);
     let env = regalloc2::fuzzing::func::machine_env();
-    let _out =
-        regalloc2::fuzzing::ion::run(&func, &env, false, false).expect("regalloc did not succeed");
+
+    thread_local! {
+        // We test that ctx is cleared properly between runs.
+        static CTX: std::cell::RefCell<regalloc2::fuzzing::ion::Ctx> = std::cell::RefCell::default();
+    }
+
+    CTX.with(|ctx| {
+        let _out = regalloc2::fuzzing::ion::run(&func, &env, &mut *ctx.borrow_mut(), false, false)
+            .expect("regalloc did not succeed");
+    });
 });
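All three fuzz targets in this diff adopt the same pattern: `ion::run` now borrows a caller-owned `Ctx`, so the allocator's heap allocations survive between calls, and the fuzzer doubles as a test that no state leaks across runs. A minimal, self-contained sketch of the pattern follows; the names `Ctx` and `run` here are stand-ins, not regalloc2's API:

```rust
use std::cell::RefCell;

// Stand-in for regalloc2's `Ctx`: a bag of buffers that keep their
// capacity between runs.
#[derive(Default)]
struct Ctx {
    scratch: Vec<u32>,
}

// Stand-in for `run`: it must fully re-initialize any state it reads,
// since `ctx` may hold data from a previous run.
fn run(input: &[u32], ctx: &mut Ctx) -> u32 {
    ctx.scratch.clear(); // correctness: drop stale contents, keep capacity
    ctx.scratch.extend_from_slice(input);
    ctx.scratch.iter().sum()
}

thread_local! {
    static CTX: RefCell<Ctx> = RefCell::default();
}

fn main() {
    // Reusing one Ctx for many inputs is exactly what the fuzz targets
    // exercise: any state leaking between runs becomes a fuzz failure.
    for input in [[1, 2, 3], [4, 5, 6]] {
        CTX.with(|ctx| {
            let _ = run(&input, &mut ctx.borrow_mut());
        });
    }
}
```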
diff --git a/fuzz/fuzz_targets/ion_checker.rs b/fuzz/fuzz_targets/ion_checker.rs
index 632d5d72..af41b55e 100644
--- a/fuzz/fuzz_targets/ion_checker.rs
+++ b/fuzz/fuzz_targets/ion_checker.rs
@@ -36,10 +36,18 @@ fuzz_target!(|testcase: TestCase| {
     let _ = env_logger::try_init();
     log::trace!("func:\n{:?}", func);
     let env = regalloc2::fuzzing::func::machine_env();
-    let out =
-        regalloc2::fuzzing::ion::run(&func, &env, true, false).expect("regalloc did not succeed");
-    let mut checker = Checker::new(&func, &env);
-    checker.prepare(&out);
-    checker.run().expect("checker failed");
+    thread_local! {
+        // We test that ctx is cleared properly between runs.
+        static CTX: std::cell::RefCell<regalloc2::fuzzing::ion::Ctx> = std::cell::RefCell::default();
+    }
+
+    CTX.with(|ctx| {
+        regalloc2::fuzzing::ion::run(&func, &env, &mut *ctx.borrow_mut(), true, false)
+            .expect("regalloc did not succeed");
+
+        let mut checker = Checker::new(&func, &env);
+        checker.prepare(&ctx.borrow().output);
+        checker.run().expect("checker failed");
+    });
 });
diff --git a/fuzz/fuzz_targets/ssagen.rs b/fuzz/fuzz_targets/ssagen.rs
index 0994e348..621c94b7 100644
--- a/fuzz/fuzz_targets/ssagen.rs
+++ b/fuzz/fuzz_targets/ssagen.rs
@@ -5,7 +5,7 @@
 #![no_main]
 
 use regalloc2::fuzzing::arbitrary::{Arbitrary, Result, Unstructured};
-use regalloc2::fuzzing::cfg::CFGInfo;
+use regalloc2::fuzzing::cfg::{CFGInfo, CFGInfoCtx};
 use regalloc2::fuzzing::func::{Func, Options};
 use regalloc2::fuzzing::fuzz_target;
 use regalloc2::ssa::validate_ssa;
@@ -33,6 +33,13 @@ impl Arbitrary<'_> for TestCase {
 }
 
 fuzz_target!(|t: TestCase| {
-    let cfginfo = CFGInfo::new(&t.f).expect("could not create CFG info");
-    validate_ssa(&t.f, &cfginfo).expect("invalid SSA");
+    thread_local! {
+        // We test that ctx is cleared properly between runs.
+        static CFG_INFO: std::cell::RefCell<(CFGInfo, CFGInfoCtx)> = std::cell::RefCell::default();
+    }
+
+    CFG_INFO.with_borrow_mut(|(cfginfo, ctx)| {
+        cfginfo.init(&t.f, ctx).expect("could not create CFG info");
+        validate_ssa(&t.f, &cfginfo).expect("invalid SSA");
+    });
 });
diff --git a/regalloc2-tool/src/main.rs b/regalloc2-tool/src/main.rs
index e396ddab..b5a21842 100644
--- a/regalloc2-tool/src/main.rs
+++ b/regalloc2-tool/src/main.rs
@@ -71,7 +71,7 @@ fn main() {
 }
 
 fn print_output(func: &SerializableFunction, output: &Output) {
-    print!("Register allocation result: {{\n");
+    println!("Register allocation result: {{");
     for i in 0..func.num_blocks() {
         let block = Block::new(i);
         let succs = func
@@ -84,7 +84,7 @@ fn print_output(func: &SerializableFunction, output: &Output) {
         .iter()
         .map(|b| b.index())
         .collect::<Vec<_>>();
-    print!("  block{}: # succs:{:?} preds:{:?}\n", i, succs, preds);
+    println!("  block{}: # succs:{:?} preds:{:?}", i, succs, preds);
     for inst_or_edit in output.block_insts_and_edits(func, block) {
         match inst_or_edit {
             InstOrEdit::Inst(inst) => {
@@ -102,13 +102,13 @@ fn print_output(func: &SerializableFunction, output: &Output) {
                     .map(|(op, alloc)| format!("{op} => {alloc}"))
                     .collect();
                 let ops = ops.join(", ");
-                print!("    inst{}: {op} {ops}\n", inst.index(),);
+                println!("    inst{}: {op} {ops}", inst.index(),);
             }
             InstOrEdit::Edit(Edit::Move { from, to }) => {
-                print!("    edit: move {to} <- {from}\n");
+                println!("    edit: move {to} <- {from}");
             }
         }
     }
-    print!("}}\n");
+    println!("}}");
 }
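The ssagen target uses `LocalKey::with_borrow_mut` (stable since Rust 1.73) where the other targets spell out `with` plus `borrow_mut`; the two forms are equivalent:

```rust
use std::cell::RefCell;

thread_local! {
    static BUF: RefCell<Vec<u8>> = RefCell::default();
}

fn main() {
    // Long form, as in the ion/ion_checker targets:
    BUF.with(|buf| buf.borrow_mut().push(1));
    // Short form, as in the ssagen target:
    BUF.with_borrow_mut(|buf| buf.push(2));
    BUF.with_borrow_mut(|buf| assert_eq!(buf.as_slice(), &[1, 2]));
}
```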
diff --git a/src/cfg.rs b/src/cfg.rs
index 2494d27d..8a653cd3 100644
--- a/src/cfg.rs
+++ b/src/cfg.rs
@@ -5,12 +5,19 @@
 
 //! Lightweight CFG analyses.
 
-use crate::{domtree, postorder, Block, Function, Inst, ProgPoint, RegAllocError};
-use alloc::vec;
-use alloc::vec::Vec;
+use crate::alloc::vec::Vec;
+
+use crate::{domtree, postorder, Block, Function, Inst, ProgPoint, RegAllocError, VecExt};
 use smallvec::{smallvec, SmallVec};
 
-#[derive(Clone, Debug)]
+#[derive(Debug, Default)]
+pub struct CFGInfoCtx {
+    visited: Vec<bool>,
+    block_to_rpo: Vec<Option<u32>>,
+    backedge: Vec<u32>,
+}
+
+#[derive(Debug, Default)]
 pub struct CFGInfo {
     /// Postorder traversal of blocks.
     pub postorder: Vec<Block>,
@@ -33,21 +40,41 @@ pub struct CFGInfo {
 }
 
 impl CFGInfo {
-    pub fn new<F: Function>(f: &F) -> Result<CFGInfo, RegAllocError> {
-        let postorder = postorder::calculate(f.num_blocks(), f.entry_block(), |block| {
-            f.block_succs(block)
-        });
-        let domtree = domtree::calculate(
-            f.num_blocks(),
+    pub fn new<F: Function>(f: &F) -> Result<Self, RegAllocError> {
+        let mut ctx = CFGInfoCtx::default();
+        let mut this = Self::default();
+        this.init(f, &mut ctx)?;
+        Ok(this)
+    }
+
+    pub fn init<F: Function>(&mut self, f: &F, ctx: &mut CFGInfoCtx) -> Result<(), RegAllocError> {
+        let nb = f.num_blocks();
+
+        postorder::calculate(
+            nb,
+            f.entry_block(),
+            &mut ctx.visited,
+            &mut self.postorder,
+            |block| f.block_succs(block),
+        );
+
+        domtree::calculate(
+            nb,
             |block| f.block_preds(block),
-            &postorder[..],
+            &self.postorder,
+            &mut ctx.block_to_rpo,
+            &mut self.domtree,
             f.entry_block(),
         );
-        let mut insn_block = vec![Block::invalid(); f.num_insts()];
-        let mut block_entry = vec![ProgPoint::before(Inst::invalid()); f.num_blocks()];
-        let mut block_exit = vec![ProgPoint::before(Inst::invalid()); f.num_blocks()];
-        let mut backedge_in = vec![0; f.num_blocks()];
-        let mut backedge_out = vec![0; f.num_blocks()];
+
+        let insn_block = self.insn_block.repopulate(f.num_insts(), Block::invalid());
+        let block_entry = self
+            .block_entry
+            .repopulate(nb, ProgPoint::before(Inst::invalid()));
+        let block_exit = self
+            .block_exit
+            .repopulate(nb, ProgPoint::before(Inst::invalid()));
+        let (backedge_in, backedge_out) = ctx.backedge.repopulate(nb * 2, 0).split_at_mut(nb);
 
         for block in 0..f.num_blocks() {
             let block = Block::new(block);
@@ -98,10 +125,10 @@ impl CFGInfo {
             }
         }
 
-        let mut approx_loop_depth = vec![];
-        let mut backedge_stack: SmallVec<[usize; 4]> = smallvec![];
+        let approx_loop_depth = self.approx_loop_depth.cleared();
+        let mut backedge_stack: SmallVec<[u32; 4]> = smallvec![];
         let mut cur_depth = 0;
-        for block in 0..f.num_blocks() {
+        for block in 0..nb {
             if backedge_in[block] > 0 {
                 cur_depth += 1;
                 backedge_stack.push(backedge_in[block]);
@@ -119,14 +146,7 @@ impl CFGInfo {
             }
         }
 
-        Ok(CFGInfo {
-            postorder,
-            domtree,
-            insn_block,
-            block_entry,
-            block_exit,
-            approx_loop_depth,
-        })
+        Ok(())
     }
 
     pub fn dominates(&self, a: Block, b: Block) -> bool {
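`CFGInfo::init` leans on the `VecExt` helpers (`repopulate`, `cleared`, `preallocate`) imported above. The trait itself is not part of this diff; judging from the call sites, it behaves roughly like the following sketch (the signatures are assumptions, not the crate's actual definitions):

```rust
// Assumed semantics, inferred from call sites such as
// `ctx.backedge.repopulate(nb * 2, 0).split_at_mut(nb)` and
// `self.approx_loop_depth.cleared()`.
trait VecExt<T> {
    /// Clear, then fill with `len` copies of `value`, reusing capacity.
    fn repopulate(&mut self, len: usize, value: T) -> &mut [T]
    where
        T: Clone;
    /// Clear while keeping capacity; hand the vec back for refilling.
    fn cleared(&mut self) -> &mut Vec<T>;
    /// Make sure at least `cap` slots are available before a run.
    fn preallocate(&mut self, cap: usize);
}

impl<T> VecExt<T> for Vec<T> {
    fn repopulate(&mut self, len: usize, value: T) -> &mut [T]
    where
        T: Clone,
    {
        self.clear();
        self.resize(len, value);
        self
    }
    fn cleared(&mut self) -> &mut Vec<T> {
        self.clear();
        self
    }
    fn preallocate(&mut self, cap: usize) {
        self.clear();
        self.reserve(cap);
    }
}
```

The payoff is that a long-lived `CFGInfoCtx` amortizes the `vec![...]` allocations the old `new` performed on every call.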
diff --git a/src/domtree.rs b/src/domtree.rs
index 33c71081..4b90d4dc 100644
--- a/src/domtree.rs
+++ b/src/domtree.rs
@@ -12,10 +12,11 @@
 // TR-06-33870
 // https://www.cs.rice.edu/~keith/EMBED/dom.pdf
 
-use alloc::vec;
+use core::u32;
+
 use alloc::vec::Vec;
 
-use crate::Block;
+use crate::{Block, VecExt};
 
 // Helper
 fn merge_sets(
@@ -44,19 +45,18 @@ pub fn calculate<'a, PredFn: Fn(Block) -> &'a [Block]>(
     num_blocks: usize,
     preds: PredFn,
     post_ord: &[Block],
+    block_to_rpo_scratch: &mut Vec<Option<u32>>,
+    out: &mut Vec<Block>,
     start: Block,
-) -> Vec<Block> {
+) {
     // We have post_ord, which is the postorder sequence.
 
-    // Compute maps from RPO to block number and vice-versa.
-    let mut block_to_rpo = vec![None; num_blocks];
-    block_to_rpo.resize(num_blocks, None);
+    let block_to_rpo = block_to_rpo_scratch.repopulate(num_blocks, None);
 
     for (i, rpo_block) in post_ord.iter().rev().enumerate() {
         block_to_rpo[rpo_block.index()] = Some(i as u32);
     }
 
-    let mut idom = vec![Block::invalid(); num_blocks];
-
+    let idom = out.repopulate(num_blocks, Block::invalid());
     // The start node must have itself as a parent.
     idom[start.index()] = start;
 
@@ -70,11 +70,11 @@ pub fn calculate<'a, PredFn: Fn(Block) -> &'a [Block]>(
             let mut parent = Block::invalid();
             for &pred in preds(node).iter() {
                 let pred_rpo = match block_to_rpo[pred.index()] {
-                    Some(r) => r,
                     None => {
                         // Skip unreachable preds.
                         continue;
                     }
+                    Some(r) => r,
                 };
                 if pred_rpo < rponum {
                     parent = pred;
@@ -104,8 +104,6 @@ pub fn calculate<'a, PredFn: Fn(Block) -> &'a [Block]>(
     // Now set the start node's dominator-tree parent to "invalid";
     // this allows the loop in `dominates` to terminate.
     idom[start.index()] = Block::invalid();
-
-    idom
 }
 
 pub fn dominates(idom: &[Block], a: Block, mut b: Block) -> bool {
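The final hunk preserves the invariant the comment describes: `idom[start]` is reset to invalid so a walk up the dominator tree always terminates. A self-contained sketch of the `dominates` query that relies on this, using `u32` in place of `Block`:

```rust
const INVALID: u32 = u32::MAX; // stand-in for Block::invalid()

// Walk b's idom chain upward; stop when we reach `a` (dominates)
// or fall off the tree at the entry node (does not dominate).
fn dominates(idom: &[u32], a: u32, mut b: u32) -> bool {
    loop {
        if a == b {
            return true;
        }
        if b == INVALID {
            return false;
        }
        b = idom[b as usize];
    }
}

fn main() {
    // Entry 0 -> 1 -> 2; idom[0] is INVALID per the invariant above.
    let idom = [INVALID, 0, 1];
    assert!(dominates(&idom, 0, 2));
    assert!(!dominates(&idom, 2, 0));
}
```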
diff --git a/src/fastalloc/lru.rs b/src/fastalloc/lru.rs
index 9f0679ea..37c61155 100644
--- a/src/fastalloc/lru.rs
+++ b/src/fastalloc/lru.rs
@@ -1,11 +1,10 @@
-use crate::{PReg, PRegSet, RegClass};
+use crate::{FxHashSet, PReg, PRegSet, RegClass};
 use alloc::vec;
 use alloc::vec::Vec;
 use core::{
     fmt,
     ops::{Index, IndexMut},
 };
-use hashbrown::HashSet;
 
 /// A least-recently-used cache organized as a linked list based on a vector.
 pub struct Lru {
@@ -193,7 +192,7 @@ impl Lru {
         );
         if self.head != u8::MAX {
             let mut node = self.data[self.head as usize].next;
-            let mut seen = HashSet::new();
+            let mut seen = FxHashSet::default();
             while node != self.head {
                 if seen.contains(&node) {
                     panic!(
@@ -245,7 +244,7 @@ impl fmt::Debug for Lru {
         } else {
             let mut data_str = format!("p{}", self.head);
             let mut node = self.data[self.head as usize].next;
-            let mut seen = HashSet::new();
+            let mut seen = FxHashSet::default();
             while node != self.head {
                 if seen.contains(&node) {
                     panic!(
diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs
index 3c71f54b..c8d41f6c 100644
--- a/src/fastalloc/mod.rs
+++ b/src/fastalloc/mod.rs
@@ -2,8 +2,8 @@ use crate::moves::{MoveAndScratchResolver, ParallelMoves};
 use crate::{cfg::CFGInfo, ion::Stats, Allocation, RegAllocError};
 use crate::{ssa::validate_ssa, Edit, Function, MachineEnv, Output, ProgPoint};
 use crate::{
-    AllocationKind, Block, Inst, InstPosition, Operand, OperandConstraint, OperandKind, OperandPos,
-    PReg, PRegSet, RegClass, SpillSlot, VReg,
+    AllocationKind, Block, FxHashMap, Inst, InstPosition, Operand, OperandConstraint, OperandKind,
+    OperandPos, PReg, PRegSet, RegClass, SpillSlot, VReg,
 };
 use alloc::vec::Vec;
 use core::convert::TryInto;
@@ -1150,17 +1150,16 @@ impl<'a, F: Function> Env<'a, F> {
 
     fn log_post_reload_at_begin_state(&self, block: Block) {
         use alloc::format;
-        use hashbrown::HashMap;
         trace!("");
         trace!("State after instruction reload_at_begin of {:?}", block);
-        let mut map = HashMap::new();
+        let mut map = FxHashMap::default();
         for (vreg_idx, alloc) in self.vreg_allocs.iter().enumerate() {
             if *alloc != Allocation::none() {
                 map.insert(format!("vreg{vreg_idx}"), alloc);
             }
         }
         trace!("vreg_allocs: {:?}", map);
-        let mut map = HashMap::new();
+        let mut map = FxHashMap::default();
        for i in 0..self.vreg_in_preg.len() {
            if self.vreg_in_preg[i] != VReg::invalid() {
                map.insert(PReg::from_index(i), self.vreg_in_preg[i]);
@@ -1174,10 +1173,9 @@ impl<'a, F: Function> Env<'a, F> {
 
     fn log_post_inst_processing_state(&self, inst: Inst) {
         use alloc::format;
-        use hashbrown::HashMap;
         trace!("");
         trace!("State after instruction {:?}", inst);
-        let mut map = HashMap::new();
+        let mut map = FxHashMap::default();
         for (vreg_idx, alloc) in self.vreg_allocs.iter().enumerate() {
             if *alloc != Allocation::none() {
                 map.insert(format!("vreg{vreg_idx}"), alloc);
@@ -1289,8 +1287,7 @@ pub fn run<F: Function>(
     enable_ssa_checker: bool,
 ) -> Result<Output, RegAllocError> {
     if enable_ssa_checker {
-        let cfginfo = CFGInfo::new(func)?;
-        validate_ssa(func, &cfginfo)?;
+        validate_ssa(func, &CFGInfo::new(func)?)?;
     }
 
     if trace_enabled!() || verbose_log {
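Both fastalloc files switch from `hashbrown::HashMap`/`HashSet` with the default hasher to the crate's `FxHashMap`/`FxHashSet` aliases, matching the removal of hashbrown's "ahash" feature in Cargo.toml. The alias definitions are not shown in this diff; presumably they pair hashbrown's tables with rustc-hash's hasher, roughly:

```rust
// Assumed shape of the aliases; the real definitions live elsewhere
// in the crate (likely src/lib.rs).
use hashbrown::{HashMap, HashSet};
use rustc_hash::FxBuildHasher;

pub type FxHashMap<K, V> = HashMap<K, V, FxBuildHasher>;
pub type FxHashSet<T> = HashSet<T, FxBuildHasher>;

fn main() {
    // `FxHashMap::default()` replaces `HashMap::new()`: with a custom
    // hasher there is no `new()`, only `default()`/`with_hasher()`.
    let mut m: FxHashMap<u32, &str> = FxHashMap::default();
    m.insert(1, "one");
    assert_eq!(m.get(&1), Some(&"one"));
}
```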
diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs
index 5cd3654a..69ce7262 100644
--- a/src/fuzzing/func.rs
+++ b/src/fuzzing/func.rs
@@ -188,13 +188,20 @@ impl FuncBuilder {
     }
 
     fn compute_doms(&mut self) {
-        self.postorder = postorder::calculate(self.f.blocks.len(), Block::new(0), |block| {
-            &self.f.block_succs[block.index()][..]
-        });
-        self.idom = domtree::calculate(
+        let f = &self.f;
+        postorder::calculate(
             self.f.blocks.len(),
-            |block| &self.f.block_preds[block.index()][..],
+            Block::new(0),
+            &mut vec![],
+            &mut self.postorder,
+            |block| &f.block_succs[block.index()][..],
+        );
+        domtree::calculate(
+            self.f.blocks.len(),
+            |block| &f.block_preds[block.index()][..],
             &self.postorder[..],
+            &mut vec![],
+            &mut self.idom,
             Block::new(0),
         );
     }
diff --git a/src/index.rs b/src/index.rs
index 4194e587..12c4572b 100644
--- a/src/index.rs
+++ b/src/index.rs
@@ -3,17 +3,17 @@ macro_rules! define_index {
     ($ix:ident, $storage:ident, $elem:ident) => {
         define_index!($ix);
 
-        #[derive(Clone, Debug)]
+        #[derive(Clone, Debug, Default)]
         pub struct $storage {
             storage: Vec<$elem>,
         }
 
         impl $storage {
             #[inline(always)]
-            pub fn with_capacity(n: usize) -> Self {
-                Self {
-                    storage: Vec::with_capacity(n),
-                }
+            /// See `VecExt::preallocate`
+            pub fn preallocate(&mut self, cap: usize) {
+                use $crate::VecExt;
+                self.storage.preallocate(cap);
             }
 
             #[inline(always)]
diff --git a/src/ion/data_structures.rs b/src/ion/data_structures.rs
index 93ce3917..848ab2a3 100644
--- a/src/ion/data_structures.rs
+++ b/src/ion/data_structures.rs
@@ -13,19 +13,22 @@
 //! Data structures for backtracking allocator.
 
 use super::liveranges::SpillWeight;
-use crate::cfg::CFGInfo;
+use crate::cfg::{CFGInfo, CFGInfoCtx};
 use crate::index::ContainerComparator;
 use crate::indexset::IndexSet;
+use crate::Vec2;
 use crate::{
-    define_index, Allocation, Block, Edit, Function, FxHashSet, MachineEnv, Operand, PReg,
-    ProgPoint, RegClass, VReg,
+    define_index, Allocation, Block, Bump, Edit, Function, FxHashMap, FxHashSet, MachineEnv,
+    Operand, Output, PReg, ProgPoint, RegClass, VReg,
 };
 use alloc::collections::BTreeMap;
+use alloc::collections::VecDeque;
 use alloc::string::String;
 use alloc::vec::Vec;
 use core::cmp::Ordering;
 use core::fmt::Debug;
-use smallvec::{smallvec, SmallVec};
+use core::ops::{Deref, DerefMut};
+use smallvec::SmallVec;
 
 /// A range from `from` (inclusive) to `to` (exclusive).
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
@@ -102,7 +105,7 @@ define_index!(PRegIndex);
 define_index!(SpillSlotIndex);
 
 /// Used to carry small sets of bundles, e.g. for conflict sets.
-pub type LiveBundleVec = SmallVec<[LiveBundleIndex; 4]>;
+pub type LiveBundleVec = Vec<LiveBundleIndex>;
 
 #[derive(Clone, Copy, Debug)]
 pub struct LiveRangeListEntry {
@@ -110,8 +113,8 @@ pub struct LiveRangeListEntry {
     pub index: LiveRangeIndex,
 }
 
-pub type LiveRangeList = SmallVec<[LiveRangeListEntry; 4]>;
-pub type UseList = SmallVec<[Use; 4]>;
+pub type LiveRangeList = Vec2<LiveRangeListEntry>;
+pub type UseList = Vec2<Use>;
 
 #[derive(Clone, Debug)]
 pub struct LiveRange {
@@ -120,8 +123,7 @@ pub struct LiveRange {
     pub vreg: VRegIndex,
     pub bundle: LiveBundleIndex,
     pub uses_spill_weight_and_flags: u32,
-
-    pub uses: UseList,
+    pub(crate) uses: UseList,
 }
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
@@ -196,7 +198,7 @@ impl Use {
 
 #[derive(Clone, Debug)]
 pub struct LiveBundle {
-    pub ranges: LiveRangeList,
+    pub(crate) ranges: LiveRangeList,
     pub spillset: SpillSetIndex,
     pub allocation: Allocation,
     pub prio: u32, // recomputed after every bulk update
@@ -298,7 +300,7 @@ pub(crate) const MAX_SPLITS_PER_SPILLSET: u8 = 2;
 
 #[derive(Clone, Debug)]
 pub struct VRegData {
-    pub ranges: LiveRangeList,
+    pub(crate) ranges: LiveRangeList,
     pub blockparam: Block,
     // We don't initially know the RegClass until we observe a use of the VReg.
     pub class: Option<RegClass>,
@@ -380,23 +382,22 @@ impl BlockparamIn {
 }
 
 impl LiveRanges {
-    pub fn add(&mut self, range: CodeRange) -> LiveRangeIndex {
+    pub(crate) fn add(&mut self, range: CodeRange, bump: Bump) -> LiveRangeIndex {
         self.push(LiveRange {
             range,
             vreg: VRegIndex::invalid(),
             bundle: LiveBundleIndex::invalid(),
             uses_spill_weight_and_flags: 0,
-
-            uses: smallvec![],
+            uses: UseList::new_in(bump),
         })
     }
 }
 
 impl LiveBundles {
-    pub fn add(&mut self) -> LiveBundleIndex {
+    pub(crate) fn add(&mut self, bump: Bump) -> LiveBundleIndex {
         self.push(LiveBundle {
             allocation: Allocation::none(),
-            ranges: smallvec![],
+            ranges: LiveRangeList::new_in(bump),
             spillset: SpillSetIndex::invalid(),
             prio: 0,
             spill_weight_and_props: 0,
@@ -428,29 +429,28 @@ impl core::ops::IndexMut<VRegIndex> for VRegs {
     }
 }
 
-#[derive(Clone, Debug)]
-pub struct Env<'a, F: Function> {
-    pub func: &'a F,
-    pub env: &'a MachineEnv,
-    pub cfginfo: CFGInfo,
-    pub liveins: Vec<IndexSet>,
-    pub liveouts: Vec<IndexSet>,
-    pub blockparam_outs: Vec<BlockparamOut>,
-    pub blockparam_ins: Vec<BlockparamIn>,
-
-    pub ranges: LiveRanges,
-    pub bundles: LiveBundles,
-    pub spillsets: SpillSets,
-    pub vregs: VRegs,
-    pub pregs: Vec<PRegData>,
-    pub allocation_queue: PrioQueue,
-
-    pub spilled_bundles: Vec<LiveBundleIndex>,
-    pub spillslots: Vec<SpillSlotData>,
-    pub slots_by_class: [SpillSlotList; 3],
-
-    pub extra_spillslots_by_class: [SmallVec<[Allocation; 2]>; 3],
-    pub preferred_victim_by_class: [PReg; 3],
+#[derive(Default)]
+pub struct Ctx {
+    pub(crate) cfginfo: CFGInfo,
+    pub(crate) cfginfo_ctx: CFGInfoCtx,
+    pub(crate) liveins: Vec<IndexSet>,
+    pub(crate) liveouts: Vec<IndexSet>,
+    pub(crate) blockparam_outs: Vec<BlockparamOut>,
+    pub(crate) blockparam_ins: Vec<BlockparamIn>,
+
+    pub(crate) ranges: LiveRanges,
+    pub(crate) bundles: LiveBundles,
+    pub(crate) spillsets: SpillSets,
+    pub(crate) vregs: VRegs,
+    pub(crate) pregs: Vec<PRegData>,
+    pub(crate) allocation_queue: PrioQueue,
+
+    pub(crate) spilled_bundles: Vec<LiveBundleIndex>,
+    pub(crate) spillslots: Vec<SpillSlotData>,
+    pub(crate) slots_by_class: [SpillSlotList; 3],
+
+    pub(crate) extra_spillslots_by_class: [SmallVec<[Allocation; 2]>; 3],
+    pub(crate) preferred_victim_by_class: [PReg; 3],
 
     // When multiple fixed-register constraints are present on a
     // single VReg at a single program point (this can happen for,
@@ -460,26 +460,61 @@ pub struct Env<'a, F: Function> {
     // the register available. When we produce the final edit-list, we
     // will insert a copy from wherever the VReg's primary allocation
     // was to the approprate PReg.
-    pub multi_fixed_reg_fixups: Vec<MultiFixedRegFixup>,
-
-    // Output:
-    pub allocs: Vec<Allocation>,
-    pub inst_alloc_offsets: Vec<u32>,
-    pub num_spillslots: u32,
-    pub debug_locations: Vec<(u32, ProgPoint, ProgPoint, Allocation)>,
-
-    pub allocated_bundle_count: usize,
+    pub(crate) multi_fixed_reg_fixups: Vec<MultiFixedRegFixup>,
 
-    pub stats: Stats,
+    pub(crate) allocated_bundle_count: usize,
 
     // For debug output only: a list of textual annotations at every
     // ProgPoint to insert into the final allocated program listing.
-    pub debug_annotations: hashbrown::HashMap<ProgPoint, Vec<String>>,
-    pub annotations_enabled: bool,
+    pub(crate) debug_annotations: FxHashMap<ProgPoint, Vec<String>>,
+    pub(crate) annotations_enabled: bool,
 
     // Cached allocation for `try_to_allocate_bundle_to_reg` to avoid allocating
     // a new HashSet on every call.
-    pub conflict_set: FxHashSet<LiveBundleIndex>,
+    pub(crate) conflict_set: FxHashSet<LiveBundleIndex>,
+
+    // Output:
+    pub output: Output,
+
+    pub(crate) scratch_conflicts: LiveBundleVec,
+    pub(crate) scratch_bundle: LiveBundleVec,
+    pub(crate) scratch_vreg_ranges: Vec<LiveRangeIndex>,
+    pub(crate) scratch_spillset_pool: Vec<SpillSetRanges>,
+
+    pub(crate) scratch_workqueue: VecDeque<Block>,
+
+    pub(crate) scratch_operand_rewrites: FxHashMap<usize, Operand>,
+    pub(crate) scratch_removed_lrs: FxHashSet<LiveRangeIndex>,
+    pub(crate) scratch_removed_lrs_vregs: FxHashSet<VRegIndex>,
+    pub(crate) scratch_workqueue_set: FxHashSet<Block>,
+
+    pub(crate) scratch_bump: Bump,
+}
+
+impl Ctx {
+    pub(crate) fn bump(&self) -> Bump {
+        self.scratch_bump.clone()
+    }
+}
+
+pub struct Env<'a, F: Function> {
+    pub func: &'a F,
+    pub env: &'a MachineEnv,
+    pub ctx: &'a mut Ctx,
+}
+
+impl<'a, F: Function> Deref for Env<'a, F> {
+    type Target = Ctx;
+
+    fn deref(&self) -> &Self::Target {
+        self.ctx
+    }
+}
+
+impl<'a, F: Function> DerefMut for Env<'a, F> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.ctx
+    }
+}
 
 impl<'a, F: Function> Env<'a, F> {
@@ -507,19 +542,11 @@ impl<'a, F: Function> Env<'a, F> {
     }
 }
 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Default)]
 pub struct SpillSetRanges {
     pub btree: BTreeMap<LiveRangeKey, SpillSetIndex>,
 }
 
-impl SpillSetRanges {
-    pub fn new() -> Self {
-        Self {
-            btree: BTreeMap::new(),
-        }
-    }
-}
-
 #[derive(Clone, Debug)]
 pub struct SpillSlotData {
     pub ranges: SpillSetRanges,
@@ -527,20 +554,13 @@ pub struct SpillSlotData {
     pub alloc: Allocation,
 }
 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Default)]
 pub struct SpillSlotList {
     pub slots: SmallVec<[SpillSlotIndex; 32]>,
     pub probe_start: usize,
 }
 
 impl SpillSlotList {
-    pub fn new() -> Self {
-        SpillSlotList {
-            slots: smallvec![],
-            probe_start: 0,
-        }
-    }
-
     /// Get the next spillslot index in probing order, wrapping around
     /// at the end of the slots list.
     pub(crate) fn next_index(&self, index: usize) -> usize {
@@ -553,7 +573,7 @@ impl SpillSlotList {
     }
 }
 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Default)]
 pub struct PrioQueue {
     pub heap: alloc::collections::BinaryHeap<PrioQueueEntry>,
 }
@@ -631,12 +651,6 @@ impl<'a> ContainerComparator for PrioQueueComparator<'a> {
 }
 
 impl PrioQueue {
-    pub fn new() -> Self {
-        PrioQueue {
-            heap: alloc::collections::BinaryHeap::new(),
-        }
-    }
-
     #[inline(always)]
     pub fn insert(&mut self, bundle: LiveBundleIndex, prio: usize, reg_hint: PReg) {
         self.heap.push(PrioQueueEntry {
@@ -660,7 +674,7 @@ impl PrioQueue {
 impl LiveRangeSet {
     pub(crate) fn new() -> Self {
         Self {
-            btree: BTreeMap::new(),
+            btree: BTreeMap::default(),
         }
     }
 }
@@ -727,7 +741,7 @@ impl InsertedMoves {
     }
 }
 
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Default)]
 pub struct Edits {
     edits: Vec<(PosWithPrio, Edit)>,
 }
@@ -751,8 +765,8 @@ impl Edits {
     }
 
     #[inline(always)]
-    pub fn into_edits(self) -> impl Iterator<Item = (ProgPoint, Edit)> {
-        self.edits.into_iter().map(|(pos, edit)| (pos.pos, edit))
+    pub fn drain_edits(&mut self) -> impl Iterator<Item = (ProgPoint, Edit)> + '_ {
+        self.edits.drain(..).map(|(pos, edit)| (pos.pos, edit))
     }
 
     /// Sort edits by the combination of their program position and priority. This is a stable sort
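Splitting `Env` into a borrowed `Ctx` plus `Deref`/`DerefMut` keeps most existing field accesses (`self.bundles`, `self.vregs`, and so on) compiling unchanged. A minimal sketch of the shape:

```rust
use core::ops::{Deref, DerefMut};

#[derive(Default)]
struct Ctx {
    counter: u32, // stands in for ranges/bundles/vregs/...
}

struct Env<'a> {
    input: &'a str,
    ctx: &'a mut Ctx,
}

impl<'a> Deref for Env<'a> {
    type Target = Ctx;
    fn deref(&self) -> &Ctx {
        self.ctx
    }
}

impl<'a> DerefMut for Env<'a> {
    fn deref_mut(&mut self) -> &mut Ctx {
        self.ctx
    }
}

fn main() {
    let mut ctx = Ctx::default();
    let mut env = Env { input: "demo", ctx: &mut ctx };
    env.counter += 1; // field access forwarded through DerefMut
    assert_eq!(env.counter, 1);
}
```

One caveat visible throughout the rest of this diff: going through `Deref` is a method call, so the borrow checker treats it as borrowing all of `Env`. Call sites that mix a mutation with another borrow of the context, such as `self.ctx.ranges.add(range, self.ctx.bump())`, spell out `self.ctx` to get plain field-projection borrows instead.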
diff --git a/src/ion/liveranges.rs b/src/ion/liveranges.rs
index d6d413dd..c518262b 100644
--- a/src/ion/liveranges.rs
+++ b/src/ion/liveranges.rs
@@ -13,21 +13,17 @@
 //! Live-range computation.
 
 use super::{
-    CodeRange, Env, LiveRangeFlag, LiveRangeIndex, LiveRangeKey, LiveRangeListEntry, LiveRangeSet,
-    PRegData, PRegIndex, RegClass, Use, VRegData, VRegIndex,
+    CodeRange, Env, LiveRangeFlag, LiveRangeIndex, LiveRangeKey, LiveRangeList, LiveRangeListEntry,
+    LiveRangeSet, PRegData, PRegIndex, RegClass, Use, VRegData, VRegIndex,
 };
 use crate::indexset::IndexSet;
 use crate::ion::data_structures::{
     BlockparamIn, BlockparamOut, FixedRegFixupLevel, MultiFixedRegFixup,
 };
 use crate::{
-    Allocation, Block, Function, FxHashMap, FxHashSet, Inst, InstPosition, Operand,
-    OperandConstraint, OperandKind, OperandPos, PReg, ProgPoint, RegAllocError, VReg,
+    Allocation, Block, Function, Inst, InstPosition, Operand, OperandConstraint, OperandKind,
+    OperandPos, PReg, ProgPoint, RegAllocError, VReg, VecExt,
 };
-use alloc::collections::VecDeque;
-use alloc::vec;
-use alloc::vec::Vec;
-use slice_group_by::GroupByMut;
 use smallvec::{smallvec, SmallVec};
 
 /// A spill weight computed for a certain Use.
@@ -121,10 +117,10 @@ impl<'a, F: Function> Env<'a, F> {
         // Create VRegs from the vreg count.
         for idx in 0..self.func.num_vregs() {
             // We'll fill in the real details when we see the def.
-            self.vregs.add(
+            self.ctx.vregs.add(
                 VReg::new(idx, RegClass::Int),
                 VRegData {
-                    ranges: smallvec![],
+                    ranges: LiveRangeList::new_in(self.ctx.bump()),
                     blockparam: Block::invalid(),
                     // We'll learn the RegClass as we scan the code.
                     class: None,
@@ -133,10 +129,10 @@ impl<'a, F: Function> Env<'a, F> {
         }
         // Create allocations too.
         for inst in 0..self.func.num_insts() {
-            let start = self.allocs.len() as u32;
-            self.inst_alloc_offsets.push(start);
+            let start = self.output.allocs.len() as u32;
+            self.output.inst_alloc_offsets.push(start);
             for _ in 0..self.func.inst_operands(Inst::new(inst)).len() {
-                self.allocs.push(Allocation::none());
+                self.output.allocs.push(Allocation::none());
             }
         }
     }
@@ -195,7 +191,7 @@ impl<'a, F: Function> Env<'a, F> {
         {
             // Is not contiguous with previously-added (immediately
             // following) range; create a new range.
-            let lr = self.ranges.add(range);
+            let lr = self.ctx.ranges.add(range, self.ctx.bump());
             self.ranges[lr].vreg = vreg;
             self.vregs[vreg]
                 .ranges
@@ -281,8 +277,9 @@ impl<'a, F: Function> Env<'a, F> {
 
         // Run a worklist algorithm to precisely compute liveins and
         // liveouts.
-        let mut workqueue = VecDeque::new();
-        let mut workqueue_set = FxHashSet::default();
+        let mut workqueue = core::mem::take(&mut self.ctx.scratch_workqueue);
+        let mut workqueue_set = core::mem::take(&mut self.ctx.scratch_workqueue_set);
+        workqueue_set.clear();
         // Initialize workqueue with postorder traversal.
         for &block in &self.cfginfo.postorder[..] {
             workqueue.push_back(block);
@@ -295,7 +292,7 @@ impl<'a, F: Function> Env<'a, F> {
 
             trace!("computing liveins for block{}", block.index());
 
-            self.stats.livein_iterations += 1;
+            self.output.stats.livein_iterations += 1;
 
             let mut live = self.liveouts[block.index()].clone();
             trace!(" -> initial liveout set: {:?}", live);
@@ -338,7 +335,7 @@ impl<'a, F: Function> Env<'a, F> {
             }
 
             for &pred in self.func.block_preds(block) {
-                if self.liveouts[pred.index()].union_with(&live) {
+                if self.ctx.liveouts[pred.index()].union_with(&live) {
                     if !workqueue_set.contains(&pred) {
                         workqueue_set.insert(pred);
                         workqueue.push_back(pred);
@@ -359,6 +356,9 @@ impl<'a, F: Function> Env<'a, F> {
             return Err(RegAllocError::EntryLivein);
         }
 
+        self.ctx.scratch_workqueue = workqueue;
+        self.ctx.scratch_workqueue_set = workqueue_set;
+
         Ok(())
     }
@@ -375,14 +375,15 @@ impl<'a, F: Function> Env<'a, F> {
         //
         // Invariant: a stale range may be present here; ranges are
         // only valid if `live.get(vreg)` is true.
-        let mut vreg_ranges: Vec<LiveRangeIndex> =
-            vec![LiveRangeIndex::invalid(); self.func.num_vregs()];
+        let mut vreg_ranges = core::mem::take(&mut self.ctx.scratch_vreg_ranges);
+        vreg_ranges.repopulate(self.func.num_vregs(), LiveRangeIndex::invalid());
+        let mut operand_rewrites = core::mem::take(&mut self.ctx.scratch_operand_rewrites);
 
         for i in (0..self.func.num_blocks()).rev() {
             let block = Block::new(i);
             let insns = self.func.block_insns(block);
 
-            self.stats.livein_blocks += 1;
+            self.output.stats.livein_blocks += 1;
 
             // Init our local live-in set.
             let mut live = self.liveouts[block.index()].clone();
@@ -476,7 +477,7 @@ impl<'a, F: Function> Env<'a, F> {
             // register can be used multiple times in the same
             // instruction is with an early-use and a late-def. Anything
             // else is a user error.
-            let mut operand_rewrites: FxHashMap<usize, Operand> = FxHashMap::default();
+            operand_rewrites.clear();
             let mut late_def_fixed: SmallVec<[PReg; 8]> = smallvec![];
             for &operand in self.func.inst_operands(inst) {
                 if let OperandConstraint::FixedReg(preg) = operand.constraint() {
@@ -729,13 +730,13 @@ impl<'a, F: Function> Env<'a, F> {
         // when needed, here and then again at the end of allocation
         // when resolving moves.
 
-        for vreg in &mut self.vregs {
+        for vreg in &mut self.ctx.vregs {
             vreg.ranges.reverse();
             let mut last = None;
             for entry in &mut vreg.ranges {
                 // Ranges may have been truncated above at defs. We
                 // need to update with the final range here.
-                entry.range = self.ranges[entry.index].range;
+                entry.range = self.ctx.ranges[entry.index].range;
                 // Assert in-order and non-overlapping.
                 debug_assert!(last.is_none() || last.unwrap() <= entry.range.from);
                 last = Some(entry.range.to);
@@ -750,9 +751,11 @@ impl<'a, F: Function> Env<'a, F> {
         self.blockparam_ins.sort_unstable_by_key(|x| x.key());
         self.blockparam_outs.sort_unstable_by_key(|x| x.key());
 
-        self.stats.initial_liverange_count = self.ranges.len();
-        self.stats.blockparam_ins_count = self.blockparam_ins.len();
-        self.stats.blockparam_outs_count = self.blockparam_outs.len();
+        self.output.stats.initial_liverange_count = self.ranges.len();
+        self.output.stats.blockparam_ins_count = self.blockparam_ins.len();
+        self.output.stats.blockparam_outs_count = self.blockparam_outs.len();
+        self.ctx.scratch_vreg_ranges = vreg_ranges;
+        self.ctx.scratch_operand_rewrites = operand_rewrites;
     }
 
     pub fn fixup_multi_fixed_vregs(&mut self) {
@@ -774,7 +777,10 @@ impl<'a, F: Function> Env<'a, F> {
             trace!("multi-fixed-reg cleanup: vreg {:?} range {:?}", vreg, range,);
 
             // Find groups of uses that occur in at the same program point.
-            for uses in self.ranges[range].uses.linear_group_by_key_mut(|u| u.pos) {
+            for uses in self.ctx.ranges[range]
+                .uses
+                .chunk_by_mut(|a, b| a.pos == b.pos)
+            {
                 if uses.len() < 2 {
                     continue;
                 }
@@ -796,7 +802,7 @@ impl<'a, F: Function> Env<'a, F> {
                             requires_reg = true;
                         }
                         OperandConstraint::FixedReg(preg) => {
-                            if self.pregs[preg.index()].is_stack {
+                            if self.ctx.pregs[preg.index()].is_stack {
                                 num_fixed_stack += 1;
                                 first_stack_slot.get_or_insert(u.slot);
                             } else {
@@ -842,14 +848,14 @@ impl<'a, F: Function> Env<'a, F> {
                     // FixedReg constraint. If either condition is true,
                     // we edit the constraint below; otherwise, we can
                     // skip this edit.
-                    if !(requires_reg && self.pregs[preg.index()].is_stack)
+                    if !(requires_reg && self.ctx.pregs[preg.index()].is_stack)
                         && *first_preg.get_or_insert(preg) == preg
                     {
                         continue;
                     }
 
                     trace!(" -> duplicate; switching to constraint Any");
-                    self.multi_fixed_reg_fixups.push(MultiFixedRegFixup {
+                    self.ctx.multi_fixed_reg_fixups.push(MultiFixedRegFixup {
                         pos: u.pos,
                         from_slot: source_slot,
                         to_slot: u.slot,
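`fixup_multi_fixed_vregs` replaces `slice-group-by`'s `linear_group_by_key_mut` with std's `chunk_by_mut` (stable since Rust 1.77), which is why the crate dependency disappears from Cargo.toml at the top of this diff. The std method yields maximal runs of neighbors for which the predicate holds:

```rust
fn main() {
    // Group uses by program point, mirroring the `u.pos` grouping above.
    let mut uses = [(10, 'a'), (10, 'b'), (11, 'c')];
    let mut groups = uses.chunk_by_mut(|a, b| a.0 == b.0);
    let first = groups.next().unwrap();
    assert_eq!(first.len(), 2); // the two uses at point 10
    first[0].1 = 'z'; // mutable access within the group
    assert_eq!(groups.next().unwrap().len(), 1); // the use at point 11
    assert!(groups.next().is_none());
}
```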
diff --git a/src/ion/merge.rs b/src/ion/merge.rs
index 380dde9e..f921620f 100644
--- a/src/ion/merge.rs
+++ b/src/ion/merge.rs
@@ -14,11 +14,13 @@
 
 use super::{Env, LiveBundleIndex, SpillSet, SpillSlotIndex, VRegIndex};
 use crate::{
-    ion::data_structures::{BlockparamOut, CodeRange},
+    ion::{
+        data_structures::{BlockparamOut, CodeRange},
+        LiveRangeList,
+    },
     Function, Inst, OperandConstraint, OperandKind, PReg, ProgPoint,
 };
 use alloc::format;
-use smallvec::smallvec;
 
 impl<'a, F: Function> Env<'a, F> {
     pub fn merge_bundles(&mut self, from: LiveBundleIndex, to: LiveBundleIndex) -> bool {
@@ -132,7 +134,8 @@ impl<'a, F: Function> Env<'a, F> {
             // `to` bundle is empty -- just move the list over from
             // `from` and set `bundle` up-link on all ranges.
             trace!(" -> to bundle{} is empty; trivial merge", to.index());
-            let list = core::mem::replace(&mut self.bundles[from].ranges, smallvec![]);
+            let empty_vec = LiveRangeList::new_in(self.ctx.bump());
+            let list = core::mem::replace(&mut self.bundles[from].ranges, empty_vec);
 
             for entry in &list {
                 self.ranges[entry.index].bundle = to;
@@ -170,10 +173,12 @@ impl<'a, F: Function> Env<'a, F> {
         // Two non-empty lists of LiveRanges: concatenate and
         // sort. This is faster than a mergesort-like merge into a new
         // list, empirically.
-        let from_list = core::mem::replace(&mut self.bundles[from].ranges, smallvec![]);
+        let empty_vec = LiveRangeList::new_in(self.ctx.bump());
+        let from_list = core::mem::replace(&mut self.bundles[from].ranges, empty_vec);
         for entry in &from_list {
             self.ranges[entry.index].bundle = to;
         }
+
         self.bundles[to].ranges.extend_from_slice(&from_list[..]);
         self.bundles[to]
             .ranges
@@ -213,7 +218,7 @@ impl<'a, F: Function> Env<'a, F> {
         if self.bundles[from].spillset != self.bundles[to].spillset {
             // Widen the range for the target spillset to include the one being merged in.
             let from_range = self.spillsets[self.bundles[from].spillset].range;
-            let to_range = &mut self.spillsets[self.bundles[to].spillset].range;
+            let to_range = &mut self.ctx.spillsets[self.ctx.bundles[to].spillset].range;
             *to_range = to_range.join(from_range);
         }
 
@@ -236,19 +241,19 @@ impl<'a, F: Function> Env<'a, F> {
                 continue;
             }
 
-            let bundle = self.bundles.add();
+            let bundle = self.ctx.bundles.add(self.ctx.bump());
             let mut range = self.vregs[vreg].ranges.first().unwrap().range;
             self.bundles[bundle].ranges = self.vregs[vreg].ranges.clone();
             trace!("vreg v{} gets bundle{}", vreg.index(), bundle.index());
-            for entry in &self.bundles[bundle].ranges {
+            for entry in &self.ctx.bundles[bundle].ranges {
                 trace!(
                     " -> with LR range{}: {:?}",
                     entry.index.index(),
                     entry.range
                 );
                 range = range.join(entry.range);
-                self.ranges[entry.index].bundle = bundle;
+                self.ctx.ranges[entry.index].bundle = bundle;
             }
 
             let mut fixed = false;
@@ -361,6 +366,6 @@ impl<'a, F: Function> Env<'a, F> {
             self.allocation_queue
                 .insert(bundle, prio as usize, PReg::invalid());
         }
-        self.stats.merged_bundle_count = self.allocation_queue.heap.len();
+        self.output.stats.merged_bundle_count = self.allocation_queue.heap.len();
     }
 }
diff --git a/src/ion/mod.rs b/src/ion/mod.rs
index ca74e7a3..0de5f87a 100644
--- a/src/ion/mod.rs
+++ b/src/ion/mod.rs
@@ -13,13 +13,10 @@
 //! Backtracking register allocator. See doc/DESIGN.md for details of
 //! its design.
 
-use crate::cfg::CFGInfo;
 use crate::ssa::validate_ssa;
-use crate::{Function, MachineEnv, Output, PReg, RegAllocError, RegClass};
-use alloc::vec;
-use alloc::vec::Vec;
-
+use crate::{Function, MachineEnv, PReg, RegAllocError, RegClass, VecExt};
 pub(crate) mod data_structures;
+pub use data_structures::Ctx;
 pub use data_structures::Stats;
 use data_structures::*;
 pub(crate) mod reg_traversal;
@@ -39,53 +36,48 @@ pub(crate) mod moves;
 pub(crate) mod spill;
 
 impl<'a, F: Function> Env<'a, F> {
-    pub(crate) fn new(
-        func: &'a F,
-        env: &'a MachineEnv,
-        cfginfo: CFGInfo,
-        annotations_enabled: bool,
-    ) -> Self {
-        let n = func.num_insts();
-        Self {
-            func,
-            env,
-            cfginfo,
-
-            liveins: Vec::with_capacity(func.num_blocks()),
-            liveouts: Vec::with_capacity(func.num_blocks()),
-            blockparam_outs: vec![],
-            blockparam_ins: vec![],
-            bundles: LiveBundles::with_capacity(n),
-            ranges: LiveRanges::with_capacity(4 * n),
-            spillsets: SpillSets::with_capacity(n),
-            vregs: VRegs::with_capacity(n),
-            pregs: vec![],
-            allocation_queue: PrioQueue::new(),
-            spilled_bundles: vec![],
-            spillslots: vec![],
-            slots_by_class: [
-                SpillSlotList::new(),
-                SpillSlotList::new(),
-                SpillSlotList::new(),
-            ],
-            allocated_bundle_count: 0,
-
-            extra_spillslots_by_class: [smallvec![], smallvec![], smallvec![]],
-            preferred_victim_by_class: [PReg::invalid(), PReg::invalid(), PReg::invalid()],
-
-            multi_fixed_reg_fixups: vec![],
-            allocs: Vec::with_capacity(4 * n),
-            inst_alloc_offsets: vec![],
-            num_spillslots: 0,
-            debug_locations: vec![],
-
-            stats: Stats::default(),
-
-            debug_annotations: hashbrown::HashMap::new(),
-            annotations_enabled,
-
-            conflict_set: Default::default(),
+    pub(crate) fn new(func: &'a F, env: &'a MachineEnv, ctx: &'a mut Ctx) -> Self {
+        let ninstrs = func.num_insts();
+        let nblocks = func.num_blocks();
+
+        ctx.liveins.preallocate(nblocks);
+        ctx.liveouts.preallocate(nblocks);
+        ctx.blockparam_ins.clear();
+        ctx.blockparam_outs.clear();
+        ctx.ranges.preallocate(4 * ninstrs);
+        ctx.bundles.preallocate(ninstrs);
+        ctx.spillsets.preallocate(ninstrs);
+        ctx.vregs.preallocate(ninstrs);
+        for preg in ctx.pregs.iter_mut() {
+            preg.is_stack = false;
+            preg.allocations.btree.clear();
         }
+        ctx.allocation_queue.heap.clear();
+        ctx.spilled_bundles.clear();
+        ctx.scratch_spillset_pool
+            .extend(ctx.spillslots.drain(..).map(|mut s| {
+                s.ranges.btree.clear();
+                s.ranges
+            }));
+        ctx.slots_by_class = core::array::from_fn(|_| SpillSlotList::default());
+        ctx.extra_spillslots_by_class = core::array::from_fn(|_| smallvec![]);
+        ctx.preferred_victim_by_class = [PReg::invalid(); 3];
+        ctx.multi_fixed_reg_fixups.clear();
+        ctx.allocated_bundle_count = 0;
+        ctx.debug_annotations.clear();
+        ctx.scratch_bump
+            .get_mut()
+            .expect("we dropped all refs to this")
+            .reset();
+
+        ctx.output.allocs.preallocate(4 * ninstrs);
+        ctx.output.inst_alloc_offsets.clear();
+        ctx.output.num_spillslots = 0;
+        ctx.output.debug_locations.clear();
+        ctx.output.edits.clear();
+        ctx.output.stats = Stats::default();
+
+        Self { func, env, ctx }
     }
 
     pub(crate) fn init(&mut self) -> Result<(), RegAllocError> {
@@ -106,38 +98,34 @@ impl<'a, F: Function> Env<'a, F> {
         self.try_allocating_regs_for_spilled_bundles();
         self.allocate_spillslots();
         let moves = self.apply_allocations_and_insert_moves();
-        let edits = self.resolve_inserted_moves(moves);
-        Ok(edits)
+        Ok(self.resolve_inserted_moves(moves))
     }
 }
 
 pub fn run<F: Function>(
     func: &F,
     mach_env: &MachineEnv,
+    ctx: &mut Ctx,
     enable_annotations: bool,
     enable_ssa_checker: bool,
-) -> Result<Output, RegAllocError> {
-    let cfginfo = CFGInfo::new(func)?;
+) -> Result<(), RegAllocError> {
+    ctx.cfginfo.init(func, &mut ctx.cfginfo_ctx)?;
 
     if enable_ssa_checker {
-        validate_ssa(func, &cfginfo)?;
+        validate_ssa(func, &ctx.cfginfo)?;
     }
 
-    let mut env = Env::new(func, mach_env, cfginfo, enable_annotations);
+    ctx.annotations_enabled = enable_annotations;
+    let mut env = Env::new(func, mach_env, ctx);
     env.init()?;
 
-    let edits = env.run()?;
+    let mut edits = env.run()?;
 
     if enable_annotations {
        env.dump_results();
    }
 
-    Ok(Output {
-        edits: edits.into_edits().collect(),
-        allocs: env.allocs,
-        inst_alloc_offsets: env.inst_alloc_offsets,
-        num_spillslots: env.num_spillslots as usize,
-        debug_locations: env.debug_locations,
-        stats: env.stats,
-    })
+    ctx.output.edits.extend(edits.drain_edits());
+
+    Ok(())
 }
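`Ctx::bump` hands out clones of `scratch_bump`, and `Env::new` resets the arena once every clone is gone; `get_mut()` succeeding only on a unique handle strongly suggests `Bump` wraps the bumpalo arena in a reference-counted handle. A sketch of that reuse pattern under that assumption (the real `Bump` type is defined outside this diff):

```rust
use std::rc::Rc;

use allocator_api2::vec::Vec as AVec;
use bumpalo::Bump;

fn main() {
    // Shared handle, as `Ctx::bump(&self) -> Bump` implies: cloning is
    // cheap and every per-run list allocates from the same arena.
    let mut arena = Rc::new(Bump::new());

    for run in 0..2u32 {
        {
            // bumpalo's "allocator-api2" feature (enabled in Cargo.toml
            // above) lets &Bump serve as the allocator for Vec2-style
            // collections such as allocator-api2's Vec.
            let mut ranges: AVec<u32, &Bump> = AVec::new_in(&arena);
            ranges.push(run);
        } // dropping the Vec is a no-op; memory stays in the arena

        // Between runs: reclaim everything at once. `get_mut` returning
        // Some proves no other handles remain alive.
        Rc::get_mut(&mut arena)
            .expect("we dropped all refs to this")
            .reset();
    }
}
```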
diff --git a/src/ion/moves.rs b/src/ion/moves.rs
index 90915367..22685e53 100644
--- a/src/ion/moves.rs
+++ b/src/ion/moves.rs
@@ -12,6 +12,8 @@
 
 //! Move resolution.
 
+use alloc::vec;
+
 use super::{
     Env, InsertMovePrio, InsertedMove, InsertedMoves, LiveRangeFlag, LiveRangeIndex,
     RedundantMoveEliminator, VRegIndex,
@@ -26,36 +28,38 @@ use crate::{
     Allocation, Block, Edit, Function, FxHashMap, Inst, InstPosition, OperandConstraint,
     OperandKind, OperandPos, PReg, ProgPoint, RegClass, SpillSlot,
 };
+use alloc::format;
 use alloc::vec::Vec;
-use alloc::{format, vec};
 use hashbrown::hash_map::Entry;
 use smallvec::{smallvec, SmallVec};
 
 impl<'a, F: Function> Env<'a, F> {
     pub fn is_start_of_block(&self, pos: ProgPoint) -> bool {
-        let block = self.cfginfo.insn_block[pos.inst().index()];
-        pos == self.cfginfo.block_entry[block.index()]
+        let block = self.ctx.cfginfo.insn_block[pos.inst().index()];
+        pos == self.ctx.cfginfo.block_entry[block.index()]
     }
     pub fn is_end_of_block(&self, pos: ProgPoint) -> bool {
-        let block = self.cfginfo.insn_block[pos.inst().index()];
-        pos == self.cfginfo.block_exit[block.index()]
+        let block = self.ctx.cfginfo.insn_block[pos.inst().index()];
+        pos == self.ctx.cfginfo.block_exit[block.index()]
     }
 
     pub fn get_alloc(&self, inst: Inst, slot: usize) -> Allocation {
-        let inst_allocs = &self.allocs[self.inst_alloc_offsets[inst.index()] as usize..];
+        let inst_allocs =
+            &self.ctx.output.allocs[self.ctx.output.inst_alloc_offsets[inst.index()] as usize..];
         inst_allocs[slot]
     }
 
     pub fn set_alloc(&mut self, inst: Inst, slot: usize, alloc: Allocation) {
-        let inst_allocs = &mut self.allocs[self.inst_alloc_offsets[inst.index()] as usize..];
+        let inst_allocs = &mut self.ctx.output.allocs
+            [self.ctx.output.inst_alloc_offsets[inst.index()] as usize..];
         inst_allocs[slot] = alloc;
     }
 
     pub fn get_alloc_for_range(&self, range: LiveRangeIndex) -> Allocation {
         trace!("get_alloc_for_range: {:?}", range);
-        let bundle = self.ranges[range].bundle;
+        let bundle = self.ctx.ranges[range].bundle;
         trace!(" -> bundle: {:?}", bundle);
-        let bundledata = &self.bundles[bundle];
+        let bundledata = &self.ctx.bundles[bundle];
         trace!(" -> allocation {:?}", bundledata.allocation);
         if bundledata.allocation != Allocation::none() {
             bundledata.allocation
@@ -63,9 +67,9 @@ impl<'a, F: Function> Env<'a, F> {
             trace!(" -> spillset {:?}", bundledata.spillset);
             trace!(
                 " -> spill slot {:?}",
-                self.spillsets[bundledata.spillset].slot
+                self.ctx.spillsets[bundledata.spillset].slot
             );
-            self.spillslots[self.spillsets[bundledata.spillset].slot.index()].alloc
+            self.ctx.spillslots[self.ctx.spillsets[bundledata.spillset].slot.index()].alloc
         }
     }
 
@@ -78,9 +82,9 @@ impl<'a, F: Function> Env<'a, F> {
 
         // Now that all splits are done, we can pay the cost once to
         // sort VReg range lists and update with the final ranges.
-        for vreg in &mut self.vregs {
+        for vreg in &mut self.ctx.vregs {
             for entry in &mut vreg.ranges {
-                entry.range = self.ranges[entry.index].range;
+                entry.range = self.ctx.ranges[entry.index].range;
             }
             vreg.ranges.sort_unstable_by_key(|entry| entry.range.from);
         }
{:?}", - self.bundles[bundle].allocation + self.ctx.bundles[bundle].allocation ); return; } }; let preg_idx = PRegIndex::new(preg.index()); - self.bundles[bundle].allocation = Allocation::none(); - for entry in &self.bundles[bundle].ranges { + self.ctx.bundles[bundle].allocation = Allocation::none(); + for entry in &self.ctx.bundles[bundle].ranges { trace!(" -> removing LR {:?} from reg {:?}", entry.index, preg_idx); - self.pregs[preg_idx.index()] + self.ctx.pregs[preg_idx.index()] .allocations .btree .remove(&LiveRangeKey::from_range(&entry.range)); } - let prio = self.bundles[bundle].prio; + let prio = self.ctx.bundles[bundle].prio; trace!(" -> prio {}; back into queue", prio); - self.allocation_queue + self.ctx + .allocation_queue .insert(bundle, prio as usize, PReg::invalid()); } pub fn bundle_spill_weight(&self, bundle: LiveBundleIndex) -> u32 { - self.bundles[bundle].cached_spill_weight() + self.ctx.bundles[bundle].cached_spill_weight() } - pub fn maximum_spill_weight_in_bundle_set(&self, bundles: &LiveBundleVec) -> u32 { + pub fn maximum_spill_weight_in_bundle_set(&self, bundles: &[LiveBundleIndex]) -> u32 { trace!("maximum_spill_weight_in_bundle_set: {:?}", bundles); let m = bundles .iter() .map(|&b| { - let w = self.bundles[b].cached_spill_weight(); + let w = self.ctx.bundles[b].cached_spill_weight(); trace!("bundle{}: {}", b.index(), w); w }) @@ -265,11 +267,11 @@ impl<'a, F: Function> Env<'a, F> { let minimal; let mut fixed = false; let mut fixed_def = false; - let bundledata = &self.bundles[bundle]; + let bundledata = &self.ctx.bundles[bundle]; let first_range = bundledata.ranges[0].index; - let first_range_data = &self.ranges[first_range]; + let first_range_data = &self.ctx.ranges[first_range]; - self.bundles[bundle].prio = self.compute_bundle_prio(bundle); + self.ctx.bundles[bundle].prio = self.compute_bundle_prio(bundle); if first_range_data.vreg.is_invalid() { trace!(" -> no vreg; minimal and fixed"); @@ -294,8 +296,8 @@ impl<'a, F: Function> Env<'a, F> { // i.e. X.Before..X.After, or two ProgPoints, // i.e. X.Before..X+1.Before. 
trace!(" -> first range has range {:?}", first_range_data.range); - let bundle_start = self.bundles[bundle].ranges.first().unwrap().range.from; - let bundle_end = self.bundles[bundle].ranges.last().unwrap().range.to; + let bundle_start = self.ctx.bundles[bundle].ranges.first().unwrap().range.from; + let bundle_end = self.ctx.bundles[bundle].ranges.last().unwrap().range.to; minimal = bundle_start.inst() == bundle_end.prev().inst(); trace!(" -> minimal: {}", minimal); } @@ -310,8 +312,8 @@ impl<'a, F: Function> Env<'a, F> { } } else { let mut total = SpillWeight::zero(); - for entry in &self.bundles[bundle].ranges { - let range_data = &self.ranges[entry.index]; + for entry in &self.ctx.bundles[bundle].ranges { + let range_data = &self.ctx.ranges[entry.index]; trace!( " -> uses spill weight: +{:?}", range_data.uses_spill_weight() @@ -319,11 +321,11 @@ impl<'a, F: Function> Env<'a, F> { total = total + range_data.uses_spill_weight(); } - if self.bundles[bundle].prio > 0 { - let final_weight = (total.to_f32() as u32) / self.bundles[bundle].prio; + if self.ctx.bundles[bundle].prio > 0 { + let final_weight = (total.to_f32() as u32) / self.ctx.bundles[bundle].prio; trace!( " -> dividing by prio {}; final weight {}", - self.bundles[bundle].prio, + self.ctx.bundles[bundle].prio, final_weight ); core::cmp::min(BUNDLE_MAX_NORMAL_SPILL_WEIGHT, final_weight) @@ -332,7 +334,7 @@ impl<'a, F: Function> Env<'a, F> { } }; - self.bundles[bundle].set_cached_spill_weight_and_props( + self.ctx.bundles[bundle].set_cached_spill_weight_and_props( spill_weight, minimal, fixed, @@ -341,11 +343,11 @@ impl<'a, F: Function> Env<'a, F> { } pub fn minimal_bundle(&self, bundle: LiveBundleIndex) -> bool { - self.bundles[bundle].cached_minimal() + self.ctx.bundles[bundle].cached_minimal() } pub fn recompute_range_properties(&mut self, range: LiveRangeIndex) { - let rangedata = &mut self.ranges[range]; + let rangedata = &mut self.ctx.ranges[range]; let mut w = SpillWeight::zero(); for u in &rangedata.uses { w = w + SpillWeight::from_bits(u.weight); @@ -368,15 +370,15 @@ impl<'a, F: Function> Env<'a, F> { bundle: LiveBundleIndex, create_if_absent: bool, ) -> Option { - let ssidx = self.bundles[bundle].spillset; - let idx = self.spillsets[ssidx].spill_bundle; + let ssidx = self.ctx.bundles[bundle].spillset; + let idx = self.ctx.spillsets[ssidx].spill_bundle; if idx.is_valid() { Some(idx) } else if create_if_absent { - let idx = self.bundles.add(); - self.spillsets[ssidx].spill_bundle = idx; - self.bundles[idx].spillset = ssidx; - self.spilled_bundles.push(idx); + let idx = self.ctx.bundles.add(self.ctx.bump()); + self.ctx.spillsets[ssidx].spill_bundle = idx; + self.ctx.bundles[idx].spillset = ssidx; + self.ctx.spilled_bundles.push(idx); Some(idx) } else { None @@ -392,7 +394,7 @@ impl<'a, F: Function> Env<'a, F> { // spill bundle? mut trim_ends_into_spill_bundle: bool, ) { - self.stats.splits += 1; + self.ctx.output.stats.splits += 1; trace!( "split bundle {:?} at {:?} and requeue with reg hint (for first part) {:?}", bundle, @@ -404,24 +406,24 @@ impl<'a, F: Function> Env<'a, F> { // bundles (and updating vregs' linked lists appropriately), // and enqueue the new bundles. - let spillset = self.bundles[bundle].spillset; + let spillset = self.ctx.bundles[bundle].spillset; // Have we reached the maximum split count? If so, fall back // to a "minimal bundles and spill bundle" setup for this // bundle. See the doc-comment on // `split_into_minimal_bundles()` above for more. 
@@ -213,43 +214,44 @@ impl<'a, F: Function> Env<'a, F> {
         trace!(
             "evicting bundle {:?}: alloc {:?}",
             bundle,
-            self.bundles[bundle].allocation
+            self.ctx.bundles[bundle].allocation
         );
-        let preg = match self.bundles[bundle].allocation.as_reg() {
+        let preg = match self.ctx.bundles[bundle].allocation.as_reg() {
             Some(preg) => preg,
             None => {
                 trace!(
                     " -> has no allocation! {:?}",
-                    self.bundles[bundle].allocation
+                    self.ctx.bundles[bundle].allocation
                 );
                 return;
             }
         };
         let preg_idx = PRegIndex::new(preg.index());
-        self.bundles[bundle].allocation = Allocation::none();
-        for entry in &self.bundles[bundle].ranges {
+        self.ctx.bundles[bundle].allocation = Allocation::none();
+        for entry in &self.ctx.bundles[bundle].ranges {
             trace!(" -> removing LR {:?} from reg {:?}", entry.index, preg_idx);
-            self.pregs[preg_idx.index()]
+            self.ctx.pregs[preg_idx.index()]
                 .allocations
                 .btree
                 .remove(&LiveRangeKey::from_range(&entry.range));
         }
-        let prio = self.bundles[bundle].prio;
+        let prio = self.ctx.bundles[bundle].prio;
         trace!(" -> prio {}; back into queue", prio);
-        self.allocation_queue
+        self.ctx
+            .allocation_queue
             .insert(bundle, prio as usize, PReg::invalid());
     }
 
     pub fn bundle_spill_weight(&self, bundle: LiveBundleIndex) -> u32 {
-        self.bundles[bundle].cached_spill_weight()
+        self.ctx.bundles[bundle].cached_spill_weight()
    }
 
-    pub fn maximum_spill_weight_in_bundle_set(&self, bundles: &LiveBundleVec) -> u32 {
+    pub fn maximum_spill_weight_in_bundle_set(&self, bundles: &[LiveBundleIndex]) -> u32 {
         trace!("maximum_spill_weight_in_bundle_set: {:?}", bundles);
         let m = bundles
             .iter()
             .map(|&b| {
-                let w = self.bundles[b].cached_spill_weight();
+                let w = self.ctx.bundles[b].cached_spill_weight();
                 trace!("bundle{}: {}", b.index(), w);
                 w
             })
@@ -265,11 +267,11 @@ impl<'a, F: Function> Env<'a, F> {
         let minimal;
         let mut fixed = false;
         let mut fixed_def = false;
-        let bundledata = &self.bundles[bundle];
+        let bundledata = &self.ctx.bundles[bundle];
         let first_range = bundledata.ranges[0].index;
-        let first_range_data = &self.ranges[first_range];
+        let first_range_data = &self.ctx.ranges[first_range];
 
-        self.bundles[bundle].prio = self.compute_bundle_prio(bundle);
+        self.ctx.bundles[bundle].prio = self.compute_bundle_prio(bundle);
 
         if first_range_data.vreg.is_invalid() {
             trace!(" -> no vreg; minimal and fixed");
@@ -294,8 +296,8 @@ impl<'a, F: Function> Env<'a, F> {
             // i.e. X.Before..X.After, or two ProgPoints,
             // i.e. X.Before..X+1.Before.
             trace!(" -> first range has range {:?}", first_range_data.range);
-            let bundle_start = self.bundles[bundle].ranges.first().unwrap().range.from;
-            let bundle_end = self.bundles[bundle].ranges.last().unwrap().range.to;
+            let bundle_start = self.ctx.bundles[bundle].ranges.first().unwrap().range.from;
+            let bundle_end = self.ctx.bundles[bundle].ranges.last().unwrap().range.to;
             minimal = bundle_start.inst() == bundle_end.prev().inst();
             trace!(" -> minimal: {}", minimal);
         }
@@ -310,8 +312,8 @@ impl<'a, F: Function> Env<'a, F> {
             }
         } else {
             let mut total = SpillWeight::zero();
-            for entry in &self.bundles[bundle].ranges {
-                let range_data = &self.ranges[entry.index];
+            for entry in &self.ctx.bundles[bundle].ranges {
+                let range_data = &self.ctx.ranges[entry.index];
                 trace!(
                     " -> uses spill weight: +{:?}",
                     range_data.uses_spill_weight()
@@ -319,11 +321,11 @@ impl<'a, F: Function> Env<'a, F> {
                 total = total + range_data.uses_spill_weight();
             }
 
-            if self.bundles[bundle].prio > 0 {
-                let final_weight = (total.to_f32() as u32) / self.bundles[bundle].prio;
+            if self.ctx.bundles[bundle].prio > 0 {
+                let final_weight = (total.to_f32() as u32) / self.ctx.bundles[bundle].prio;
                 trace!(
                     " -> dividing by prio {}; final weight {}",
-                    self.bundles[bundle].prio,
+                    self.ctx.bundles[bundle].prio,
                     final_weight
                 );
                 core::cmp::min(BUNDLE_MAX_NORMAL_SPILL_WEIGHT, final_weight)
@@ -332,7 +334,7 @@ impl<'a, F: Function> Env<'a, F> {
             }
         };
 
-        self.bundles[bundle].set_cached_spill_weight_and_props(
+        self.ctx.bundles[bundle].set_cached_spill_weight_and_props(
             spill_weight,
             minimal,
             fixed,
@@ -341,11 +343,11 @@ impl<'a, F: Function> Env<'a, F> {
     }
 
     pub fn minimal_bundle(&self, bundle: LiveBundleIndex) -> bool {
-        self.bundles[bundle].cached_minimal()
+        self.ctx.bundles[bundle].cached_minimal()
     }
 
     pub fn recompute_range_properties(&mut self, range: LiveRangeIndex) {
-        let rangedata = &mut self.ranges[range];
+        let rangedata = &mut self.ctx.ranges[range];
         let mut w = SpillWeight::zero();
         for u in &rangedata.uses {
             w = w + SpillWeight::from_bits(u.weight);
@@ -368,15 +370,15 @@ impl<'a, F: Function> Env<'a, F> {
         bundle: LiveBundleIndex,
         create_if_absent: bool,
     ) -> Option<LiveBundleIndex> {
-        let ssidx = self.bundles[bundle].spillset;
-        let idx = self.spillsets[ssidx].spill_bundle;
+        let ssidx = self.ctx.bundles[bundle].spillset;
+        let idx = self.ctx.spillsets[ssidx].spill_bundle;
         if idx.is_valid() {
             Some(idx)
         } else if create_if_absent {
-            let idx = self.bundles.add();
-            self.spillsets[ssidx].spill_bundle = idx;
-            self.bundles[idx].spillset = ssidx;
-            self.spilled_bundles.push(idx);
+            let idx = self.ctx.bundles.add(self.ctx.bump());
+            self.ctx.spillsets[ssidx].spill_bundle = idx;
+            self.ctx.bundles[idx].spillset = ssidx;
+            self.ctx.spilled_bundles.push(idx);
             Some(idx)
         } else {
             None
- let mut new_lr_list: LiveRangeList = self.bundles[bundle] - .ranges - .iter() - .cloned() - .skip(first_lr_in_new_bundle_idx) - .collect(); - self.bundles[bundle] + let mut new_lr_list: LiveRangeList = LiveRangeList::new_in(self.ctx.bump()); + new_lr_list.extend( + self.ctx.bundles[bundle] + .ranges + .iter() + .cloned() + .skip(first_lr_in_new_bundle_idx), + ); + self.ctx.bundles[bundle] .ranges .truncate(last_lr_in_old_bundle_idx + 1); - self.bundles[bundle].ranges.shrink_to_fit(); + self.ctx.bundles[bundle].ranges.shrink_to_fit(); // If the first entry in `new_lr_list` is a LR that is split // down the middle, replace it with a new LR and chop off the @@ -545,33 +549,38 @@ impl<'a, F: Function> Env<'a, F> { if split_at > new_lr_list[0].range.from { debug_assert_eq!(last_lr_in_old_bundle_idx, first_lr_in_new_bundle_idx); let orig_lr = new_lr_list[0].index; - let new_lr = self.ranges.add(CodeRange { - from: split_at, - to: new_lr_list[0].range.to, - }); - self.ranges[new_lr].vreg = self.ranges[orig_lr].vreg; + let new_lr = self.ctx.ranges.add( + CodeRange { + from: split_at, + to: new_lr_list[0].range.to, + }, + self.ctx.bump(), + ); + self.ctx.ranges[new_lr].vreg = self.ranges[orig_lr].vreg; trace!(" -> splitting LR {:?} into {:?}", orig_lr, new_lr); - let first_use = self.ranges[orig_lr] + let first_use = self.ctx.ranges[orig_lr] .uses .iter() .position(|u| u.pos >= split_at) - .unwrap_or(self.ranges[orig_lr].uses.len()); - let rest_uses: UseList = self.ranges[orig_lr] - .uses - .iter() - .cloned() - .skip(first_use) - .collect(); - self.ranges[new_lr].uses = rest_uses; - self.ranges[orig_lr].uses.truncate(first_use); - self.ranges[orig_lr].uses.shrink_to_fit(); + .unwrap_or(self.ctx.ranges[orig_lr].uses.len()); + let mut rest_uses = UseList::new_in(self.ctx.bump()); + rest_uses.extend( + self.ctx.ranges[orig_lr] + .uses + .iter() + .cloned() + .skip(first_use), + ); + self.ctx.ranges[new_lr].uses = rest_uses; + self.ctx.ranges[orig_lr].uses.truncate(first_use); + self.ctx.ranges[orig_lr].uses.shrink_to_fit(); self.recompute_range_properties(orig_lr); self.recompute_range_properties(new_lr); new_lr_list[0].index = new_lr; - new_lr_list[0].range = self.ranges[new_lr].range; - self.ranges[orig_lr].range.to = split_at; - self.bundles[bundle].ranges[last_lr_in_old_bundle_idx].range = - self.ranges[orig_lr].range; + new_lr_list[0].range = self.ctx.ranges[new_lr].range; + self.ctx.ranges[orig_lr].range.to = split_at; + self.ctx.bundles[bundle].ranges[last_lr_in_old_bundle_idx].range = + self.ctx.ranges[orig_lr].range; // Perform a lazy split in the VReg data. We just // append the new LR and its range; we will sort by @@ -579,21 +588,21 @@ impl<'a, F: Function> Env<'a, F> { // iterate over the VReg's ranges after allocation // completes (this is the only time when order // matters). 
- self.vregs[self.ranges[new_lr].vreg] + self.ctx.vregs[self.ctx.ranges[new_lr].vreg] .ranges .push(LiveRangeListEntry { - range: self.ranges[new_lr].range, + range: self.ctx.ranges[new_lr].range, index: new_lr, }); } - let new_bundle = self.bundles.add(); + let new_bundle = self.ctx.bundles.add(self.ctx.bump()); trace!(" -> creating new bundle {:?}", new_bundle); - self.bundles[new_bundle].spillset = spillset; + self.ctx.bundles[new_bundle].spillset = spillset; for entry in &new_lr_list { - self.ranges[entry.index].bundle = new_bundle; + self.ctx.ranges[entry.index].bundle = new_bundle; } - self.bundles[new_bundle].ranges = new_lr_list; + self.ctx.bundles[new_bundle].ranges = new_lr_list; if trim_ends_into_spill_bundle { // Finally, handle moving LRs to the spill bundle when @@ -603,10 +612,10 @@ impl<'a, F: Function> Env<'a, F> { // the spill bundle. (We are careful to treat the "starts at // def" flag as an implicit first def even if no def-type Use // is present.) - while let Some(entry) = self.bundles[bundle].ranges.last().cloned() { + while let Some(entry) = self.ctx.bundles[bundle].ranges.last().cloned() { let end = entry.range.to; - let vreg = self.ranges[entry.index].vreg; - let last_use = self.ranges[entry.index].uses.last().map(|u| u.pos); + let vreg = self.ctx.ranges[entry.index].vreg; + let last_use = self.ctx.ranges[entry.index].uses.last().map(|u| u.pos); if last_use.is_none() { let spill = self .get_or_create_spill_bundle(bundle, /* create_if_absent = */ true) @@ -617,9 +626,9 @@ impl<'a, F: Function> Env<'a, F> { entry.index, spill ); - self.bundles[spill].ranges.push(entry); - self.bundles[bundle].ranges.pop(); - self.ranges[entry.index].bundle = spill; + self.ctx.bundles[spill].ranges.push(entry); + self.ctx.bundles[bundle].ranges.pop(); + self.ctx.ranges[entry.index].bundle = spill; continue; } let last_use = last_use.unwrap(); @@ -628,21 +637,21 @@ impl<'a, F: Function> Env<'a, F> { let spill = self .get_or_create_spill_bundle(bundle, /* create_if_absent = */ true) .unwrap(); - self.bundles[bundle].ranges.last_mut().unwrap().range.to = split; - self.ranges[self.bundles[bundle].ranges.last().unwrap().index] + self.ctx.bundles[bundle].ranges.last_mut().unwrap().range.to = split; + self.ctx.ranges[self.ctx.bundles[bundle].ranges.last().unwrap().index] .range .to = split; let range = CodeRange { from: split, to: end, }; - let empty_lr = self.ranges.add(range); - self.bundles[spill].ranges.push(LiveRangeListEntry { + let empty_lr = self.ctx.ranges.add(range, self.ctx.bump()); + self.ctx.bundles[spill].ranges.push(LiveRangeListEntry { range, index: empty_lr, }); - self.ranges[empty_lr].bundle = spill; - self.vregs[vreg].ranges.push(LiveRangeListEntry { + self.ctx.ranges[empty_lr].bundle = spill; + self.ctx.vregs[vreg].ranges.push(LiveRangeListEntry { range, index: empty_lr, }); @@ -660,13 +669,13 @@ impl<'a, F: Function> Env<'a, F> { } break; } - while let Some(entry) = self.bundles[new_bundle].ranges.first().cloned() { - if self.ranges[entry.index].has_flag(LiveRangeFlag::StartsAtDef) { + while let Some(entry) = self.ctx.bundles[new_bundle].ranges.first().cloned() { + if self.ctx.ranges[entry.index].has_flag(LiveRangeFlag::StartsAtDef) { break; } let start = entry.range.from; - let vreg = self.ranges[entry.index].vreg; - let first_use = self.ranges[entry.index].uses.first().map(|u| u.pos); + let vreg = self.ctx.ranges[entry.index].vreg; + let first_use = self.ctx.ranges[entry.index].uses.first().map(|u| u.pos); if first_use.is_none() { let spill = self 
.get_or_create_spill_bundle(new_bundle, /* create_if_absent = */ true) @@ -677,9 +686,9 @@ impl<'a, F: Function> Env<'a, F> { entry.index, spill ); - self.bundles[spill].ranges.push(entry); - self.bundles[new_bundle].ranges.drain(..1); - self.ranges[entry.index].bundle = spill; + self.ctx.bundles[spill].ranges.push(entry); + self.ctx.bundles[new_bundle].ranges.drain(..1); + self.ctx.ranges[entry.index].bundle = spill; continue; } let first_use = first_use.unwrap(); @@ -688,26 +697,26 @@ impl<'a, F: Function> Env<'a, F> { let spill = self .get_or_create_spill_bundle(new_bundle, /* create_if_absent = */ true) .unwrap(); - self.bundles[new_bundle] + self.ctx.bundles[new_bundle] .ranges .first_mut() .unwrap() .range .from = split; - self.ranges[self.bundles[new_bundle].ranges.first().unwrap().index] + self.ctx.ranges[self.ctx.bundles[new_bundle].ranges.first().unwrap().index] .range .from = split; let range = CodeRange { from: start, to: split, }; - let empty_lr = self.ranges.add(range); - self.bundles[spill].ranges.push(LiveRangeListEntry { + let empty_lr = self.ctx.ranges.add(range, self.ctx.bump()); + self.ctx.bundles[spill].ranges.push(LiveRangeListEntry { range, index: empty_lr, }); - self.ranges[empty_lr].bundle = spill; - self.vregs[vreg].ranges.push(LiveRangeListEntry { + self.ctx.ranges[empty_lr].bundle = spill; + self.ctx.vregs[vreg].ranges.push(LiveRangeListEntry { range, index: empty_lr, }); @@ -727,16 +736,18 @@ impl<'a, F: Function> Env<'a, F> { } } - if self.bundles[bundle].ranges.len() > 0 { + if self.ctx.bundles[bundle].ranges.len() > 0 { self.recompute_bundle_properties(bundle); - let prio = self.bundles[bundle].prio; - self.allocation_queue + let prio = self.ctx.bundles[bundle].prio; + self.ctx + .allocation_queue .insert(bundle, prio as usize, reg_hint); } - if self.bundles[new_bundle].ranges.len() > 0 { + if self.ctx.bundles[new_bundle].ranges.len() > 0 { self.recompute_bundle_properties(new_bundle); - let prio = self.bundles[new_bundle].prio; - self.allocation_queue + let prio = self.ctx.bundles[new_bundle].prio; + self.ctx + .allocation_queue .insert(new_bundle, prio as usize, reg_hint); } } @@ -772,12 +783,13 @@ impl<'a, F: Function> Env<'a, F> { /// registers just at uses/defs and moves the "spilled" value /// into/out of them immediately. 
     pub fn split_into_minimal_bundles(&mut self, bundle: LiveBundleIndex, reg_hint: PReg) {
-        let mut removed_lrs: FxHashSet<LiveRangeIndex> = FxHashSet::default();
-        let mut removed_lrs_vregs: FxHashSet<VRegIndex> = FxHashSet::default();
+        assert_eq!(self.ctx.scratch_removed_lrs_vregs.len(), 0);
+        self.ctx.scratch_removed_lrs.clear();
+
         let mut new_lrs: SmallVec<[(VRegIndex, LiveRangeIndex); 16]> = smallvec![];
         let mut new_bundles: SmallVec<[LiveBundleIndex; 16]> = smallvec![];

-        let spillset = self.bundles[bundle].spillset;
+        let spillset = self.ctx.bundles[bundle].spillset;
         let spill = self
             .get_or_create_spill_bundle(bundle, /* create_if_absent = */ true)
             .unwrap();
@@ -793,22 +805,25 @@ impl<'a, F: Function> Env<'a, F> {
         let mut last_inst: Option<Inst> = None;
         let mut last_vreg: Option<VRegIndex> = None;

-        let mut spill_uses = UseList::new();
+        let mut spill_uses = UseList::new_in(self.ctx.bump());

-        for entry in core::mem::take(&mut self.bundles[bundle].ranges) {
+        let empty_vec = LiveRangeList::new_in(self.ctx.bump());
+        for entry in core::mem::replace(&mut self.ctx.bundles[bundle].ranges, empty_vec) {
             let lr_from = entry.range.from;
             let lr_to = entry.range.to;
-            let vreg = self.ranges[entry.index].vreg;
+            let vreg = self.ctx.ranges[entry.index].vreg;
+
+            self.ctx.scratch_removed_lrs.insert(entry.index);
+            self.ctx.scratch_removed_lrs_vregs.insert(vreg);

-            removed_lrs.insert(entry.index);
-            removed_lrs_vregs.insert(vreg);
             trace!(" -> removing old LR {:?} for vreg {:?}", entry.index, vreg);

             let mut spill_range = entry.range;
             let mut spill_starts_def = false;

             let mut last_live_pos = entry.range.from;
-            for u in core::mem::take(&mut self.ranges[entry.index].uses) {
+            let empty_vec = UseList::new_in(self.ctx.bump());
+            for u in core::mem::replace(&mut self.ctx.ranges[entry.index].uses, empty_vec) {
                 trace!(" -> use {:?} (last_live_pos {:?})", u, last_live_pos);

                 let is_def = u.operand.kind() == OperandKind::Def;
@@ -839,7 +854,7 @@ impl<'a, F: Function> Env<'a, F> {
                 // If we just created a LR for this inst at the last
                 // pos, add this use to the same LR.
                 if Some(u.pos.inst()) == last_inst && Some(vreg) == last_vreg {
-                    self.ranges[last_lr.unwrap()].uses.push(u);
+                    self.ctx.ranges[last_lr.unwrap()].uses.push(u);
                     trace!(" -> appended to last LR {:?}", last_lr.unwrap());
                     continue;
                 }
@@ -855,10 +870,10 @@ impl<'a, F: Function> Env<'a, F> {
                 // new bundle.
                 if Some(u.pos.inst()) == last_inst {
                     let cr = CodeRange { from: u.pos, to };
-                    let lr = self.ranges.add(cr);
+                    let lr = self.ctx.ranges.add(cr, self.ctx.bump());
                     new_lrs.push((vreg, lr));
-                    self.ranges[lr].uses.push(u);
-                    self.ranges[lr].vreg = vreg;
+                    self.ctx.ranges[lr].uses.push(u);
+                    self.ctx.ranges[lr].vreg = vreg;

                     trace!(
                         " -> created new LR {:?} but adding to existing bundle {:?}",
                         lr,
                         last_bundle.unwrap()
                     );
                     // Edit the previous LR to end mid-inst.
-                    self.bundles[last_bundle.unwrap()]
+                    self.ctx.bundles[last_bundle.unwrap()]
                         .ranges
                         .last_mut()
                         .unwrap()
                         .range
                         .to = u.pos;
-                    self.ranges[last_lr.unwrap()].range.to = u.pos;
+                    self.ctx.ranges[last_lr.unwrap()].range.to = u.pos;

                     // Add this LR to the bundle.
- self.bundles[last_bundle.unwrap()] + self.ctx.bundles[last_bundle.unwrap()] .ranges .push(LiveRangeListEntry { range: cr, index: lr, }); - self.ranges[lr].bundle = last_bundle.unwrap(); + self.ctx.ranges[lr].bundle = last_bundle.unwrap(); last_live_pos = ProgPoint::before(u.pos.inst().next()); continue; } @@ -889,24 +904,26 @@ impl<'a, F: Function> Env<'a, F> { let pos = ProgPoint::before(u.pos.inst()); let pos = core::cmp::max(lr_from, pos); let cr = CodeRange { from: pos, to }; - let lr = self.ranges.add(cr); + let lr = self.ctx.ranges.add(cr, self.ctx.bump()); new_lrs.push((vreg, lr)); - self.ranges[lr].uses.push(u); - self.ranges[lr].vreg = vreg; + self.ctx.ranges[lr].uses.push(u); + self.ctx.ranges[lr].vreg = vreg; // Create a new bundle that contains only this LR. - let new_bundle = self.bundles.add(); - self.ranges[lr].bundle = new_bundle; - self.bundles[new_bundle].spillset = spillset; - self.bundles[new_bundle].ranges.push(LiveRangeListEntry { - range: cr, - index: lr, - }); + let new_bundle = self.ctx.bundles.add(self.ctx.bump()); + self.ctx.ranges[lr].bundle = new_bundle; + self.ctx.bundles[new_bundle].spillset = spillset; + self.ctx.bundles[new_bundle] + .ranges + .push(LiveRangeListEntry { + range: cr, + index: lr, + }); new_bundles.push(new_bundle); // If this use was a Def, set the StartsAtDef flag for the new LR. if is_def { - self.ranges[lr].set_flag(LiveRangeFlag::StartsAtDef); + self.ctx.ranges[lr].set_flag(LiveRangeFlag::StartsAtDef); } trace!( @@ -928,21 +945,21 @@ impl<'a, F: Function> Env<'a, F> { // Make one entry in the spill bundle that covers the whole range. // TODO: it might be worth tracking enough state to only create this LR when there is // open space in the original LR. - let spill_lr = self.ranges.add(spill_range); - self.ranges[spill_lr].vreg = vreg; - self.ranges[spill_lr].bundle = spill; - self.ranges[spill_lr].uses.extend(spill_uses.drain(..)); + let spill_lr = self.ctx.ranges.add(spill_range, self.ctx.bump()); + self.ctx.ranges[spill_lr].vreg = vreg; + self.ctx.ranges[spill_lr].bundle = spill; + self.ctx.ranges[spill_lr].uses.extend(spill_uses.drain(..)); new_lrs.push((vreg, spill_lr)); if spill_starts_def { - self.ranges[spill_lr].set_flag(LiveRangeFlag::StartsAtDef); + self.ctx.ranges[spill_lr].set_flag(LiveRangeFlag::StartsAtDef); } - self.bundles[spill].ranges.push(LiveRangeListEntry { + self.ctx.bundles[spill].ranges.push(LiveRangeListEntry { range: spill_range, index: spill_lr, }); - self.ranges[spill_lr].bundle = spill; + self.ctx.ranges[spill_lr].bundle = spill; trace!( " -> added spill range {:?} in new LR {:?} in spill bundle {:?}", spill_range, @@ -955,26 +972,28 @@ impl<'a, F: Function> Env<'a, F> { } // Remove all of the removed LRs from respective vregs' lists. - for vreg in removed_lrs_vregs { - self.vregs[vreg] + for vreg in self.ctx.scratch_removed_lrs_vregs.drain() { + let lrs = &mut self.ctx.scratch_removed_lrs; + self.ctx.vregs[vreg] .ranges - .retain(|entry| !removed_lrs.contains(&entry.index)); + .retain(|entry| !lrs.contains(&entry.index)); } // Add the new LRs to their respective vreg lists. for (vreg, lr) in new_lrs { - let range = self.ranges[lr].range; + let range = self.ctx.ranges[lr].range; let entry = LiveRangeListEntry { range, index: lr }; - self.vregs[vreg].ranges.push(entry); + self.ctx.vregs[vreg].ranges.push(entry); } // Recompute bundle properties for all new bundles and enqueue // them. 
         for bundle in new_bundles {
-            if self.bundles[bundle].ranges.len() > 0 {
+            if self.ctx.bundles[bundle].ranges.len() > 0 {
                 self.recompute_bundle_properties(bundle);
-                let prio = self.bundles[bundle].prio;
-                self.allocation_queue
+                let prio = self.ctx.bundles[bundle].prio;
+                self.ctx
+                    .allocation_queue
                     .insert(bundle, prio as usize, reg_hint);
             }
         }
@@ -985,14 +1004,14 @@ impl<'a, F: Function> Env<'a, F> {
         bundle: LiveBundleIndex,
         reg_hint: PReg,
     ) -> Result<(), RegAllocError> {
-        let class = self.spillsets[self.bundles[bundle].spillset].class;
+        let class = self.ctx.spillsets[self.bundles[bundle].spillset].class;
         // Grab a hint from either the queue or our spillset, if any.
         let mut hint_reg = if reg_hint != PReg::invalid() {
             reg_hint
         } else {
-            self.spillsets[self.bundles[bundle].spillset].reg_hint
+            self.ctx.spillsets[self.bundles[bundle].spillset].reg_hint
         };
-        if self.pregs[hint_reg.index()].is_stack {
+        if self.ctx.pregs[hint_reg.index()].is_stack {
             hint_reg = PReg::invalid();
         }
         trace!("process_bundle: bundle {:?} hint {:?}", bundle, hint_reg,);
@@ -1027,12 +1046,13 @@ impl<'a, F: Function> Env<'a, F> {
                 if let Some(spill) =
                     self.get_or_create_spill_bundle(bundle, /* create_if_absent = */ false)
                 {
+                    let empty_vec = LiveRangeList::new_in(self.ctx.bump());
                     let mut list =
-                        core::mem::replace(&mut self.bundles[bundle].ranges, smallvec![]);
+                        core::mem::replace(&mut self.ctx.bundles[bundle].ranges, empty_vec);
                     for entry in &list {
-                        self.ranges[entry.index].bundle = spill;
+                        self.ctx.ranges[entry.index].bundle = spill;
                     }
-                    self.bundles[spill].ranges.extend(list.drain(..));
+                    self.ctx.bundles[spill].ranges.extend(list.drain(..));
                     return Ok(());
                 }
             }
@@ -1041,7 +1061,9 @@ impl<'a, F: Function> Env<'a, F> {

         // Try to allocate!
         let mut attempts = 0;
-        loop {
+        let mut scratch = core::mem::take(&mut self.ctx.scratch_conflicts);
+        let mut lowest_cost_evict_conflict_set = core::mem::take(&mut self.ctx.scratch_bundle);
+        'outer: loop {
             attempts += 1;
             trace!("attempt {}, req {:?}", attempts, req);
             debug_assert!(attempts < 100 * self.func.num_insts());

                 Requirement::Register => None,
                 Requirement::Any => {
-                    self.spilled_bundles.push(bundle);
-                    return Ok(());
+                    self.ctx.spilled_bundles.push(bundle);
+                    break;
                 }
             };

             // Scan all pregs, or the one fixed preg, and attempt to allocate.
-            let mut lowest_cost_evict_conflict_set: Option<LiveBundleVec> = None;
             let mut lowest_cost_evict_conflict_cost: Option<u32> = None;
+            lowest_cost_evict_conflict_set.clear();

             let mut lowest_cost_split_conflict_cost: Option<u32> = None;
             let mut lowest_cost_split_conflict_point = ProgPoint::before(Inst::new(0));
@@ -1069,14 +1091,14 @@ impl<'a, F: Function> Env<'a, F> {
                 // location in the code and by the bundle we're
                 // considering. This has the effect of spreading
                 // demand more evenly across registers.
- let scan_offset = self.ranges[self.bundles[bundle].ranges[0].index] + let scan_offset = self.ctx.ranges[self.bundles[bundle].ranges[0].index] .range .from .inst() .index() + bundle.index(); - self.stats.process_bundle_reg_probe_start_any += 1; + self.ctx.output.stats.process_bundle_reg_probe_start_any += 1; for preg in RegTraversalIter::new( self.env, class, @@ -1085,7 +1107,7 @@ impl<'a, F: Function> Env<'a, F> { scan_offset, fixed_preg, ) { - self.stats.process_bundle_reg_probes_any += 1; + self.ctx.output.stats.process_bundle_reg_probes_any += 1; let preg_idx = PRegIndex::new(preg.index()); trace!("trying preg {:?}", preg_idx); @@ -1096,13 +1118,19 @@ impl<'a, F: Function> Env<'a, F> { (Some(a), Some(b)) => Some(core::cmp::max(a, b)), _ => None, }; - match self.try_to_allocate_bundle_to_reg(bundle, preg_idx, scan_limit_cost) { + match self.try_to_allocate_bundle_to_reg( + bundle, + preg_idx, + scan_limit_cost, + &mut scratch, + ) { AllocRegResult::Allocated(alloc) => { - self.stats.process_bundle_reg_success_any += 1; + self.ctx.output.stats.process_bundle_reg_success_any += 1; trace!(" -> allocated to any {:?}", preg_idx); - self.spillsets[self.bundles[bundle].spillset].reg_hint = + self.ctx.spillsets[self.ctx.bundles[bundle].spillset].reg_hint = alloc.as_reg().unwrap(); - return Ok(()); + // Success, return scratch memory to context and finish + break 'outer; } AllocRegResult::Conflict(bundles, first_conflict_point) => { trace!( @@ -1111,17 +1139,20 @@ impl<'a, F: Function> Env<'a, F> { first_conflict_point ); - let conflict_cost = self.maximum_spill_weight_in_bundle_set(&bundles); + let conflict_cost = self.maximum_spill_weight_in_bundle_set(bundles); if lowest_cost_evict_conflict_cost.is_none() || conflict_cost < lowest_cost_evict_conflict_cost.unwrap() { lowest_cost_evict_conflict_cost = Some(conflict_cost); - lowest_cost_evict_conflict_set = Some(bundles); + lowest_cost_evict_conflict_set.clear(); + lowest_cost_evict_conflict_set.extend(bundles); } - let loop_depth = self.cfginfo.approx_loop_depth - [self.cfginfo.insn_block[first_conflict_point.inst().index()].index()]; + let loop_depth = + self.ctx.cfginfo.approx_loop_depth[self.ctx.cfginfo.insn_block + [first_conflict_point.inst().index()] + .index()]; let move_cost = spill_weight_from_constraint( OperandConstraint::Reg, loop_depth as usize, @@ -1140,8 +1171,8 @@ impl<'a, F: Function> Env<'a, F> { AllocRegResult::ConflictWithFixed(max_cost, point) => { trace!(" -> conflict with fixed alloc; cost of other bundles up to point is {}, conflict at {:?}", max_cost, point); - let loop_depth = self.cfginfo.approx_loop_depth - [self.cfginfo.insn_block[point.inst().index()].index()]; + let loop_depth = self.ctx.cfginfo.approx_loop_depth + [self.ctx.cfginfo.insn_block[point.inst().index()].index()]; let move_cost = spill_weight_from_constraint( OperandConstraint::Reg, loop_depth as usize, @@ -1203,7 +1234,7 @@ impl<'a, F: Function> Env<'a, F> { { if let Requirement::Register = req { // Check if this is a too-many-live-registers situation. - let range = self.bundles[bundle].ranges[0].range; + let range = self.ctx.bundles[bundle].ranges[0].range; trace!("checking for too many live regs"); let mut min_bundles_assigned = 0; let mut fixed_assigned = 0; @@ -1217,13 +1248,18 @@ impl<'a, F: Function> Env<'a, F> { from: range.from.prev(), to: range.from.prev(), }); - for (key, lr) in self.pregs[preg.index()].allocations.btree.range(start..) { + for (key, lr) in self.ctx.pregs[preg.index()] + .allocations + .btree + .range(start..) 
+            {
                 let preg_range = key.to_range();
                 if preg_range.to <= range.from {
                     continue;
                 }
                 if preg_range.from >= range.to {
-                    break;
+                    // No later allocation in this preg can overlap; stop scanning.
+                    break;
                 }
                 if lr.is_valid() {
                     if self.minimal_bundle(self.ranges[*lr].bundle) {
@@ -1275,7 +1311,7 @@ impl<'a, F: Function> Env<'a, F> {
                     " -> deciding to split: our spill weight is {}",
                     self.bundle_spill_weight(bundle)
                 );
-                let bundle_start = self.bundles[bundle].ranges[0].range.from;
+                let bundle_start = self.ctx.bundles[bundle].ranges[0].range.from;
                 let mut split_at_point =
                     core::cmp::max(lowest_cost_split_conflict_point, bundle_start);
                 let requeue_with_reg = lowest_cost_split_conflict_reg;
@@ -1283,16 +1319,17 @@ impl<'a, F: Function> Env<'a, F> {
                 // Adjust `split_at_point` if it is within a deeper loop
                 // than the bundle start -- hoist it to just before the
                 // first loop header it encounters.
-                let bundle_start_depth = self.cfginfo.approx_loop_depth
-                    [self.cfginfo.insn_block[bundle_start.inst().index()].index()];
-                let split_at_depth = self.cfginfo.approx_loop_depth
-                    [self.cfginfo.insn_block[split_at_point.inst().index()].index()];
+                let bundle_start_depth = self.ctx.cfginfo.approx_loop_depth
+                    [self.ctx.cfginfo.insn_block[bundle_start.inst().index()].index()];
+                let split_at_depth = self.ctx.cfginfo.approx_loop_depth
+                    [self.ctx.cfginfo.insn_block[split_at_point.inst().index()].index()];
                 if split_at_depth > bundle_start_depth {
-                    for block in (self.cfginfo.insn_block[bundle_start.inst().index()].index() + 1)
-                        ..=self.cfginfo.insn_block[split_at_point.inst().index()].index()
+                    for block in (self.ctx.cfginfo.insn_block[bundle_start.inst().index()].index()
+                        + 1)
+                        ..=self.ctx.cfginfo.insn_block[split_at_point.inst().index()].index()
                     {
-                        if self.cfginfo.approx_loop_depth[block] > bundle_start_depth {
-                            split_at_point = self.cfginfo.block_entry[block];
+                        if self.ctx.cfginfo.approx_loop_depth[block] > bundle_start_depth {
+                            split_at_point = self.ctx.cfginfo.block_entry[block];
                             break;
                         }
                     }
@@ -1304,16 +1341,22 @@ impl<'a, F: Function> Env<'a, F> {
                     requeue_with_reg,
                     /* should_trim = */ true,
                 );
-                return Ok(());
+
+                // Success, return scratch memory to context and finish
+                break 'outer;
             } else {
                 // Evict all bundles in `conflicting bundles` and try again.
- self.stats.evict_bundle_event += 1; - for &bundle in &lowest_cost_evict_conflict_set.unwrap() { + self.ctx.output.stats.evict_bundle_event += 1; + for &bundle in &lowest_cost_evict_conflict_set { trace!(" -> evicting {:?}", bundle); self.evict_bundle(bundle); - self.stats.evict_bundle_count += 1; + self.ctx.output.stats.evict_bundle_count += 1; } } } + + self.ctx.scratch_conflicts = scratch; + self.ctx.scratch_bundle = lowest_cost_evict_conflict_set; + return Ok(()); } } diff --git a/src/ion/spill.rs b/src/ion/spill.rs index 657d4b98..c40efc80 100644 --- a/src/ion/spill.rs +++ b/src/ion/spill.rs @@ -16,38 +16,39 @@ use super::{ AllocRegResult, Env, LiveRangeKey, PReg, PRegIndex, RegTraversalIter, SpillSetIndex, SpillSlotData, SpillSlotIndex, }; -use crate::{ion::data_structures::SpillSetRanges, Allocation, Function, SpillSlot}; +use crate::{Allocation, Function, SpillSlot}; impl<'a, F: Function> Env<'a, F> { pub fn try_allocating_regs_for_spilled_bundles(&mut self) { trace!("allocating regs for spilled bundles"); - for i in 0..self.spilled_bundles.len() { - let bundle = self.spilled_bundles[i]; // don't borrow self + let mut scratch = core::mem::take(&mut self.ctx.scratch_conflicts); + for i in 0..self.ctx.spilled_bundles.len() { + let bundle = self.ctx.spilled_bundles[i]; // don't borrow self - if self.bundles[bundle].ranges.is_empty() { + if self.ctx.bundles[bundle].ranges.is_empty() { continue; } - let class = self.spillsets[self.bundles[bundle].spillset].class; - let hint = self.spillsets[self.bundles[bundle].spillset].reg_hint; + let class = self.ctx.spillsets[self.ctx.bundles[bundle].spillset].class; + let hint = self.ctx.spillsets[self.ctx.bundles[bundle].spillset].reg_hint; // This may be an empty-range bundle whose ranges are not // sorted; sort all range-lists again here. 
- self.bundles[bundle] + self.ctx.bundles[bundle] .ranges .sort_unstable_by_key(|entry| entry.range.from); let mut success = false; - self.stats.spill_bundle_reg_probes += 1; + self.ctx.output.stats.spill_bundle_reg_probes += 1; for preg in RegTraversalIter::new(self.env, class, hint, PReg::invalid(), bundle.index(), None) { trace!("trying bundle {:?} to preg {:?}", bundle, preg); let preg_idx = PRegIndex::new(preg.index()); if let AllocRegResult::Allocated(_) = - self.try_to_allocate_bundle_to_reg(bundle, preg_idx, None) + self.try_to_allocate_bundle_to_reg(bundle, preg_idx, None, &mut scratch) { - self.stats.spill_bundle_reg_success += 1; + self.ctx.output.stats.spill_bundle_reg_success += 1; success = true; break; } @@ -56,11 +57,12 @@ impl<'a, F: Function> Env<'a, F> { trace!( "spilling bundle {:?}: marking spillset {:?} as required", bundle, - self.bundles[bundle].spillset + self.ctx.bundles[bundle].spillset ); - self.spillsets[self.bundles[bundle].spillset].required = true; + self.ctx.spillsets[self.ctx.bundles[bundle].spillset].required = true; } } + self.ctx.scratch_conflicts = scratch; } pub fn spillslot_can_fit_spillset( @@ -68,10 +70,12 @@ impl<'a, F: Function> Env<'a, F> { spillslot: SpillSlotIndex, spillset: SpillSetIndex, ) -> bool { - !self.spillslots[spillslot.index()] + !self.ctx.spillslots[spillslot.index()] .ranges .btree - .contains_key(&LiveRangeKey::from_range(&self.spillsets[spillset].range)) + .contains_key(&LiveRangeKey::from_range( + &self.ctx.spillsets[spillset].range, + )) } pub fn allocate_spillset_to_spillslot( @@ -79,10 +83,10 @@ impl<'a, F: Function> Env<'a, F> { spillset: SpillSetIndex, spillslot: SpillSlotIndex, ) { - self.spillsets[spillset].slot = spillslot; + self.ctx.spillsets[spillset].slot = spillslot; - let res = self.spillslots[spillslot.index()].ranges.btree.insert( - LiveRangeKey::from_range(&self.spillsets[spillset].range), + let res = self.ctx.spillslots[spillslot.index()].ranges.btree.insert( + LiveRangeKey::from_range(&self.ctx.spillsets[spillset].range), spillset, ); @@ -92,19 +96,20 @@ impl<'a, F: Function> Env<'a, F> { pub fn allocate_spillslots(&mut self) { const MAX_ATTEMPTS: usize = 10; - for spillset in 0..self.spillsets.len() { + for spillset in 0..self.ctx.spillsets.len() { trace!("allocate spillslot: {}", spillset); let spillset = SpillSetIndex::new(spillset); - if !self.spillsets[spillset].required { + if !self.ctx.spillsets[spillset].required { continue; } - let class = self.spillsets[spillset].class as usize; + let class = self.ctx.spillsets[spillset].class as usize; // Try a few existing spillslots. - let mut i = self.slots_by_class[class].probe_start; + let mut i = self.ctx.slots_by_class[class].probe_start; let mut success = false; // Never probe the same element more than once: limit the // attempt count to the number of slots in existence. - for _attempt in 0..core::cmp::min(self.slots_by_class[class].slots.len(), MAX_ATTEMPTS) + for _attempt in + 0..core::cmp::min(self.ctx.slots_by_class[class].slots.len(), MAX_ATTEMPTS) { // Note: this indexing of `slots` is always valid // because either the `slots` list is empty and the @@ -113,43 +118,44 @@ impl<'a, F: Function> Env<'a, F> { // in-bounds (because it is made so below when we add // a slot, and it always takes on the last index `i` // after this loop). 
-            let spillslot = self.slots_by_class[class].slots[i];
+            let spillslot = self.ctx.slots_by_class[class].slots[i];
             if self.spillslot_can_fit_spillset(spillslot, spillset) {
                 self.allocate_spillset_to_spillslot(spillset, spillslot);
                 success = true;
-                self.slots_by_class[class].probe_start = i;
+                self.ctx.slots_by_class[class].probe_start = i;
                 break;
             }
-            i = self.slots_by_class[class].next_index(i);
+            i = self.ctx.slots_by_class[class].next_index(i);
         }

         if !success {
             // Allocate a new spillslot.
-            let spillslot = SpillSlotIndex::new(self.spillslots.len());
-            self.spillslots.push(SpillSlotData {
-                ranges: SpillSetRanges::new(),
+            let spillslot = SpillSlotIndex::new(self.ctx.spillslots.len());
+            self.ctx.spillslots.push(SpillSlotData {
+                ranges: self.ctx.scratch_spillset_pool.pop().unwrap_or_default(),
                 alloc: Allocation::none(),
-                slots: self.func.spillslot_size(self.spillsets[spillset].class) as u32,
+                slots: self.func.spillslot_size(self.ctx.spillsets[spillset].class) as u32,
             });
-            self.slots_by_class[class].slots.push(spillslot);
-            self.slots_by_class[class].probe_start = self.slots_by_class[class].slots.len() - 1;
+            self.ctx.slots_by_class[class].slots.push(spillslot);
+            self.ctx.slots_by_class[class].probe_start =
+                self.ctx.slots_by_class[class].slots.len() - 1;
             self.allocate_spillset_to_spillslot(spillset, spillslot);
         }
     }

     // Assign actual slot indices to spillslots.
-    for i in 0..self.spillslots.len() {
-        self.spillslots[i].alloc = self.allocate_spillslot(self.spillslots[i].slots);
+    for i in 0..self.ctx.spillslots.len() {
+        self.ctx.spillslots[i].alloc = self.allocate_spillslot(self.ctx.spillslots[i].slots);
     }

     trace!("spillslot allocator done");
 }

 pub fn allocate_spillslot(&mut self, size: u32) -> Allocation {
-    let mut offset = self.num_spillslots;
+    let mut offset = self.ctx.output.num_spillslots as u32;
     // Align up to `size`.
     debug_assert!(size.is_power_of_two());
     offset = (offset + size - 1) & !(size - 1);
@@ -159,7 +165,7 @@ impl<'a, F: Function> Env<'a, F> {
         offset
     };
     offset += size;
-    self.num_spillslots = offset;
+    self.ctx.output.num_spillslots = offset as _;
     Allocation::stack(SpillSlot::new(slot as usize))
 }
}
diff --git a/src/lib.rs b/src/lib.rs
index 60ff46ac..539466a8 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -11,6 +11,7 @@
 */

 #![allow(dead_code)]
+#![allow(clippy::all)]
 #![no_std]

 #[cfg(feature = "std")]
@@ -34,6 +35,9 @@ macro_rules! trace_enabled {
     };
 }

+use alloc::rc::Rc;
+use allocator_api2::vec::Vec as Vec2;
+use core::ops::Deref as _;
 use core::{hash::BuildHasherDefault, iter::FromIterator};
 use rustc_hash::FxHasher;
 type FxHashMap<K, V> = hashbrown::HashMap<K, V, BuildHasherDefault<FxHasher>>;
@@ -51,6 +55,7 @@
 pub mod ssa;

 #[macro_use]
 mod index;
+pub use self::ion::data_structures::Ctx;
 use alloc::vec::Vec;
 pub use index::{Block, Inst, InstRange};
@@ -161,6 +166,12 @@ impl PReg {
     }
 }

+impl Default for PReg {
+    fn default() -> Self {
+        Self::invalid()
+    }
+}
+
 impl core::fmt::Debug for PReg {
     fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
         write!(
@@ -1469,7 +1480,7 @@ pub struct MachineEnv {
 }

 /// The output of the register allocator.
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Default)]
 #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct Output {
     /// How many spillslots are needed in the frame?
@@ -1580,13 +1591,35 @@ pub fn run<F: Function>(
     options: &RegallocOptions,
 ) -> Result<Output, RegAllocError> {
     match options.algorithm {
-        Algorithm::Ion => ion::run(func, env, options.verbose_log, options.validate_ssa),
+        Algorithm::Ion => {
+            let mut ctx = Ctx::default();
+            run_with_ctx(func, env, options, &mut ctx)?;
+            Ok(ctx.output)
+        }
         Algorithm::Fastalloc => {
             fastalloc::run(func, env, options.verbose_log, options.validate_ssa)
         }
     }
 }

+/// Run the allocator with a reusable context.
+///
+/// The returned reference points to `ctx.output`, which can alternatively be `std::mem::take`n.
+pub fn run_with_ctx<'a, F: Function>(
+    func: &F,
+    env: &MachineEnv,
+    options: &RegallocOptions,
+    ctx: &'a mut Ctx,
+) -> Result<&'a Output, RegAllocError> {
+    match options.algorithm {
+        Algorithm::Ion => ion::run(func, env, ctx, options.verbose_log, options.validate_ssa)?,
+        Algorithm::Fastalloc => {
+            ctx.output = fastalloc::run(func, env, options.verbose_log, options.validate_ssa)?
+        }
+    }
+    Ok(&ctx.output)
+}
+
 #[derive(Clone, Copy, Debug, Default)]
 pub enum Algorithm {
     #[default]
@@ -1606,3 +1639,93 @@ pub struct RegallocOptions {
     /// The register allocation algorithm to be used.
     pub algorithm: Algorithm,
 }
+
+pub(crate) trait VecExt<T> {
+    /// Fills `self` with `value` up to `len` and returns the mutable slice of the values.
+    fn repopulate(&mut self, len: usize, value: T) -> &mut [T]
+    where
+        T: Clone;
+    /// Clears `self` and returns a mutable reference to it.
+    fn cleared(&mut self) -> &mut Self;
+    /// Makes sure `self` is empty and has at least `cap` capacity.
+    fn preallocate(&mut self, cap: usize) -> &mut Self;
+}
+
+impl<T> VecExt<T> for Vec<T> {
+    fn repopulate(&mut self, len: usize, value: T) -> &mut [T]
+    where
+        T: Clone,
+    {
+        self.clear();
+        self.resize(len, value);
+        self
+    }
+
+    fn cleared(&mut self) -> &mut Self {
+        self.clear();
+        self
+    }
+
+    fn preallocate(&mut self, cap: usize) -> &mut Self {
+        self.clear();
+        self.reserve(cap);
+        self
+    }
+}
+
+#[derive(Debug, Clone, Default)]
+pub(crate) struct Bump(Rc<bumpalo::Bump>);
+
+impl Bump {
+    pub(crate) fn get_mut(&mut self) -> Option<&mut bumpalo::Bump> {
+        Rc::get_mut(&mut self.0)
+    }
+}
+
+// Simply delegating because `Rc` does not implement `Allocator`.
+unsafe impl allocator_api2::alloc::Allocator for Bump {
+    fn allocate(
+        &self,
+        layout: core::alloc::Layout,
+    ) -> Result<core::ptr::NonNull<[u8]>, allocator_api2::alloc::AllocError> {
+        self.0.deref().allocate(layout)
+    }
+
+    unsafe fn deallocate(&self, ptr: core::ptr::NonNull<u8>, layout: core::alloc::Layout) {
+        self.0.deref().deallocate(ptr, layout);
+    }
+
+    fn allocate_zeroed(
+        &self,
+        layout: core::alloc::Layout,
+    ) -> Result<core::ptr::NonNull<[u8]>, allocator_api2::alloc::AllocError> {
+        self.0.deref().allocate_zeroed(layout)
+    }
+
+    unsafe fn grow(
+        &self,
+        ptr: core::ptr::NonNull<u8>,
+        old_layout: core::alloc::Layout,
+        new_layout: core::alloc::Layout,
+    ) -> Result<core::ptr::NonNull<[u8]>, allocator_api2::alloc::AllocError> {
+        self.0.deref().grow(ptr, old_layout, new_layout)
+    }
+
+    unsafe fn grow_zeroed(
+        &self,
+        ptr: core::ptr::NonNull<u8>,
+        old_layout: core::alloc::Layout,
+        new_layout: core::alloc::Layout,
+    ) -> Result<core::ptr::NonNull<[u8]>, allocator_api2::alloc::AllocError> {
+        self.0.deref().grow_zeroed(ptr, old_layout, new_layout)
+    }
+
+    unsafe fn shrink(
+        &self,
+        ptr: core::ptr::NonNull<u8>,
+        old_layout: core::alloc::Layout,
+        new_layout: core::alloc::Layout,
+    ) -> Result<core::ptr::NonNull<[u8]>, allocator_api2::alloc::AllocError> {
+        self.0.deref().shrink(ptr, old_layout, new_layout)
+    }
+}
diff --git a/src/postorder.rs b/src/postorder.rs
index 020e4ea0..d1e8330b 100644
--- a/src/postorder.rs
+++ b/src/postorder.rs
@@ -5,53 +5,46 @@
 //! Fast postorder computation.
 
-use crate::Block;
-use alloc::vec;
+use crate::{Block, VecExt};
 use alloc::vec::Vec;
 use smallvec::{smallvec, SmallVec};

 pub fn calculate<'a, SuccFn: Fn(Block) -> &'a [Block]>(
     num_blocks: usize,
     entry: Block,
+    visited_scratch: &mut Vec<bool>,
+    out: &mut Vec<Block>,
     succ_blocks: SuccFn,
-) -> Vec<Block> {
-    let mut ret = vec![];
-
+) {
     // State: visited-block map, and explicit DFS stack.
-    let mut visited = vec![false; num_blocks];
-
     struct State<'a> {
         block: Block,
-        succs: &'a [Block],
-        next_succ: usize,
+        succs: core::slice::Iter<'a, Block>,
     }
+
+    let visited = visited_scratch.repopulate(num_blocks, false);
     let mut stack: SmallVec<[State; 64]> = smallvec![];
+    out.clear();

     visited[entry.index()] = true;
     stack.push(State {
         block: entry,
-        succs: succ_blocks(entry),
-        next_succ: 0,
+        succs: succ_blocks(entry).iter(),
     });

     while let Some(ref mut state) = stack.last_mut() {
         // Perform one action: push to new succ, skip an already-visited succ, or pop.
-        if state.next_succ < state.succs.len() {
-            let succ = state.succs[state.next_succ];
-            state.next_succ += 1;
+        if let Some(&succ) = state.succs.next() {
             if !visited[succ.index()] {
                 visited[succ.index()] = true;
                 stack.push(State {
                     block: succ,
-                    succs: succ_blocks(succ),
-                    next_succ: 0,
+                    succs: succ_blocks(succ).iter(),
                 });
             }
         } else {
-            ret.push(state.block);
+            out.push(state.block);
             stack.pop();
         }
     }
-
-    ret
 }
diff --git a/src/ssa.rs b/src/ssa.rs
index ac6263ef..4742f46c 100644
--- a/src/ssa.rs
+++ b/src/ssa.rs
@@ -6,10 +6,9 @@
 //! SSA-related utilities.

 use alloc::vec;
-use hashbrown::HashSet;

 use crate::cfg::CFGInfo;
-use crate::{Block, Function, Inst, OperandKind, RegAllocError, VReg};
+use crate::{Block, Function, FxHashSet, Inst, OperandKind, RegAllocError, VReg};

 pub fn validate_ssa<F: Function>(f: &F, cfginfo: &CFGInfo) -> Result<(), RegAllocError> {
     // For every block param and inst def, check that this is the only def.
@@ -41,7 +40,7 @@ pub fn validate_ssa<F: Function>(f: &F, cfginfo: &CFGInfo) -> Result<(), RegAllocError> {
     // the def is either in the same block in an earlier inst, or is
     // defined (by inst or blockparam) in some other block that
     // dominates this one.
-    let mut local = HashSet::new();
+    let mut local = FxHashSet::default();
     for block in 0..f.num_blocks() {
         let block = Block::new(block);
         local.clear();
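
Note: with the new `run_with_ctx` entry point above, embedders that allocate registers for many
functions in sequence can keep one `Ctx` alive and let each run reuse the previous run's heap
allocations. A minimal sketch of the caller side, not part of this patch: `MyFunc` is a
hypothetical `Function` implementation standing in for the embedder's IR, and it assumes
`RegallocOptions` provides a `Default` impl.

    use regalloc2::{run_with_ctx, Ctx, MachineEnv, Output, RegAllocError, RegallocOptions};

    fn allocate_batch(funcs: &[MyFunc], env: &MachineEnv) -> Result<Vec<Output>, RegAllocError> {
        let options = RegallocOptions::default();
        // One context for the whole batch: its internal buffers (including the
        // bump arena behind `Bump`) are cleared and reused instead of reallocated.
        let mut ctx = Ctx::default();
        let mut outputs = Vec::with_capacity(funcs.len());
        for func in funcs {
            // The Ok value borrows `ctx.output`; clone it (or `core::mem::take` it,
            // as the doc-comment on `run_with_ctx` suggests) before the next run
            // reuses the context.
            let out: &Output = run_with_ctx(func, env, &options, &mut ctx)?;
            outputs.push(out.clone());
        }
        Ok(outputs)
    }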