Skip to content

Commit 6eddbec

Browse files
committed
Auto merge of rust-lang#129283 - saethlin:unreachable-allocas, r=<try>
Don't alloca for unused locals

We already have a concept of mono-unreachable basic blocks; this is primarily useful for ensuring that we do not compile code under an `if false`. But since we never gave locals the same analysis, a large local only used under an `if false` will still have stack space allocated for it. There are 3 places we traverse MIR during monomorphization: Inside the collector, `non_ssa_locals`, and the walk to generate code. Unfortunately, rust-lang#129283 (comment) indicates that we cannot afford the expense of tracking reachable locals during the collector's traversal, so we do need at least two mono-reachable traversals. And of course caching is of no help here because the benchmarks that regress are incr-unchanged; they don't do any codegen. This fixes the second problem in rust-lang#129282, and brings us another step toward `const if` at home.
2 parents 5aea140 + 66653dc commit 6eddbec

File tree

6 files changed

+219
-18
lines changed

6 files changed

+219
-18
lines changed

compiler/rustc_codegen_ssa/src/mir/analyze.rs

+9-3
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,18 @@ use crate::traits::*;
1515

1616
pub fn non_ssa_locals<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
1717
fx: &FunctionCx<'a, 'tcx, Bx>,
18+
traversal_order: &[mir::BasicBlock],
19+
reachable_locals: &BitSet<mir::Local>,
1820
) -> BitSet<mir::Local> {
1921
let mir = fx.mir;
2022
let dominators = mir.basic_blocks.dominators();
2123
let locals = mir
2224
.local_decls
23-
.iter()
24-
.map(|decl| {
25+
.iter_enumerated()
26+
.map(|(local, decl)| {
27+
if !reachable_locals.contains(local) {
28+
return LocalKind::Unused;
29+
}
2530
let ty = fx.monomorphize(decl.ty);
2631
let layout = fx.cx.spanned_layout_of(ty, decl.source_info.span);
2732
if layout.is_zst() {
@@ -44,7 +49,8 @@ pub fn non_ssa_locals<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
4449
// If there exists a local definition that dominates all uses of that local,
4550
// the definition should be visited first. Traverse blocks in an order that
4651
// is a topological sort of dominance partial order.
47-
for (bb, data) in traversal::reverse_postorder(mir) {
52+
for bb in traversal_order.iter().copied() {
53+
let data = &mir.basic_blocks[bb];
4854
analyzer.visit_basic_block_data(bb, data);
4955
}
5056

compiler/rustc_codegen_ssa/src/mir/mod.rs

+12-12
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,9 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
192192
})
193193
.collect();
194194

195+
let (traversal_order, reachable_locals) =
196+
traversal::mono_reachable_reverse_postorder(mir, cx.tcx(), instance);
197+
195198
let mut fx = FunctionCx {
196199
instance,
197200
mir,
@@ -218,7 +221,7 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
218221

219222
fx.per_local_var_debug_info = fx.compute_per_local_var_debug_info(&mut start_bx);
220223

221-
let memory_locals = analyze::non_ssa_locals(&fx);
224+
let memory_locals = analyze::non_ssa_locals(&fx, &traversal_order, &reachable_locals);
222225

223226
// Allocate variable and temp allocas
224227
let local_values = {
@@ -277,17 +280,14 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
277280
// So drop the builder of `start_llbb` to avoid having two at the same time.
278281
drop(start_bx);
279282

280-
let reachable_blocks = traversal::mono_reachable_as_bitset(mir, cx.tcx(), instance);
281-
282-
// Codegen the body of each block using reverse postorder
283-
for (bb, _) in traversal::reverse_postorder(mir) {
284-
if reachable_blocks.contains(bb) {
285-
fx.codegen_block(bb);
286-
} else {
287-
// We want to skip this block, because it's not reachable. But we still create
288-
// the block so terminators in other blocks can reference it.
289-
fx.codegen_block_as_unreachable(bb);
290-
}
283+
let mut unreached_blocks = BitSet::new_filled(mir.basic_blocks.len());
284+
// Codegen the body of each reachable block using our reverse postorder list.
285+
for bb in traversal_order {
286+
fx.codegen_block(bb);
287+
unreached_blocks.remove(bb);
288+
}
289+
for bb in unreached_blocks.iter() {
290+
fx.codegen_block_as_unreachable(bb);
291291
}
292292
}
293293

compiler/rustc_middle/src/mir/terminator.rs

+9
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,15 @@ mod helper {
413413
use super::*;
414414
pub type Successors<'a> = impl DoubleEndedIterator<Item = BasicBlock> + 'a;
415415
pub type SuccessorsMut<'a> = impl DoubleEndedIterator<Item = &'a mut BasicBlock> + 'a;
416+
417+
impl SwitchTargets {
418+
#[inline]
419+
pub fn successors_for_value(&self, value: u128) -> Successors<'_> {
420+
let target = self.target_for_value(value);
421+
(&[]).into_iter().copied().chain(Some(target))
422+
}
423+
}
424+
416425
impl<'tcx> TerminatorKind<'tcx> {
417426
#[inline]
418427
pub fn successors(&self) -> Successors<'_> {

compiler/rustc_middle/src/mir/traversal.rs

+165
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,155 @@ pub fn postorder<'a, 'tcx>(
232232
reverse_postorder(body).rev()
233233
}
234234

235+
pub struct MonoReachablePostorder<'a, 'tcx> {
236+
basic_blocks: &'a IndexSlice<BasicBlock, BasicBlockData<'tcx>>,
237+
visited: BitSet<BasicBlock>,
238+
visit_stack: Vec<(BasicBlock, Successors<'a>)>,
239+
locals: BitSet<Local>,
240+
tcx: TyCtxt<'tcx>,
241+
instance: Instance<'tcx>,
242+
}
243+
244+
impl<'a, 'tcx> MonoReachablePostorder<'a, 'tcx> {
245+
pub fn new(
246+
body: &'a Body<'tcx>,
247+
tcx: TyCtxt<'tcx>,
248+
instance: Instance<'tcx>,
249+
) -> MonoReachablePostorder<'a, 'tcx> {
250+
let basic_blocks = &body.basic_blocks;
251+
let mut po = MonoReachablePostorder {
252+
basic_blocks,
253+
visited: BitSet::new_empty(basic_blocks.len()),
254+
visit_stack: Vec::new(),
255+
locals: BitSet::new_empty(body.local_decls.len()),
256+
tcx,
257+
instance,
258+
};
259+
260+
let root = START_BLOCK;
261+
let data = &po.basic_blocks[root];
262+
263+
UsedLocals { locals: &mut po.locals }.visit_basic_block_data(root, data);
264+
if let Some(ref term) = data.terminator {
265+
po.visited.insert(root);
266+
267+
let successors = if let Some((bits, targets)) =
268+
Body::try_const_mono_switchint(tcx, instance, data)
269+
{
270+
targets.successors_for_value(bits)
271+
} else {
272+
term.successors()
273+
};
274+
275+
po.visit_stack.push((root, successors));
276+
po.traverse_successor();
277+
}
278+
279+
po
280+
}
281+
282+
fn traverse_successor(&mut self) {
283+
// This is quite a complex loop due to 1. the borrow checker not liking it much
284+
// and 2. what exactly is going on is not clear
285+
//
286+
// It does the actual traversal of the graph, while the `next` method on the iterator
287+
// just pops off of the stack. `visit_stack` is a stack containing pairs of nodes and
288+
// iterators over the successors of those nodes. Each iteration attempts to get the next
289+
// node from the top of the stack, then pushes that node and an iterator over the
290+
// successors to the top of the stack. This loop only grows `visit_stack`, stopping when
291+
// we reach a child that has no children that we haven't already visited.
292+
//
293+
// For a graph that looks like this:
294+
//
295+
// A
296+
// / \
297+
// / \
298+
// B C
299+
// | |
300+
// | |
301+
// | D
302+
// \ /
303+
// \ /
304+
// E
305+
//
306+
// The state of the stack starts out with just the root node (`A` in this case);
307+
// [(A, [B, C])]
308+
//
309+
// When the first call to `traverse_successor` happens, the following happens:
310+
//
311+
// [(C, [D]), // `C` taken from the successors of `A`, pushed to the
312+
// // top of the stack along with the successors of `C`
313+
// (A, [B])]
314+
//
315+
// [(D, [E]), // `D` taken from successors of `C`, pushed to stack
316+
// (C, []),
317+
// (A, [B])]
318+
//
319+
// [(E, []), // `E` taken from successors of `D`, pushed to stack
320+
// (D, []),
321+
// (C, []),
322+
// (A, [B])]
323+
//
324+
// Now that the top of the stack has no successors we can traverse, each item will
325+
// be popped off during iteration until we get back to `A`. This yields [E, D, C].
326+
//
327+
// When we yield `C` and call `traverse_successor`, we push `B` to the stack, but
328+
// since we've already visited `E`, that child isn't added to the stack. The last
329+
// two iterations yield `B` and finally `A` for a final traversal of [E, D, C, B, A]
330+
while let Some(bb) = self.visit_stack.last_mut().and_then(|(_, iter)| iter.next_back()) {
331+
if self.visited.insert(bb) {
332+
let data = &self.basic_blocks[bb];
333+
UsedLocals { locals: &mut self.locals }.visit_basic_block_data(bb, data);
334+
335+
let Some(term) = &data.terminator else {
336+
continue;
337+
};
338+
339+
let successors = if let Some((bits, targets)) =
340+
Body::try_const_mono_switchint(self.tcx, self.instance, data)
341+
{
342+
targets.successors_for_value(bits)
343+
} else {
344+
term.successors()
345+
};
346+
347+
self.visit_stack.push((bb, successors));
348+
}
349+
}
350+
}
351+
}
352+
353+
impl<'tcx> Iterator for MonoReachablePostorder<'_, 'tcx> {
354+
type Item = BasicBlock;
355+
356+
fn next(&mut self) -> Option<BasicBlock> {
357+
let (bb, _) = self.visit_stack.pop()?;
358+
self.traverse_successor();
359+
360+
Some(bb)
361+
}
362+
363+
fn size_hint(&self) -> (usize, Option<usize>) {
364+
// All the blocks, minus the number of blocks we've visited.
365+
let remaining = self.basic_blocks.len() - self.visited.count();
366+
(remaining, Some(remaining))
367+
}
368+
}
369+
370+
pub fn mono_reachable_reverse_postorder<'a, 'tcx>(
371+
body: &'a Body<'tcx>,
372+
tcx: TyCtxt<'tcx>,
373+
instance: Instance<'tcx>,
374+
) -> (Vec<BasicBlock>, BitSet<Local>) {
375+
let mut iter = MonoReachablePostorder::new(body, tcx, instance);
376+
let mut items = Vec::with_capacity(body.basic_blocks.len());
377+
while let Some(block) = iter.next() {
378+
items.push(block);
379+
}
380+
items.reverse();
381+
(items, iter.locals)
382+
}
383+
235384
/// Returns an iterator over all basic blocks reachable from the `START_BLOCK` in no particular
236385
/// order.
237386
///
@@ -320,6 +469,22 @@ pub struct MonoReachable<'a, 'tcx> {
320469
worklist: BitSet<BasicBlock>,
321470
}
322471

472+
struct UsedLocals<'a> {
473+
locals: &'a mut BitSet<Local>,
474+
}
475+
476+
use crate::mir::visit::Visitor;
477+
impl<'a, 'tcx> Visitor<'tcx> for UsedLocals<'a> {
478+
fn visit_local(
479+
&mut self,
480+
local: Local,
481+
_ctx: crate::mir::visit::PlaceContext,
482+
_location: Location,
483+
) {
484+
self.locals.insert(local);
485+
}
486+
}
487+
323488
impl<'a, 'tcx> MonoReachable<'a, 'tcx> {
324489
pub fn new(
325490
body: &'a Body<'tcx>,

tests/codegen/constant-branch.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,8 @@ pub fn if_constant_match() {
4141
_ => 4,
4242
};
4343

44-
// CHECK: br label %[[MINUS1:.+]]
44+
// CHECK: br label %{{.+}}
4545
_ = match -1 {
46-
// CHECK: [[MINUS1]]:
47-
// CHECK: store i32 1
4846
-1 => 1,
4947
_ => 0,
5048
}
+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
//@ compile-flags: -Cno-prepopulate-passes -Copt-level=0
2+
3+
#![crate_type = "lib"]
4+
5+
#[inline(never)]
6+
fn test<const SIZE: usize>() {
7+
// CHECK-LABEL: no_alloca_inside_if_false::test
8+
// CHECK: start:
9+
// CHECK-NEXT: %0 = alloca
10+
// CHECK-NEXT: %vec = alloca
11+
// CHECK-NOT: %arr = alloca
12+
if const { SIZE < 4096 } {
13+
let arr = [0u8; SIZE];
14+
std::hint::black_box(&arr);
15+
} else {
16+
let vec = vec![0u8; SIZE];
17+
std::hint::black_box(&vec);
18+
}
19+
}
20+
21+
pub fn main() {
22+
test::<8192>();
23+
}

0 commit comments

Comments
 (0)