Skip to content

Commit

Permalink
riscv64: Improve lowering for base select
Browse files Browse the repository at this point in the history
  • Loading branch information
afonso360 committed May 29, 2024
1 parent cf609c8 commit f956c4c
Show file tree
Hide file tree
Showing 38 changed files with 667 additions and 742 deletions.
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,10 @@ impl IntegerCompare {
..self
}
}

pub(crate) fn regs(&self) -> [Reg; 2] {
[self.rs1, self.rs2]
}
}

#[derive(Debug, Clone, Copy, PartialEq)]
Expand Down
69 changes: 56 additions & 13 deletions cranelift/codegen/src/isa/riscv64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1520,25 +1520,68 @@ impl Inst {
ref x,
ref y,
} => {
let label_true = sink.get_label();
let label_false = sink.get_label();
// The general form for this select is the following:
//
// mv rd, x
// b{cond} rcond, label_end
// mv rd, y
// label_end:
// ... etc
//
// This is built on the assumption that moves are cheap, but branches and jumps
// are not. So with this format we always avoid one jump instruction at the expense
// of an unconditional move.
//
// We also perform another optimization here. If the destination register is the same
// as one of the input registers, we can avoid emitting the first unconditional move
// and emit just the branch and the second move.
//
// To make sure that this happens as often as possible, we also try to invert the
// condition, so that if either of the input registers are the same as the destination
// we avoid that move.

let label_end = sink.get_label();

let xregs = x.regs();
let yregs = y.regs();
let dstregs: Vec<Reg> = dst.regs().into_iter().map(|r| r.to_reg()).collect();
let condregs = condition.regs();

// We are going to write to the destination register before evaluating
// the condition, so we need to make sure that the destination register
// is not one of the condition registers.
//
// This should never happen, since hopefully the regalloc constraints
// for this register are set up correctly.
debug_assert_ne!(dstregs, condregs);

// Check if we can invert the condition and avoid moving the y registers into
// the destination. This allows us to only emit the branch and one of the moves.
let (uncond_move, cond_move, condition) = if yregs == dstregs {
(yregs, xregs, condition.inverse())
} else {
(xregs, yregs, condition)
};

// Unconditonally move one of the values to the destination register.
//
// These moves may not end up being emitted if the source and
// destination registers are the same. That logic is built into
// the emit function for `Inst::Mov`.
for i in gen_moves(dst.regs(), uncond_move) {
i.emit(sink, emit_info, state);
}

// If the condition passes we skip over the conditional move
Inst::CondBr {
taken: CondBrTarget::Label(label_true),
not_taken: CondBrTarget::Label(label_false),
taken: CondBrTarget::Label(label_end),
not_taken: CondBrTarget::Fallthrough,
kind: condition,
}
.emit(sink, emit_info, state);
sink.bind_label(label_true, &mut state.ctrl_plane);
// here is the true
// select the first value
for i in gen_moves(dst.regs(), x.regs()) {
i.emit(sink, emit_info, state);
}
Inst::gen_jump(label_end).emit(sink, emit_info, state);

sink.bind_label(label_false, &mut state.ctrl_plane);
for i in gen_moves(dst.regs(), y.regs()) {
// Move the conditional value to the destination register.
for i in gen_moves(dst.regs(), cond_move) {
i.emit(sink, emit_info, state);
}

Expand Down
11 changes: 9 additions & 2 deletions cranelift/codegen/src/isa/riscv64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -459,14 +459,19 @@ fn riscv64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
y,
..
} => {
collector.reg_use(rs1);
collector.reg_use(rs2);
// Mark the condition registers as late use so that they don't overlap with the destination
// register. We may potentially write to the destination register before evaluating the
// condition.
collector.reg_late_use(rs1);
collector.reg_late_use(rs2);

for reg in x.regs_mut() {
collector.reg_use(reg);
}
for reg in y.regs_mut() {
collector.reg_use(reg);
}

// If there's more than one destination register then use
// `reg_early_def` to prevent destination registers from overlapping
// with any operands. This ensures that the lowering doesn't have to
Expand All @@ -476,6 +481,8 @@ fn riscv64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
// When there's only one destination register though don't use an
// early def because once the register is written no other inputs
// are read so it's ok for the destination to overlap the sources.
// The condition registers are already marked as late use so they
// won't overlap with the destination.
match dst.regs_mut() {
[reg] => collector.reg_def(reg),
regs => {
Expand Down
66 changes: 24 additions & 42 deletions cranelift/filetests/filetests/isa/riscv64/bitops.clif
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,8 @@ block0(v0: i128):
; addi a2, a2, -1
; srli a4, a4, 1
; j -0x18
; bnez a1, 0xc
; mv a0, a3
; j 8
; beqz a1, 8
; mv a0, zero
; add a0, a5, a0
; mv a1, zero
Expand All @@ -183,9 +182,8 @@ block0(v0: i8):
; slli a2, a0, 0x38
; srai a4, a2, 0x38
; not a0, a4
; bgez a4, 0xc
; mv a2, a0
; j 8
; bltz a4, 8
; mv a2, a4
; mv a0, zero
; addi a5, zero, 0x40
Expand Down Expand Up @@ -222,9 +220,8 @@ block0(v0: i16):
; slli a2, a0, 0x30
; srai a4, a2, 0x30
; not a0, a4
; bgez a4, 0xc
; mv a2, a0
; j 8
; bltz a4, 8
; mv a2, a4
; mv a0, zero
; addi a5, zero, 0x40
Expand Down Expand Up @@ -259,9 +256,8 @@ block0(v0: i32):
; block0: ; offset 0x0
; sext.w a2, a0
; not a4, a2
; bgez a2, 0xc
; mv a0, a4
; j 8
; bltz a2, 8
; mv a0, a2
; mv a4, zero
; addi a3, zero, 0x40
Expand Down Expand Up @@ -294,9 +290,8 @@ block0(v0: i64):
; Disassembled:
; block0: ; offset 0x0
; not a2, a0
; bgez a0, 0xc
; mv a4, a2
; j 8
; bltz a0, 8
; mv a4, a0
; mv a2, zero
; addi a1, zero, 0x40
Expand Down Expand Up @@ -335,14 +330,12 @@ block0(v0: i128):
; Disassembled:
; block0: ; offset 0x0
; not a3, a0
; bgez a1, 0xc
; mv a5, a3
; j 8
; bltz a1, 8
; mv a5, a0
; not a2, a1
; bgez a1, 0xc
; mv a3, a2
; j 8
; bltz a1, 8
; mv a3, a1
; mv a1, zero
; addi a0, zero, 0x40
Expand All @@ -366,9 +359,8 @@ block0(v0: i128):
; addi a4, a4, -1
; srli a2, a2, 1
; j -0x18
; bnez a3, 0xc
; mv a2, a0
; j 8
; beqz a3, 8
; mv a2, zero
; add a3, a1, a2
; addi a0, a3, -1
Expand Down Expand Up @@ -1444,10 +1436,10 @@ block0(v0: i128, v1: i8):
; srl a0,a0,a3
; select a3,zero,a0##condition=(a5 eq zero)
; sll a5,a1,a5
; or a5,a3,a5
; or t0,a3,a5
; li a3,64
; andi a2,a2,127
; select [a0,a1],[zero,a4],[a4,a5]##condition=(a2 uge a3)
; andi a5,a2,127
; select [a0,a1],[zero,a4],[a4,t0]##condition=(a5 uge a3)
; ret
;
; Disassembled:
Expand All @@ -1457,20 +1449,18 @@ block0(v0: i128, v1: i8):
; sub a3, a3, a5
; sll a4, a0, a5
; srl a0, a0, a3
; bnez a5, 0xc
; mv a3, zero
; j 8
; beqz a5, 8
; mv a3, a0
; sll a5, a1, a5
; or a5, a3, a5
; or t0, a3, a5
; addi a3, zero, 0x40
; andi a2, a2, 0x7f
; bltu a2, a3, 0x10
; andi a5, a2, 0x7f
; mv a0, zero
; mv a1, a4
; j 0xc
; bgeu a5, a3, 0xc
; mv a0, a4
; mv a1, a5
; mv a1, t0
; ret

function %ishl_i128_i128(i128, i128) -> i128 {
Expand Down Expand Up @@ -1511,10 +1501,9 @@ block0(v0: i128, v1: i128):
; or a4, a3, a0
; addi a3, zero, 0x40
; andi a2, a2, 0x7f
; bltu a2, a3, 0x10
; mv a0, zero
; mv a1, a5
; j 0xc
; bgeu a2, a3, 0xc
; mv a0, a5
; mv a1, a4
; ret
Expand Down Expand Up @@ -1546,19 +1535,17 @@ block0(v0: i128, v1: i8):
; addi a3, zero, 0x40
; sub a3, a3, a4
; sll a5, a1, a3
; bnez a4, 0xc
; mv a3, zero
; j 8
; beqz a4, 8
; mv a3, a5
; srl a5, a0, a4
; or a5, a3, a5
; addi t0, zero, 0x40
; srl a3, a1, a4
; andi a4, a2, 0x7f
; bltu a4, t0, 0x10
; mv a0, a3
; mv a1, zero
; j 0xc
; bgeu a4, t0, 0xc
; mv a0, a5
; mv a1, a3
; ret
Expand Down Expand Up @@ -1615,10 +1602,9 @@ block0(v0: i128, v1: i128):
; addi a3, zero, 0x40
; srl a4, a1, a5
; andi a5, a2, 0x7f
; bltu a5, a3, 0x10
; mv a0, a4
; mv a1, zero
; j 0xc
; bgeu a5, a3, 0xc
; mv a0, s11
; mv a1, a4
; ld s11, 8(sp)
Expand Down Expand Up @@ -1658,25 +1644,22 @@ block0(v0: i128, v1: i8):
; addi a3, zero, 0x40
; sub a3, a3, a4
; sll a5, a1, a3
; bnez a4, 0xc
; mv a3, zero
; j 8
; beqz a4, 8
; mv a3, a5
; srl a5, a0, a4
; or a5, a3, a5
; addi a0, zero, 0x40
; sra a3, a1, a4
; addi a4, zero, -1
; bgez a1, 0xc
; mv t4, a4
; j 8
; bltz a1, 8
; mv t4, zero
; addi a4, zero, 0x40
; andi a2, a2, 0x7f
; bltu a2, a4, 0x10
; mv a0, a3
; mv a1, t4
; j 0xc
; bgeu a2, a4, 0xc
; mv a0, a5
; mv a1, a3
; ret
Expand Down Expand Up @@ -1740,10 +1723,9 @@ block0(v0: i128, v1: i128):
; mv a5, zero
; addi a4, zero, 0x40
; andi a2, a2, 0x7f
; bltu a2, a4, 0x10
; mv a0, a3
; mv a1, a5
; j 0xc
; bgeu a2, a4, 0xc
; mv a0, s11
; mv a1, a3
; ld s11, 8(sp)
Expand Down
Loading

0 comments on commit f956c4c

Please sign in to comment.