Skip to content

Commit

Permalink
riscv64: Improve lowering for base select
Browse files Browse the repository at this point in the history
  • Loading branch information
afonso360 committed May 27, 2024
1 parent f926813 commit dac71f9
Show file tree
Hide file tree
Showing 38 changed files with 805 additions and 769 deletions.
22 changes: 10 additions & 12 deletions cranelift/codegen/src/isa/riscv64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1520,24 +1520,22 @@ impl Inst {
ref x,
ref y,
} => {
let label_true = sink.get_label();
let label_false = sink.get_label();
let label_end = sink.get_label();

// Unconditionally move the x value to the destination register.
for i in gen_moves(dst.regs(), x.regs()) {
i.emit(sink, emit_info, state);
}

// If the condition passes we skip over the y move
Inst::CondBr {
taken: CondBrTarget::Label(label_true),
not_taken: CondBrTarget::Label(label_false),
taken: CondBrTarget::Label(label_end),
not_taken: CondBrTarget::Fallthrough,
kind: condition,
}
.emit(sink, emit_info, state);
sink.bind_label(label_true, &mut state.ctrl_plane);
// this is the true branch
// select the first value
for i in gen_moves(dst.regs(), x.regs()) {
i.emit(sink, emit_info, state);
}
Inst::gen_jump(label_end).emit(sink, emit_info, state);

sink.bind_label(label_false, &mut state.ctrl_plane);
// Move the y value to the destination register.
for i in gen_moves(dst.regs(), y.regs()) {
i.emit(sink, emit_info, state);
}
Expand Down
22 changes: 6 additions & 16 deletions cranelift/codegen/src/isa/riscv64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -467,22 +467,12 @@ fn riscv64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
for reg in y.regs_mut() {
collector.reg_use(reg);
}
// If there's more than one destination register then use
// `reg_early_def` to prevent destination registers from overlapping
// with any operands. This ensures that the lowering doesn't have to
// deal with a situation such as when the input registers need to be
// swapped when moved to the destination.
//
// When there's only one destination register though don't use an
// early def because once the register is written no other inputs
// are read so it's ok for the destination to overlap the sources.
match dst.regs_mut() {
[reg] => collector.reg_def(reg),
regs => {
for d in regs {
collector.reg_early_def(d);
}
}

// We unconditionally move one of the inputs to the destination register
// before evaluating the condition. Mark this as early def so that
// there are no conflicts with one of the input registers.
for d in dst.regs_mut() {
collector.reg_early_def(d);
}
}
Inst::AtomicCas {
Expand Down
97 changes: 42 additions & 55 deletions cranelift/filetests/filetests/isa/riscv64/bitops.clif
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,8 @@ block0(v0: i128):
; addi a2, a2, -1
; srli a4, a4, 1
; j -0x18
; bnez a1, 0xc
; mv a0, a3
; j 8
; beqz a1, 8
; mv a0, zero
; add a0, a5, a0
; mv a1, zero
Expand All @@ -183,9 +182,8 @@ block0(v0: i8):
; slli a2, a0, 0x38
; srai a4, a2, 0x38
; not a0, a4
; bgez a4, 0xc
; mv a2, a0
; j 8
; bltz a4, 8
; mv a2, a4
; mv a0, zero
; addi a5, zero, 0x40
Expand Down Expand Up @@ -222,9 +220,8 @@ block0(v0: i16):
; slli a2, a0, 0x30
; srai a4, a2, 0x30
; not a0, a4
; bgez a4, 0xc
; mv a2, a0
; j 8
; bltz a4, 8
; mv a2, a4
; mv a0, zero
; addi a5, zero, 0x40
Expand Down Expand Up @@ -259,9 +256,8 @@ block0(v0: i32):
; block0: ; offset 0x0
; sext.w a2, a0
; not a4, a2
; bgez a2, 0xc
; mv a0, a4
; j 8
; bltz a2, 8
; mv a0, a2
; mv a4, zero
; addi a3, zero, 0x40
Expand Down Expand Up @@ -294,9 +290,8 @@ block0(v0: i64):
; Disassembled:
; block0: ; offset 0x0
; not a2, a0
; bgez a0, 0xc
; mv a4, a2
; j 8
; bltz a0, 8
; mv a4, a0
; mv a2, zero
; addi a1, zero, 0x40
Expand Down Expand Up @@ -335,14 +330,12 @@ block0(v0: i128):
; Disassembled:
; block0: ; offset 0x0
; not a3, a0
; bgez a1, 0xc
; mv a5, a3
; j 8
; bltz a1, 8
; mv a5, a0
; not a2, a1
; bgez a1, 0xc
; mv a3, a2
; j 8
; bltz a1, 8
; mv a3, a1
; mv a1, zero
; addi a0, zero, 0x40
Expand All @@ -366,9 +359,8 @@ block0(v0: i128):
; addi a4, a4, -1
; srli a2, a2, 1
; j -0x18
; bnez a3, 0xc
; mv a2, a0
; j 8
; beqz a3, 8
; mv a2, zero
; add a3, a1, a2
; addi a0, a3, -1
Expand Down Expand Up @@ -493,8 +485,8 @@ block0(v0: i128):
; block0:
; ctz a5,a1##ty=i64 tmp=a3 step=a4
; ctz a3,a0##ty=i64 tmp=a1 step=a2
; select a5,a5,zero##condition=(a0 eq zero)
; add a0,a3,a5
; select a1,a5,zero##condition=(a0 eq zero)
; add a0,a3,a1
; li a1,0
; ret
;
Expand All @@ -520,9 +512,10 @@ block0(v0: i128):
; addi a2, a2, -1
; slli a1, a1, 1
; j -0x18
; mv a1, a5
; beqz a0, 8
; mv a5, zero
; add a0, a3, a5
; mv a1, zero
; add a0, a3, a1
; mv a1, zero
; ret

Expand Down Expand Up @@ -1457,18 +1450,16 @@ block0(v0: i128, v1: i8):
; sub a3, a3, a5
; sll a4, a0, a5
; srl a0, a0, a3
; beqz a5, 0xc
; mv a3, a0
; j 8
; bnez a5, 8
; mv a3, zero
; sll a5, a1, a5
; or a5, a3, a5
; addi a3, zero, 0x40
; andi a2, a2, 0x7f
; bltu a2, a3, 0x10
; mv a0, zero
; mv a1, a4
; j 0xc
; bgeu a2, a3, 0xc
; mv a0, a4
; mv a1, a5
; ret
Expand All @@ -1487,8 +1478,8 @@ block0(v0: i128, v1: i128):
; sub a3,a3,a0
; mv a4,a5
; sll a5,a4,a0
; srl a3,a4,a3
; select a3,a3,zero##condition=(a0 ne zero)
; srl a4,a4,a3
; select a3,a4,zero##condition=(a0 ne zero)
; sll a0,a1,a0
; or a4,a3,a0
; li a3,64
Expand All @@ -1504,17 +1495,17 @@ block0(v0: i128, v1: i128):
; sub a3, a3, a0
; mv a4, a5
; sll a5, a4, a0
; srl a3, a4, a3
; srl a4, a4, a3
; mv a3, a4
; bnez a0, 8
; mv a3, zero
; sll a0, a1, a0
; or a4, a3, a0
; addi a3, zero, 0x40
; andi a2, a2, 0x7f
; bltu a2, a3, 0x10
; mv a0, zero
; mv a1, a5
; j 0xc
; bgeu a2, a3, 0xc
; mv a0, a5
; mv a1, a4
; ret
Expand Down Expand Up @@ -1546,19 +1537,17 @@ block0(v0: i128, v1: i8):
; addi a3, zero, 0x40
; sub a3, a3, a4
; sll a5, a1, a3
; beqz a4, 0xc
; mv a3, a5
; j 8
; bnez a4, 8
; mv a3, zero
; srl a5, a0, a4
; or a5, a3, a5
; addi t0, zero, 0x40
; srl a3, a1, a4
; andi a4, a2, 0x7f
; bltu a4, t0, 0x10
; mv a0, a3
; mv a1, zero
; j 0xc
; bgeu a4, t0, 0xc
; mv a0, a5
; mv a1, a3
; ret
Expand All @@ -1580,8 +1569,8 @@ block0(v0: i128, v1: i128):
; andi a5,a2,63
; li a3,64
; sub a3,a3,a5
; sll a3,a1,a3
; select a3,a3,zero##condition=(a5 ne zero)
; sll a4,a1,a3
; select a3,a4,zero##condition=(a5 ne zero)
; srl a4,a0,a5
; or s11,a3,a4
; li a3,64
Expand All @@ -1607,18 +1596,18 @@ block0(v0: i128, v1: i128):
; andi a5, a2, 0x3f
; addi a3, zero, 0x40
; sub a3, a3, a5
; sll a3, a1, a3
; sll a4, a1, a3
; mv a3, a4
; bnez a5, 8
; mv a3, zero
; srl a4, a0, a5
; or s11, a3, a4
; addi a3, zero, 0x40
; srl a4, a1, a5
; andi a5, a2, 0x7f
; bltu a5, a3, 0x10
; mv a0, a4
; mv a1, zero
; j 0xc
; bgeu a5, a3, 0xc
; mv a0, s11
; mv a1, a4
; ld s11, 8(sp)
Expand Down Expand Up @@ -1658,25 +1647,22 @@ block0(v0: i128, v1: i8):
; addi a3, zero, 0x40
; sub a3, a3, a4
; sll a5, a1, a3
; beqz a4, 0xc
; mv a3, a5
; j 8
; bnez a4, 8
; mv a3, zero
; srl a5, a0, a4
; or a5, a3, a5
; addi a0, zero, 0x40
; sra a3, a1, a4
; addi a4, zero, -1
; bgez a1, 0xc
; mv t4, a4
; j 8
; bltz a1, 8
; mv t4, zero
; addi a4, zero, 0x40
; andi a2, a2, 0x7f
; bltu a2, a4, 0x10
; mv a0, a3
; mv a1, t4
; j 0xc
; bgeu a2, a4, 0xc
; mv a0, a5
; mv a1, a3
; ret
Expand All @@ -1698,17 +1684,17 @@ block0(v0: i128, v1: i128):
; andi a5,a2,63
; li a3,64
; sub a3,a3,a5
; sll a3,a1,a3
; select a3,a3,zero##condition=(a5 ne zero)
; sll a4,a1,a3
; select a3,a4,zero##condition=(a5 ne zero)
; srl a4,a0,a5
; or s11,a3,a4
; li a3,64
; sra a3,a1,a5
; li a5,-1
; select a5,a5,zero##condition=(a1 slt zero)
; li a4,64
; select a4,a5,zero##condition=(a1 slt zero)
; li a5,64
; andi a2,a2,127
; select [a0,a1],[a3,a5],[s11,a3]##condition=(a2 uge a4)
; select [a0,a1],[a3,a4],[s11,a3]##condition=(a2 uge a5)
; ld s11,8(sp)
; addi sp,sp,16
; ld ra,8(sp)
Expand All @@ -1728,22 +1714,23 @@ block0(v0: i128, v1: i128):
; andi a5, a2, 0x3f
; addi a3, zero, 0x40
; sub a3, a3, a5
; sll a3, a1, a3
; sll a4, a1, a3
; mv a3, a4
; bnez a5, 8
; mv a3, zero
; srl a4, a0, a5
; or s11, a3, a4
; addi a3, zero, 0x40
; sra a3, a1, a5
; addi a5, zero, -1
; mv a4, a5
; bltz a1, 8
; mv a5, zero
; addi a4, zero, 0x40
; mv a4, zero
; addi a5, zero, 0x40
; andi a2, a2, 0x7f
; bltu a2, a4, 0x10
; mv a0, a3
; mv a1, a5
; j 0xc
; mv a1, a4
; bgeu a2, a5, 0xc
; mv a0, s11
; mv a1, a3
; ld s11, 8(sp)
Expand Down
7 changes: 3 additions & 4 deletions cranelift/filetests/filetests/isa/riscv64/brif.clif
Original file line number Diff line number Diff line change
Expand Up @@ -367,15 +367,14 @@ block2:
; slt a5, a3, a1
; sltu a4, a2, a0
; xor a0, a3, a1
; bnez a0, 0xc
; mv a1, a4
; j 8
; beqz a0, 8
; mv a1, a5
; bnez a1, 0xc
; block1: ; offset 0x20
; block1: ; offset 0x1c
; mv a0, zero
; ret
; block2: ; offset 0x28
; block2: ; offset 0x24
; addi a0, zero, 1
; ret

Expand Down
Loading

0 comments on commit dac71f9

Please sign in to comment.