Fix some i128 shift-related bugs in x64 backend.
This fixes bytecodealliance#2672 and bytecodealliance#2679, and also fixes an incorrect instruction
emission (`test` with small immediate) that we had missed earlier.

The shift-related fixes address (i) shifts by 0 bits, which must be handled
as a special case, and (ii) shift amounts that are themselves 128 bits wide,
which we can handle by simply dropping the upper half (only the low 3--7
bits of the shift amount are used, depending on the type width).
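
As context for (i) and (ii), here is a minimal Rust model of the semantics the i128 shift-left lowering has to implement from 64-bit halves (an illustrative sketch, not the backend code): only the low 7 bits of the amount matter, and the carry term lo >> (64 - amt) must be forced to zero when the amount is 0, because x86 masks a 64-bit shift count to 6 bits and would otherwise shift by 0 instead of 64.

    /// Reference model of 128-bit shift-left built from 64-bit halves,
    /// mirroring the shape of the x64 lowering (names are illustrative).
    fn shl_i128_by_halves(lo: u64, hi: u64, amt: u32) -> (u64, u64) {
        // Only the low 7 bits of the shift amount are meaningful (mod 128);
        // this is why the upper half of an i128 amount can simply be dropped.
        let amt = amt & 127;

        // `wrapping_shl`/`wrapping_shr` mask the count to 6 bits, exactly
        // like 64-bit `shl`/`shr` on x86.
        let shifted_lo = lo.wrapping_shl(amt);
        let shifted_hi = hi.wrapping_shl(amt);

        // Bits that move from the low half into the high half. Without the
        // `amt == 0` check, `64 - 0 = 64` is masked to a shift by 0 and the
        // carry would wrongly be `lo` itself; that is the bug being fixed.
        let carry = if amt == 0 {
            0
        } else {
            lo.wrapping_shr(64u32.wrapping_sub(amt))
        };

        if amt & 64 == 0 {
            (shifted_lo, shifted_hi | carry)
        } else {
            // Amounts 64..=127: the low half is zero and the (masked)
            // shifted low half becomes the high half.
            (0, shifted_lo)
        }
    }

    fn main() {
        let x: u128 = 0x0123_4567_89ab_cdef_0fed_cba9_8765_4321;
        for amt in [0u32, 1, 63, 64, 65, 127] {
            let (lo, hi) = shl_i128_by_halves(x as u64, (x >> 64) as u64, amt);
            assert_eq!((lo as u128) | ((hi as u128) << 64), x << amt);
        }
    }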

This adjusts the lowerings accordingly, and also adds run-tests to
ensure that the lowerings actually execute correctly (previously we only
had compile-tests with golden lowerings; I'd like to extend this to
more ops eventually, adding run-tests beyond what the Wasm spec and
frontend cover).
cfallin committed Feb 23, 2021
1 parent 98d3e68 commit 0f3e00b
Showing 5 changed files with 134 additions and 316 deletions.
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -1371,7 +1371,7 @@ pub(crate) fn emit(
RegMemImm::Imm { simm32 } => {
// FIXME JRS 2020Feb11: there are shorter encodings for
// cmp $imm, rax/eax/ax/al.
- let use_imm8 = low8_will_sign_extend_to_32(*simm32);
+ let use_imm8 = is_cmp && low8_will_sign_extend_to_32(*simm32);

// And also here we use the "normal" G-E ordering.
let opcode = if is_cmp {
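For context on the one-line change above: cmp r/m64 has a sign-extended 8-bit immediate form (opcode 0x83), but test r/m64 only has the full 32-bit immediate form (0xF7 /0), so the short encoding may only be chosen when the instruction is a cmp. A standalone sketch of that decision, with the helper reimplemented here for illustration (not the emitter's actual code):

    // Sketch of the immediate-width choice; the helper is reimplemented
    // here for illustration only.
    fn low8_will_sign_extend_to_32(simm32: u32) -> bool {
        let s = simm32 as i32;
        s == (s as i8) as i32
    }

    fn imm_bytes(is_cmp: bool, simm32: u32) -> usize {
        if is_cmp && low8_will_sign_extend_to_32(simm32) {
            1 // e.g. `cmpq $127, %rdx` can carry the immediate in one byte
        } else {
            4 // `testq $127, %rdx` must still carry a full 32-bit immediate
        }
    }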
5 changes: 5 additions & 0 deletions cranelift/codegen/src/isa/x64/inst/emit_tests.rs
@@ -2999,6 +2999,11 @@ fn test_x64_emit() {
"48855763",
"testq 99(%rdi), %rdx",
));
+ insns.push((
+     Inst::test_rmi_r(OperandSize::Size64, RegMemImm::imm(127), rdx),
+     "48F7C27F000000",
+     "testq $127, %rdx",
+ ));
insns.push((
Inst::test_rmi_r(OperandSize::Size64, RegMemImm::imm(76543210), rdx),
"48F7C2EAF48F04",
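The expected bytes of the new test case make the point concrete: 48 F7 C2 7F 00 00 00 is a REX.W prefix, the test r/m64, imm32 opcode F7 (/0), a ModRM byte selecting %rdx, and the little-endian 32-bit immediate 127; there is no shorter test-with-imm8 encoding for the emitter to fall back on.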
63 changes: 53 additions & 10 deletions cranelift/codegen/src/isa/x64/lower.rs
@@ -886,8 +886,10 @@ fn emit_shl_i128<C: LowerCtx<I = Inst>>(
// sub amt, amt_src
// mov tmp3, src_lo
// shr tmp3, amt
- // or tmp3, tmp2
// xor dst_lo, dst_lo
+ // test amt_src, 127
+ // cmovz tmp2, dst_lo
+ // or tmp3, tmp2
// mov amt, amt_src
// and amt, 64
// cmovz dst_hi, tmp3
@@ -945,6 +947,24 @@ fn emit_shl_i128<C: LowerCtx<I = Inst>>(
None,
tmp3,
));
+ ctx.emit(Inst::alu_rmi_r(
+     OperandSize::Size64,
+     AluRmiROpcode::Xor,
+     RegMemImm::reg(dst_lo.to_reg()),
+     dst_lo,
+ ));
+
+ ctx.emit(Inst::test_rmi_r(
+     OperandSize::Size64,
+     RegMemImm::imm(127),
+     amt_src,
+ ));
+ ctx.emit(Inst::cmove(
+     OperandSize::Size64,
+     CC::Z,
+     RegMem::reg(dst_lo.to_reg()),
+     tmp2,
+ ));

ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
@@ -953,12 +973,6 @@ fn emit_shl_i128<C: LowerCtx<I = Inst>>(
tmp3,
));

- ctx.emit(Inst::alu_rmi_r(
-     OperandSize::Size64,
-     AluRmiROpcode::Xor,
-     RegMemImm::reg(dst_lo.to_reg()),
-     dst_lo,
- ));
// This isn't semantically necessary, but it keeps the
// register allocator happy, because it cannot otherwise
// infer that cmovz + cmovnz always defines dst_hi.
@@ -1011,11 +1025,14 @@ fn emit_shr_i128<C: LowerCtx<I = Inst>>(
// mov tmp1, src_hi
// {u,s}shr tmp1, amt_src
// mov tmp2, src_lo
- // {u,s}shr tmp2, amt_src
+ // ushr tmp2, amt_src
// mov amt, 64
// sub amt, amt_src
// mov tmp3, src_hi
// shl tmp3, amt
+ // xor dst_lo, dst_lo
+ // test amt_src, 127
+ // cmovz tmp3, dst_lo
// or tmp3, tmp2
// if is_signed:
// mov dst_hi, src_hi
@@ -1053,7 +1070,13 @@ fn emit_shr_i128<C: LowerCtx<I = Inst>>(
amt_src,
types::I64,
));
- ctx.emit(Inst::shift_r(OperandSize::Size64, shift_kind, None, tmp2));
+ // N.B.: right-shift of *lower* half is *always* unsigned (its MSB is not a sign bit).
+ ctx.emit(Inst::shift_r(
+     OperandSize::Size64,
+     ShiftKind::ShiftRightLogical,
+     None,
+     tmp2,
+ ));

ctx.emit(Inst::imm(OperandSize::Size64, 64, amt));
ctx.emit(Inst::alu_rmi_r(
@@ -1076,6 +1099,24 @@ fn emit_shr_i128<C: LowerCtx<I = Inst>>(
tmp3,
));

+ ctx.emit(Inst::alu_rmi_r(
+     OperandSize::Size64,
+     AluRmiROpcode::Xor,
+     RegMemImm::reg(dst_lo.to_reg()),
+     dst_lo,
+ ));
+ ctx.emit(Inst::test_rmi_r(
+     OperandSize::Size64,
+     RegMemImm::imm(127),
+     amt_src,
+ ));
+ ctx.emit(Inst::cmove(
+     OperandSize::Size64,
+     CC::Z,
+     RegMem::reg(dst_lo.to_reg()),
+     tmp3,
+ ));

ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Or,
@@ -1957,7 +1998,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let cst = (cst as u8) & (dst_ty.bits() as u8 - 1);
(Some(cst), None)
} else {
- (None, Some(put_input_in_reg(ctx, inputs[1])))
+ // We can ignore upper registers if shift amount is multi-reg, because we
+ // are taking the shift amount mod 2^(lhs_width) anyway.
+ (None, Some(put_input_in_regs(ctx, inputs[1]).regs()[0]))
};

let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
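
To make the emit_shr_i128 sequence above easier to follow, here is a matching Rust model of a 128-bit right shift built from 64-bit halves (an illustrative sketch with made-up names, not the lowering code). It captures the two details the patch addresses: the low half is always shifted logically regardless of signedness, and the bits carried down from the high half must be forced to zero when the amount is 0, which is what the test amt_src, 127 / cmovz pair accomplishes.

    /// Reference model of a 128-bit right shift from 64-bit halves,
    /// mirroring the structure of emit_shr_i128 (names are illustrative).
    fn shr_i128_by_halves(lo: u64, hi: u64, amt: u32, signed: bool) -> (u64, u64) {
        let amt = amt & 127; // only the low 7 bits of the amount matter

        // The high half uses the requested shift kind (arithmetic keeps the sign).
        let shifted_hi = if signed {
            (hi as i64).wrapping_shr(amt) as u64
        } else {
            hi.wrapping_shr(amt)
        };
        // The low half is *always* shifted logically: its MSB is not a sign bit.
        let shifted_lo = lo.wrapping_shr(amt);

        // Bits that move from the high half down into the low half. As in the
        // left-shift case, this must be forced to zero when amt == 0, since a
        // 64-bit shift by 64 is masked to a shift by 0 on x86.
        let carry = if amt == 0 {
            0
        } else {
            hi.wrapping_shl(64u32.wrapping_sub(amt))
        };

        // Upper half of the result: sign-fill for arithmetic shifts, else zero.
        let sign_fill = if signed { ((hi as i64) >> 63) as u64 } else { 0 };

        if amt & 64 == 0 {
            (shifted_lo | carry, shifted_hi)
        } else {
            // Amounts 64..=127: the (masked) shifted high half becomes the
            // low half, and the high half is all sign/zero bits.
            (shifted_hi, sign_fill)
        }
    }

    fn main() {
        let x: u128 = 0xFEDC_BA98_7654_3210_0123_4567_89AB_CDEF;
        for amt in [0u32, 1, 63, 64, 65, 127] {
            let (lo, hi) = shr_i128_by_halves(x as u64, (x >> 64) as u64, amt, false);
            assert_eq!((lo as u128) | ((hi as u128) << 64), x >> amt);
            let (lo, hi) = shr_i128_by_halves(x as u64, (x >> 64) as u64, amt, true);
            assert_eq!(((lo as u128) | ((hi as u128) << 64)) as i128, (x as i128) >> amt);
        }
    }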