Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable the simd_i16x8_q15mulr_sat_s test on AArch64 #3035

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
("simd", "simd_conversions")
| ("simd", "simd_i16x8_extadd_pairwise_i8x16")
| ("simd", "simd_i16x8_extmul_i8x16")
| ("simd", "simd_i16x8_q15mulr_sat_s")
| ("simd", "simd_i32x4_extadd_pairwise_i16x8")
| ("simd", "simd_i32x4_extmul_i16x8")
| ("simd", "simd_i32x4_trunc_sat_f64x2")
Expand Down
27 changes: 27 additions & 0 deletions cranelift/codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2479,6 +2479,33 @@ pub(crate) fn define(
.operands_out(vec![a]),
);

let I16or32 = &TypeVar::new(
"I16or32",
"A scalar or vector integer type with 16- or 32-bit numbers",
TypeSetBuilder::new().ints(16..32).simd_lanes(4..8).build(),
);

let qx = &Operand::new("x", I16or32);
let qy = &Operand::new("y", I16or32);
let qa = &Operand::new("a", I16or32);

ig.push(
Inst::new(
"sqmul_round_sat",
r#"
Fixed-point multiplication of numbers in the QN format, where N + 1
is the number bitwidth:
`a := signed_saturate((x * y + (1 << (N - 1))) >> N)`

Polymorphic over all integer types (scalar and vector) with 16- or
32-bit numbers.
"#,
&formats.binary,
)
.operands_in(vec![qx, qy])
.operands_out(vec![qa]),
);

ig.push(
Inst::new(
"udiv",
Expand Down
8 changes: 8 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2228,6 +2228,14 @@ impl MachInstEmit for Inst {
VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
VecALUOp::Smull => (0b000_01110_00_1 | enc_size << 1, 0b110000),
VecALUOp::Smull2 => (0b010_01110_00_1 | enc_size << 1, 0b110000),
VecALUOp::Sqrdmulh => {
debug_assert!(
size.lane_size() == ScalarSize::Size16
|| size.lane_size() == ScalarSize::Size32
);

(0b001_01110_00_1 | enc_size << 1, 0b101101)
}
};
let top11 = match alu_op {
VecALUOp::Smull | VecALUOp::Smull2 => top11,
Expand Down
24 changes: 24 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3610,6 +3610,30 @@ fn test_aarch64_binemit() {
"smull2 v8.2d, v12.4s, v14.4s",
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Sqrdmulh,
rd: writable_vreg(31),
rn: vreg(0),
rm: vreg(31),
size: VectorSize::Size16x8,
},
"1FB47F6E",
"sqrdmulh v31.8h, v0.8h, v31.8h",
));

insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Sqrdmulh,
rd: writable_vreg(7),
rn: vreg(7),
rm: vreg(23),
size: VectorSize::Size32x2,
},
"E7B4B72E",
"sqrdmulh v7.2s, v7.2s, v23.2s",
));

insns.push((
Inst::VecMisc {
op: VecMisc2::Not,
Expand Down
3 changes: 3 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,8 @@ pub enum VecALUOp {
Smull,
/// Signed multiply long (high halves)
Smull2,
/// Signed saturating rounding doubling multiply returning high half
Sqrdmulh,
}

/// A Vector miscellaneous operation with two registers.
Expand Down Expand Up @@ -3980,6 +3982,7 @@ impl Inst {
VecALUOp::Zip1 => ("zip1", size),
VecALUOp::Smull => ("smull", size),
VecALUOp::Smull2 => ("smull2", size),
VecALUOp::Sqrdmulh => ("sqrdmulh", size),
};
let rd_size = match alu_op {
VecALUOp::Umlal | VecALUOp::Smull | VecALUOp::Smull2 => size.widen(),
Expand Down
39 changes: 29 additions & 10 deletions cranelift/codegen/src/isa/aarch64/lower_inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1650,8 +1650,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
panic!("table_addr should have been removed by legalization!");
}

Opcode::ConstAddr => unimplemented!(),

Opcode::Nop => {
// Nothing.
}
Expand Down Expand Up @@ -2684,11 +2682,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
});
}

Opcode::Vsplit | Opcode::Vconcat => {
// TODO
panic!("Vector ops not implemented.");
}

Opcode::Isplit => {
assert_eq!(
ctx.input_ty(insn, 0),
Expand Down Expand Up @@ -3524,9 +3517,35 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
},

Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
Opcode::FvpromoteLow => unimplemented!("FvpromoteLow"),
Opcode::Fvdemote => unimplemented!("Fvdemote"),
Opcode::SqmulRoundSat => {
let ty = ty.unwrap();

if !ty.is_vector() || (ty.lane_type() != I16 && ty.lane_type() != I32) {
return Err(CodegenError::Unsupported(format!(
"Unsupported type: {:?}",
ty
)));
}

let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);

ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Sqrdmulh,
rd,
rn,
rm,
size: VectorSize::from_ty(ty),
});
}

Opcode::ConstAddr
| Opcode::FcvtLowFromSint
| Opcode::Fvdemote
| Opcode::FvpromoteLow
| Opcode::Vconcat
| Opcode::Vsplit => unimplemented!("lowering {}", op),
}

Ok(())
Expand Down
19 changes: 10 additions & 9 deletions cranelift/codegen/src/isa/s390x/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2458,11 +2458,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}

Opcode::TlsValue => {
panic!("Thread-local storage support not implemented!");
unimplemented!("Thread-local storage support not implemented!");
}

Opcode::GetPinnedReg | Opcode::SetPinnedReg => {
panic!("Pinned register support not implemented!");
unimplemented!("Pinned register support not implemented!");
}

Opcode::Icmp => {
Expand Down Expand Up @@ -2679,10 +2679,10 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let ty = ty.unwrap();
assert!(is_valid_atomic_transaction_ty(ty));
if endianness == Endianness::Little {
panic!("Little-endian atomic operations not implemented");
unimplemented!("Little-endian atomic operations not implemented");
}
if ty_bits(ty) < 32 {
panic!("Sub-word atomic operations not implemented");
unimplemented!("Sub-word atomic operations not implemented");
}
let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap());
let (alu_op, rn) = match op {
Expand All @@ -2701,7 +2701,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
});
(choose_32_64(ty, ALUOp::Add32, ALUOp::Add64), tmp.to_reg())
}
_ => panic!("AtomicRmw operation type {:?} not implemented", op),
_ => unimplemented!("AtomicRmw operation type {:?} not implemented", op),
};
let mem = MemArg::reg(addr, flags);
ctx.emit(Inst::AtomicRmw {
Expand All @@ -2721,10 +2721,10 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let ty = ty.unwrap();
assert!(is_valid_atomic_transaction_ty(ty));
if endianness == Endianness::Little {
panic!("Little-endian atomic operations not implemented");
unimplemented!("Little-endian atomic operations not implemented");
}
if ty_bits(ty) < 32 {
panic!("Sub-word atomic operations not implemented");
unimplemented!("Sub-word atomic operations not implemented");
}
let mem = MemArg::reg(addr, flags);
ctx.emit(Inst::gen_move(rd, rm, ty));
Expand Down Expand Up @@ -2865,13 +2865,14 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::UwidenLow
| Opcode::UwidenHigh
| Opcode::WideningPairwiseDotProductS
| Opcode::SqmulRoundSat
| Opcode::FvpromoteLow
| Opcode::Fvdemote => {
// TODO
panic!("Vector ops not implemented.");
unimplemented!("Vector ops not implemented.");
}

Opcode::Isplit | Opcode::Iconcat => panic!("Wide integer ops not implemented."),
Opcode::Isplit | Opcode::Iconcat => unimplemented!("Wide integer ops not implemented."),

Opcode::Spill
| Opcode::Fill
Expand Down
2 changes: 2 additions & 0 deletions cranelift/codegen/src/isa/x64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6001,6 +6001,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
unimplemented!("Vector split/concat ops not implemented.");
}

Opcode::SqmulRoundSat => unimplemented!("unimplemented lowering for opcode {:?}", op),

// Opcodes that should be removed by legalization. These should
// eventually be removed if/when we replace in-situ legalization with
// something better.
Expand Down
Binary file modified cranelift/codegen/src/preopt.serialized
Binary file not shown.
1 change: 1 addition & 0 deletions cranelift/interpreter/src/step.rs
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,7 @@ where
Opcode::AtomicStore => unimplemented!("AtomicStore"),
Opcode::Fence => unimplemented!("Fence"),
Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"),

// TODO: these instructions should be removed once the new backend makes these obsolete
// (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the
Expand Down
8 changes: 6 additions & 2 deletions cranelift/wasm/src/code_translator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1885,8 +1885,12 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let arg = pop1_with_bitcast(state, type_of(op), builder);
state.push1(builder.ins().popcnt(arg));
}
Operator::I16x8Q15MulrSatS
| Operator::I16x8ExtMulLowI8x16S
Operator::I16x8Q15MulrSatS => {
let (a, b) = pop2_with_bitcast(state, I16X8, builder);

state.push1(builder.ins().sqmul_round_sat(a, b))
}
Operator::I16x8ExtMulLowI8x16S
| Operator::I16x8ExtMulHighI8x16S
| Operator::I16x8ExtMulLowI8x16U
| Operator::I16x8ExtMulHighI8x16U
Expand Down