Skip to content

Commit

Permalink
pulley: Initial scaffold of SIMD support
Browse files Browse the repository at this point in the history
This commit fills out some of the initial infrastructure necessary for
supporting the SIMD proposal to WebAssembly in the Pulley interpreter,
namely 128-bit simd. The `VRegVal` union has been filled out with
various types, endianness questions are settled, and initial
implementations of a suite of opcodes are added to get a basic set of
tests working throughout the backend.

cc #9783
  • Loading branch information
alexcrichton committed Dec 13, 2024
1 parent ae8fc9e commit 9228b69
Show file tree
Hide file tree
Showing 12 changed files with 474 additions and 62 deletions.
60 changes: 44 additions & 16 deletions cranelift/codegen/meta/src/pulley.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,47 @@ const OPS: &[Inst<'_>] = pulley_interpreter::for_each_op!(define);
const EXTENDED_OPS: &[Inst<'_>] = pulley_interpreter::for_each_extended_op!(define);

enum Operand<'a> {
Normal { name: &'a str, ty: &'a str },
Writable { name: &'a str, ty: &'a str },
TrapCode { name: &'a str, ty: &'a str },
Binop { reg: &'a str },
Normal {
name: &'a str,
ty: &'a str,
},
Writable {
name: &'a str,
ty: &'a str,
},
TrapCode {
name: &'a str,
ty: &'a str,
},
Binop {
dst: &'a str,
src1: &'a str,
src2: &'a str,
},
}

impl Inst<'_> {
fn operands(&self) -> impl Iterator<Item = Operand<'_>> {
self.fields
.iter()
.map(|(name, ty)| match (*name, *ty) {
("operands", "BinaryOperands < XReg >") => Operand::Binop { reg: "XReg" },
("operands", "BinaryOperands < FReg >") => Operand::Binop { reg: "FReg" },
("operands", binop) => {
// Parse "BinaryOperands < A >"` as A/A/A
// Parse "BinaryOperands < A, B >"` as A/B/A
// Parse "BinaryOperands < A, B, C >"` as A/B/C
let mut parts = binop
.strip_prefix("BinaryOperands <")
.unwrap()
.strip_suffix(">")
.unwrap()
.trim()
.split(',')
.map(|x| x.trim());
let dst = parts.next().unwrap();
let src1 = parts.next().unwrap_or(dst);
let src2 = parts.next().unwrap_or(dst);
Operand::Binop { dst, src1, src2 }
}
("dst", ty) => Operand::Writable { name, ty },
(name, ty) => Operand::Normal { name, ty },
})
Expand Down Expand Up @@ -109,7 +137,7 @@ pub fn generate_rust(filename: &str, out_dir: &Path) -> Result<(), Error> {
pat.push_str(",");
format_string.push_str(&format!(" // trap={{{name}:?}}"));
}
Operand::Binop { reg: _ } => {
Operand::Binop { .. } => {
pat.push_str("dst, src1, src2,");
format_string.push_str(" {dst}, {src1}, {src2}");
locals.push_str(&format!("let dst = reg_name(*dst.to_reg());\n"));
Expand Down Expand Up @@ -161,7 +189,7 @@ pub fn generate_rust(filename: &str, out_dir: &Path) -> Result<(), Error> {
}
}
Operand::TrapCode { .. } => {}
Operand::Binop { reg: _ } => {
Operand::Binop { .. } => {
pat.push_str("dst, src1, src2,");
uses.push("src1");
uses.push("src2");
Expand Down Expand Up @@ -221,7 +249,7 @@ pub fn generate_rust(filename: &str, out_dir: &Path) -> Result<(), Error> {
pat.push_str(",");
trap.push_str(&format!("sink.add_trap({name});\n"));
}
Operand::Binop { reg: _ } => {
Operand::Binop { .. } => {
pat.push_str("dst, src1, src2,");
args.push_str(
"pulley_interpreter::regs::BinaryOperands::new(dst, src1, src2),",
Expand Down Expand Up @@ -265,10 +293,10 @@ pub fn generate_isle(filename: &str, out_dir: &Path) -> Result<(), Error> {
Operand::Writable { name, ty } => {
isle.push_str(&format!("\n ({name} Writable{ty})"));
}
Operand::Binop { reg } => {
isle.push_str(&format!("\n (dst Writable{reg})"));
isle.push_str(&format!("\n (src1 {reg})"));
isle.push_str(&format!("\n (src2 {reg})"));
Operand::Binop { dst, src1, src2 } => {
isle.push_str(&format!("\n (dst Writable{dst})"));
isle.push_str(&format!("\n (src1 {src1})"));
isle.push_str(&format!("\n (src2 {src2})"));
}
}
}
Expand Down Expand Up @@ -303,13 +331,13 @@ pub fn generate_isle(filename: &str, out_dir: &Path) -> Result<(), Error> {
assert!(result.is_none(), "{} has >1 result", inst.snake_name);
result = Some(ty);
}
Operand::Binop { reg } => {
isle.push_str(&format!("{reg} {reg}"));
Operand::Binop { dst, src1, src2 } => {
isle.push_str(&format!("{src1} {src2}"));
rule.push_str("src1 src2");
ops.push("src1");
ops.push("src2");
assert!(result.is_none(), "{} has >1 result", inst.snake_name);
result = Some(reg);
result = Some(dst);
}
}
isle.push_str(" ");
Expand Down
21 changes: 11 additions & 10 deletions cranelift/codegen/src/isa/pulley_shared/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -510,17 +510,18 @@ where
_target_vector_bytes: u32,
_isa_flags: &PulleyFlags,
) -> u32 {
// Spill slots are the size of a "word" or a pointer, but Pulley
// registers are 8-byte for integers/floats regardless of pointer size.
// Calculate the number of slots necessary to store 8 bytes.
let slots_for_8bytes = match P::pointer_width() {
PointerWidth::PointerWidth32 => 2,
PointerWidth::PointerWidth64 => 1,
};
match rc {
// Spilling an integer or float register requires spilling 8 bytes,
// and spill slots are defined in terms of "word bytes" or the size
// of a pointer. That means on 32-bit pulley we need to take up two
// spill slots where on 64-bit pulley we need to only take up one
// spill slot for integers.
RegClass::Int | RegClass::Float => match P::pointer_width() {
PointerWidth::PointerWidth32 => 2,
PointerWidth::PointerWidth64 => 1,
},
RegClass::Vector => unreachable!(),
// Int/float registers are 8-bytes
RegClass::Int | RegClass::Float => slots_for_8bytes,
// Vector registers are 16 bytes
RegClass::Vector => 2 * slots_for_8bytes,
}
}

Expand Down
10 changes: 10 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,16 @@
(rule (pulley_fstore amode src ty flags)
(SideEffectNoResult.Inst (MInst.FStore amode src ty flags)))

(decl pulley_vload (Amode Type MemFlags) VReg)
(rule (pulley_vload amode ty flags)
(let ((dst WritableVReg (temp_writable_vreg))
(_ Unit (emit (MInst.VLoad dst amode ty flags))))
dst))

(decl pulley_vstore (Amode VReg Type MemFlags) SideEffectNoResult)
(rule (pulley_vstore amode src ty flags)
(SideEffectNoResult.Inst (MInst.VStore amode src ty flags)))

(decl gen_br_table (XReg MachLabel BoxVecMachLabel) Unit)
(rule (gen_br_table idx default labels)
(emit (MInst.BrTable idx default labels)))
Expand Down
14 changes: 2 additions & 12 deletions cranelift/codegen/src/isa/pulley_shared/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -453,18 +453,8 @@ where
}

fn worst_case_size() -> CodeOffset {
// `BrIfXeq32 { a, b, taken, not_taken }` expands to `br_if_xeq32 a, b, taken; jump not_taken`.
//
// The first instruction is seven bytes long:
// * 1 byte opcode
// * 1 byte `a` register encoding
// * 1 byte `b` register encoding
// * 4 byte `taken` displacement
//
// And the second instruction is five bytes long:
// * 1 byte opcode
// * 4 byte `not_taken` displacement
12
// `Vconst128 { dst, imm }` is 18 bytes (opcode + dst + 16-byte imm)
18
}

fn ref_type_regclass(_settings: &settings::Flags) -> RegClass {
Expand Down
32 changes: 32 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,11 @@
(rule (lower (has_type $I64 (iadd a b)))
(pulley_xadd64 a b))

(rule (lower (has_type $I8X16 (iadd a b))) (pulley_vaddi8x16 a b))
(rule (lower (has_type $I16X8 (iadd a b))) (pulley_vaddi16x8 a b))
(rule (lower (has_type $I32X4 (iadd a b))) (pulley_vaddi32x4 a b))
(rule (lower (has_type $I64X2 (iadd a b))) (pulley_vaddi64x2 a b))

;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $I8 (isub a b)))
Expand Down Expand Up @@ -192,6 +197,11 @@
(rule (lower (has_type $I64 (ishl a b)))
(pulley_xshl64 a b))

(rule (lower (has_type $I8X16 (ishl a b))) (pulley_vshli8x16 a b))
(rule (lower (has_type $I16X8 (ishl a b))) (pulley_vshli16x8 a b))
(rule (lower (has_type $I32X4 (ishl a b))) (pulley_vshli32x4 a b))
(rule (lower (has_type $I64X2 (ishl a b))) (pulley_vshli64x2 a b))

;;;; Rules for `ushr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $I32 (ushr a b)))
Expand All @@ -200,6 +210,11 @@
(rule (lower (has_type $I64 (ushr a b)))
(pulley_xshr64_u a b))

(rule (lower (has_type $I8X16 (ushr a b))) (pulley_vshri8x16_u a b))
(rule (lower (has_type $I16X8 (ushr a b))) (pulley_vshri16x8_u a b))
(rule (lower (has_type $I32X4 (ushr a b))) (pulley_vshri32x4_u a b))
(rule (lower (has_type $I64X2 (ushr a b))) (pulley_vshri64x2_u a b))

;;;; Rules for `sshr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type $I32 (sshr a b)))
Expand All @@ -208,6 +223,11 @@
(rule (lower (has_type $I64 (sshr a b)))
(pulley_xshr64_s a b))

(rule (lower (has_type $I8X16 (sshr a b))) (pulley_vshri8x16_s a b))
(rule (lower (has_type $I16X8 (sshr a b))) (pulley_vshri16x8_s a b))
(rule (lower (has_type $I32X4 (sshr a b))) (pulley_vshri32x4_s a b))
(rule (lower (has_type $I64X2 (sshr a b))) (pulley_vshri64x2_s a b))

;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule 0 (lower (has_type (fits_in_32 _) (band a b)))
Expand Down Expand Up @@ -414,6 +434,9 @@
(rule 1 (lower (has_type $I64 (sload32 flags addr offset)))
(pulley_xload (amode addr offset) $I32 flags (ExtKind.Sign64)))

(rule 2 (lower (has_type (ty_vec128 ty) (load flags addr offset)))
(pulley_vload (amode addr offset) ty flags))

;;;; Rules for `store` and friends ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (store flags src @ (value_type (ty_int ty)) addr offset))
Expand All @@ -431,6 +454,9 @@
(rule (lower (istore32 flags src addr offset))
(side_effect (pulley_xstore (amode addr offset) src $I32 flags)))

(rule 2 (lower (store flags src @ (value_type (ty_vec128 ty)) addr offset))
(side_effect (pulley_vstore (amode addr offset) src ty flags)))

;;;; Rules for `stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (stack_addr stack_slot offset))
Expand Down Expand Up @@ -622,6 +648,8 @@

(rule (lower (has_type $F32 (fadd a b))) (pulley_fadd32 a b))
(rule (lower (has_type $F64 (fadd a b))) (pulley_fadd64 a b))
(rule (lower (has_type $F32X4 (fadd a b))) (pulley_vaddf32x4 a b))
(rule (lower (has_type $F64X2 (fadd a b))) (pulley_vaddf64x2 a b))

;;;; Rules for `fsub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down Expand Up @@ -687,3 +715,7 @@

(rule (lower (has_type $F32 (fabs a))) (pulley_fabs32 a))
(rule (lower (has_type $F64 (fabs a))) (pulley_fabs64 a))

;;;; Rules for `vconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (ty_vec128 _) (vconst (u128_from_constant a)))) (pulley_vconst128 a))
9 changes: 0 additions & 9 deletions crates/wast-util/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -403,10 +403,8 @@ impl WastTest {
"misc_testsuite/simd/almost-extmul.wast",
"misc_testsuite/simd/canonicalize-nan.wast",
"misc_testsuite/simd/cvt-from-uint.wast",
"misc_testsuite/simd/interesting-float-splat.wast",
"misc_testsuite/simd/issue4807.wast",
"misc_testsuite/simd/issue6725-no-egraph-panic.wast",
"misc_testsuite/simd/issue_3173_select_v128.wast",
"misc_testsuite/simd/issue_3327_bnot_lowering.wast",
"misc_testsuite/simd/load_splat_out_of_bounds.wast",
"misc_testsuite/simd/replace-lane-preserve.wast",
Expand All @@ -418,11 +416,6 @@ impl WastTest {
"misc_testsuite/threads/MP_wait.wast",
"misc_testsuite/threads/SB_atomic.wast",
"misc_testsuite/threads/load-store-alignment.wast",
"misc_testsuite/winch/_simd_address.wast",
"misc_testsuite/winch/_simd_const.wast",
"misc_testsuite/winch/_simd_load.wast",
"misc_testsuite/winch/_simd_multivalue.wast",
"misc_testsuite/winch/_simd_store.wast",
"spec_testsuite/proposals/annotations/simd_lane.wast",
"spec_testsuite/proposals/multi-memory/simd_memory-multi.wast",
"spec_testsuite/proposals/relaxed-simd/i16x8_relaxed_q15mulr_s.wast",
Expand All @@ -433,7 +426,6 @@ impl WastTest {
"spec_testsuite/proposals/relaxed-simd/relaxed_madd_nmadd.wast",
"spec_testsuite/proposals/relaxed-simd/relaxed_min_max.wast",
"spec_testsuite/proposals/threads/atomic.wast",
"spec_testsuite/simd_address.wast",
"spec_testsuite/simd_align.wast",
"spec_testsuite/simd_bit_shift.wast",
"spec_testsuite/simd_bitwise.wast",
Expand Down Expand Up @@ -484,7 +476,6 @@ impl WastTest {
"spec_testsuite/simd_load_splat.wast",
"spec_testsuite/simd_load_zero.wast",
"spec_testsuite/simd_splat.wast",
"spec_testsuite/simd_store.wast",
"spec_testsuite/simd_store16_lane.wast",
"spec_testsuite/simd_store32_lane.wast",
"spec_testsuite/simd_store64_lane.wast",
Expand Down
20 changes: 19 additions & 1 deletion pulley/src/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,15 @@ impl Decode for u64 {
}
}

impl Decode for u128 {
fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
where
T: BytecodeStream,
{
Ok(u128::from_le_bytes(bytecode.read()?))
}
}

impl Decode for i8 {
fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
where
Expand Down Expand Up @@ -339,6 +348,15 @@ impl Decode for i64 {
}
}

impl Decode for i128 {
fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
where
T: BytecodeStream,
{
Ok(i128::from_le_bytes(bytecode.read()?))
}
}

impl Decode for XReg {
fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
where
Expand Down Expand Up @@ -404,7 +422,7 @@ impl Decode for ExtendedOpcode {
}
}

impl<R: Reg> Decode for BinaryOperands<R> {
impl<D: Reg, S1: Reg, S2: Reg> Decode for BinaryOperands<D, S1, S2> {
fn decode<T>(bytecode: &mut T) -> Result<Self, T::Error>
where
T: BytecodeStream,
Expand Down
25 changes: 23 additions & 2 deletions pulley/src/disas.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,12 @@ impl Disas for i64 {
}
}

impl Disas for i128 {
fn disas(&self, _position: usize, disas: &mut String) {
write!(disas, "{self}").unwrap();
}
}

impl Disas for u8 {
fn disas(&self, _position: usize, disas: &mut String) {
write!(disas, "{self}").unwrap();
Expand All @@ -173,6 +179,12 @@ impl Disas for u64 {
}
}

impl Disas for u128 {
fn disas(&self, _position: usize, disas: &mut String) {
write!(disas, "{self}").unwrap();
}
}

impl Disas for PcRelOffset {
fn disas(&self, position: usize, disas: &mut String) {
let offset = isize::try_from(i32::from(*self)).unwrap();
Expand All @@ -192,9 +204,18 @@ fn disas_list<T: Disas>(position: usize, disas: &mut String, iter: impl IntoIter
}
}

impl<R: Reg + Disas> Disas for BinaryOperands<R> {
impl<D, S1, S2> Disas for BinaryOperands<D, S1, S2>
where
D: Reg + Disas,
S1: Reg + Disas,
S2: Reg + Disas,
{
fn disas(&self, position: usize, disas: &mut String) {
disas_list(position, disas, [self.dst, self.src1, self.src2])
self.dst.disas(position, disas);
write!(disas, ", ").unwrap();
self.src1.disas(position, disas);
write!(disas, ", ").unwrap();
self.src2.disas(position, disas);
}
}

Expand Down
Loading

0 comments on commit 9228b69

Please sign in to comment.