From 04a373678eefa60dd735b1e2a225e5a09028298d Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 12 Dec 2024 07:08:21 -0700 Subject: [PATCH] pulley: Refactor conditional branches and table codegen (#9794) This commit first fixed an issue with table access codegen to disable spectre mitigations on Pulley targets like how spectre is disabled for memory accesses as well. This unblocked many tests related to tables which then led to a different error about a `trapnz` with an 8-bit value not being supported. In fixing `trapnz` with 8-bit values this PR went ahead and did a general-purpose refactoring for how conditional branches are managed. Previously conditional traps and conditional branches had some duplicated logic and the goal was to unify everything. There is now a single `Cond` which represents the condition of a conditional jump which is used uniformly for all locations such as `select`, `brif`, and `trap[n]z`. This new type represents all the sorts of conditional branches that can be done in Pulley, for example integer comparisons and whether or not a register is zero. This `Cond` type has various helpers for printing it, inverting it, collecting operands, emission, etc. The end result is that it's a bit wordy to work with `Cond` right now due to the size of the variants but all locations working with conditional traps are deduplicated and now it's just repetitive logic rather than duplicated logic. Putting all of this together gets a large batch of spec tests working. I'll note that this does remove a feature where `trapnz` was turned into nothing or an unconditional trap if the argument was a constant, but that feels like an optimization perhaps best left for the middle-end rather than doing it in the backend. cc #9783 --- .../codegen/src/isa/pulley_shared/inst.isle | 84 +++---- .../src/isa/pulley_shared/inst/args.rs | 144 +++++++++++ .../src/isa/pulley_shared/inst/emit.rs | 238 +++--------------- .../codegen/src/isa/pulley_shared/inst/mod.rs | 157 +----------- .../codegen/src/isa/pulley_shared/lower.isle | 153 +++++------ .../src/isa/pulley_shared/lower/isle.rs | 6 +- .../filetests/isa/pulley32/brif.clif | 14 +- .../filetests/isa/pulley32/trap.clif | 60 +++-- .../filetests/isa/pulley64/brif.clif | 14 +- .../filetests/isa/pulley64/trap.clif | 60 +++-- crates/cranelift/src/translate/table.rs | 7 +- crates/wasmtime/Cargo.toml | 1 + crates/wast-util/src/lib.rs | 39 --- tests/disas/pulley/epoch-simple.wat | 18 +- 14 files changed, 388 insertions(+), 607 deletions(-) diff --git a/cranelift/codegen/src/isa/pulley_shared/inst.isle b/cranelift/codegen/src/isa/pulley_shared/inst.isle index 41a4492fa184..d9470260e2d6 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst.isle +++ b/cranelift/codegen/src/isa/pulley_shared/inst.isle @@ -30,8 +30,8 @@ ;;;; Actual Instructions ;;;; - ;; Trap if `src1 cond src2`. - (TrapIf (cond IntCC) (size OperandSize) (src1 XReg) (src2 XReg) (code TrapCode)) + ;; Trap if `cond` is true. + (TrapIf (cond Cond) (code TrapCode)) ;; Nothing. (Nop) @@ -60,16 +60,8 @@ ;; Unconditional jumps. (Jump (label MachLabel)) - ;; Jump to `then` if `c` is nonzero, otherwise to `else`. - (BrIf32 (c XReg) (taken MachLabel) (not_taken MachLabel)) - - ;; Compare-and-branch macro ops. - (BrIfXeq32 (src1 XReg) (src2 XReg) (taken MachLabel) (not_taken MachLabel)) - (BrIfXneq32 (src1 XReg) (src2 XReg) (taken MachLabel) (not_taken MachLabel)) - (BrIfXslt32 (src1 XReg) (src2 XReg) (taken MachLabel) (not_taken MachLabel)) - (BrIfXslteq32 (src1 XReg) (src2 XReg) (taken MachLabel) (not_taken MachLabel)) - (BrIfXult32 (src1 XReg) (src2 XReg) (taken MachLabel) (not_taken MachLabel)) - (BrIfXulteq32 (src1 XReg) (src2 XReg) (taken MachLabel) (not_taken MachLabel)) + ;; Jump to `then` if `c` is true, otherwise to `else`. + (BrIf (cond Cond) (taken MachLabel) (not_taken MachLabel)) ;; Load the memory address referenced by `mem` into `dst`. (LoadAddr (dst WritableXReg) (mem Amode)) @@ -95,6 +87,38 @@ ) ) +;; Helper type on conditional branches and traps to represent what the +;; condition that is being performed is. +;; +;; Used in `BrIf` and `TrapIf` above for example. +(type Cond + (enum + ;; True if `reg` contains a nonzero value in the low 32-bits. + (If32 (reg XReg)) + ;; True if `reg` contains a zero in the low 32-bits. + (IfNot32 (reg XReg)) + + ;; Conditionals for comparing the low 32-bits of two registers. + (IfXeq32 (src1 XReg) (src2 XReg)) + (IfXneq32 (src1 XReg) (src2 XReg)) + (IfXslt32 (src1 XReg) (src2 XReg)) + (IfXslteq32 (src1 XReg) (src2 XReg)) + (IfXult32 (src1 XReg) (src2 XReg)) + (IfXulteq32 (src1 XReg) (src2 XReg)) + + ;; Conditionals for comparing two 64-bit registers. + (IfXeq64 (src1 XReg) (src2 XReg)) + (IfXneq64 (src1 XReg) (src2 XReg)) + (IfXslt64 (src1 XReg) (src2 XReg)) + (IfXslteq64 (src1 XReg) (src2 XReg)) + (IfXult64 (src1 XReg) (src2 XReg)) + (IfXulteq64 (src1 XReg) (src2 XReg)) + ) +) + +(decl cond_invert (Cond) Cond) +(extern constructor cond_invert cond_invert) + (decl raw_inst_to_inst (RawInst) MInst) (rule (raw_inst_to_inst inst) (MInst.Raw inst)) (convert RawInst MInst raw_inst_to_inst) @@ -349,9 +373,9 @@ ;;;; Instruction Constructors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(decl pulley_trap_if (IntCC OperandSize XReg XReg TrapCode) SideEffectNoResult) -(rule (pulley_trap_if cond size src1 src2 code) - (SideEffectNoResult.Inst (MInst.TrapIf cond size src1 src2 code))) +(decl pulley_trap_if (Cond TrapCode) SideEffectNoResult) +(rule (pulley_trap_if cond code) + (SideEffectNoResult.Inst (MInst.TrapIf cond code))) (decl sp_reg () XReg) (extern constructor sp_reg sp_reg) @@ -372,33 +396,9 @@ (rule (pulley_jump label) (SideEffectNoResult.Inst (MInst.Jump label))) -(decl pulley_br_if32 (XReg MachLabel MachLabel) SideEffectNoResult) -(rule (pulley_br_if32 c taken not_taken) - (SideEffectNoResult.Inst (MInst.BrIf32 c taken not_taken))) - -(decl pulley_br_if_xeq32 (XReg XReg MachLabel MachLabel) SideEffectNoResult) -(rule (pulley_br_if_xeq32 a b taken not_taken) - (SideEffectNoResult.Inst (MInst.BrIfXeq32 a b taken not_taken))) - -(decl pulley_br_if_xneq32 (XReg XReg MachLabel MachLabel) SideEffectNoResult) -(rule (pulley_br_if_xneq32 a b taken not_taken) - (SideEffectNoResult.Inst (MInst.BrIfXneq32 a b taken not_taken))) - -(decl pulley_br_if_xslt32 (XReg XReg MachLabel MachLabel) SideEffectNoResult) -(rule (pulley_br_if_xslt32 a b taken not_taken) - (SideEffectNoResult.Inst (MInst.BrIfXslt32 a b taken not_taken))) - -(decl pulley_br_if_xslteq32 (XReg XReg MachLabel MachLabel) SideEffectNoResult) -(rule (pulley_br_if_xslteq32 a b taken not_taken) - (SideEffectNoResult.Inst (MInst.BrIfXslteq32 a b taken not_taken))) - -(decl pulley_br_if_xult32 (XReg XReg MachLabel MachLabel) SideEffectNoResult) -(rule (pulley_br_if_xult32 a b taken not_taken) - (SideEffectNoResult.Inst (MInst.BrIfXult32 a b taken not_taken))) - -(decl pulley_br_if_xulteq32 (XReg XReg MachLabel MachLabel) SideEffectNoResult) -(rule (pulley_br_if_xulteq32 a b taken not_taken) - (SideEffectNoResult.Inst (MInst.BrIfXulteq32 a b taken not_taken))) +(decl pulley_br_if (Cond MachLabel MachLabel) SideEffectNoResult) +(rule (pulley_br_if cond taken not_taken) + (SideEffectNoResult.Inst (MInst.BrIf cond taken not_taken))) (decl pulley_xload (Amode Type MemFlags ExtKind) XReg) (rule (pulley_xload amode ty flags ext) diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/args.rs b/cranelift/codegen/src/isa/pulley_shared/inst/args.rs index b00a0aa82b61..d28ae9c9d1dc 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/args.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/args.rs @@ -2,7 +2,9 @@ use super::*; use crate::machinst::abi::StackAMode; +use pulley_interpreter::encode; use pulley_interpreter::regs::Reg as _; +use std::fmt; /// A macro for defining a newtype of `Reg` that enforces some invariant about /// the wrapped `Reg` (such as that it is of a particular register class). @@ -229,3 +231,145 @@ pub enum OperandSize { /// 64 bits. Size64, } + +pub use crate::isa::pulley_shared::lower::isle::generated_code::Cond; + +impl Cond { + /// Collect register operands within `collector` for register allocation. + pub fn get_operands(&mut self, collector: &mut impl OperandVisitor) { + match self { + Cond::If32 { reg } | Cond::IfNot32 { reg } => collector.reg_use(reg), + + Cond::IfXeq32 { src1, src2 } + | Cond::IfXneq32 { src1, src2 } + | Cond::IfXslt32 { src1, src2 } + | Cond::IfXslteq32 { src1, src2 } + | Cond::IfXult32 { src1, src2 } + | Cond::IfXulteq32 { src1, src2 } + | Cond::IfXeq64 { src1, src2 } + | Cond::IfXneq64 { src1, src2 } + | Cond::IfXslt64 { src1, src2 } + | Cond::IfXslteq64 { src1, src2 } + | Cond::IfXult64 { src1, src2 } + | Cond::IfXulteq64 { src1, src2 } => { + collector.reg_use(src1); + collector.reg_use(src2); + } + } + } + + /// Encode this condition as a branch into `sink`. + /// + /// Note that the offset encoded to jump by is filled in as 0 and it's + /// assumed `MachBuffer` will come back and clean it up. + pub fn encode(&self, sink: &mut impl Extend) { + match self { + Cond::If32 { reg } => encode::br_if32(sink, reg, 0), + Cond::IfNot32 { reg } => encode::br_if_not32(sink, reg, 0), + Cond::IfXeq32 { src1, src2 } => encode::br_if_xeq32(sink, src1, src2, 0), + Cond::IfXneq32 { src1, src2 } => encode::br_if_xneq32(sink, src1, src2, 0), + Cond::IfXslt32 { src1, src2 } => encode::br_if_xslt32(sink, src1, src2, 0), + Cond::IfXslteq32 { src1, src2 } => encode::br_if_xslteq32(sink, src1, src2, 0), + Cond::IfXult32 { src1, src2 } => encode::br_if_xult32(sink, src1, src2, 0), + Cond::IfXulteq32 { src1, src2 } => encode::br_if_xulteq32(sink, src1, src2, 0), + Cond::IfXeq64 { src1, src2 } => encode::br_if_xeq64(sink, src1, src2, 0), + Cond::IfXneq64 { src1, src2 } => encode::br_if_xneq64(sink, src1, src2, 0), + Cond::IfXslt64 { src1, src2 } => encode::br_if_xslt64(sink, src1, src2, 0), + Cond::IfXslteq64 { src1, src2 } => encode::br_if_xslteq64(sink, src1, src2, 0), + Cond::IfXult64 { src1, src2 } => encode::br_if_xult64(sink, src1, src2, 0), + Cond::IfXulteq64 { src1, src2 } => encode::br_if_xulteq64(sink, src1, src2, 0), + } + } + + /// Inverts this conditional. + pub fn invert(&self) -> Cond { + match *self { + Cond::If32 { reg } => Cond::IfNot32 { reg }, + Cond::IfNot32 { reg } => Cond::If32 { reg }, + Cond::IfXeq32 { src1, src2 } => Cond::IfXneq32 { src1, src2 }, + Cond::IfXneq32 { src1, src2 } => Cond::IfXeq32 { src1, src2 }, + Cond::IfXeq64 { src1, src2 } => Cond::IfXneq64 { src1, src2 }, + Cond::IfXneq64 { src1, src2 } => Cond::IfXeq64 { src1, src2 }, + + // Note that for below the condition changes but the operands are + // also swapped. + Cond::IfXslt32 { src1, src2 } => Cond::IfXslteq32 { + src1: src2, + src2: src1, + }, + Cond::IfXslteq32 { src1, src2 } => Cond::IfXslt32 { + src1: src2, + src2: src1, + }, + Cond::IfXult32 { src1, src2 } => Cond::IfXulteq32 { + src1: src2, + src2: src1, + }, + Cond::IfXulteq32 { src1, src2 } => Cond::IfXult32 { + src1: src2, + src2: src1, + }, + Cond::IfXslt64 { src1, src2 } => Cond::IfXslteq64 { + src1: src2, + src2: src1, + }, + Cond::IfXslteq64 { src1, src2 } => Cond::IfXslt64 { + src1: src2, + src2: src1, + }, + Cond::IfXult64 { src1, src2 } => Cond::IfXulteq64 { + src1: src2, + src2: src1, + }, + Cond::IfXulteq64 { src1, src2 } => Cond::IfXult64 { + src1: src2, + src2: src1, + }, + } + } +} + +impl fmt::Display for Cond { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Cond::If32 { reg } => write!(f, "if32 {}", reg_name(**reg)), + Cond::IfNot32 { reg } => write!(f, "if_not32 {}", reg_name(**reg)), + Cond::IfXeq32 { src1, src2 } => { + write!(f, "if_xeq32 {}, {}", reg_name(**src1), reg_name(**src2)) + } + Cond::IfXneq32 { src1, src2 } => { + write!(f, "if_xneq32 {}, {}", reg_name(**src1), reg_name(**src2)) + } + Cond::IfXslt32 { src1, src2 } => { + write!(f, "if_xslt32 {}, {}", reg_name(**src1), reg_name(**src2)) + } + Cond::IfXslteq32 { src1, src2 } => { + write!(f, "if_xslteq32 {}, {}", reg_name(**src1), reg_name(**src2)) + } + Cond::IfXult32 { src1, src2 } => { + write!(f, "if_xult32 {}, {}", reg_name(**src1), reg_name(**src2)) + } + Cond::IfXulteq32 { src1, src2 } => { + write!(f, "if_xulteq32 {}, {}", reg_name(**src1), reg_name(**src2)) + } + Cond::IfXeq64 { src1, src2 } => { + write!(f, "if_xeq64 {}, {}", reg_name(**src1), reg_name(**src2)) + } + Cond::IfXneq64 { src1, src2 } => { + write!(f, "if_xneq64 {}, {}", reg_name(**src1), reg_name(**src2)) + } + Cond::IfXslt64 { src1, src2 } => { + write!(f, "if_xslt64 {}, {}", reg_name(**src1), reg_name(**src2)) + } + Cond::IfXslteq64 { src1, src2 } => { + write!(f, "if_xslteq64 {}, {}", reg_name(**src1), reg_name(**src2)) + } + Cond::IfXult64 { src1, src2 } => { + write!(f, "if_xult64 {}, {}", reg_name(**src1), reg_name(**src2)) + } + Cond::IfXulteq64 { src1, src2 } => { + write!(f, "if_xulteq64 {}, {}", reg_name(**src1), reg_name(**src2)) + } + } + } +} diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs index 463c920f4340..b03daa80fc8f 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs @@ -132,51 +132,17 @@ fn pulley_emit

( // Pseduo-instructions that don't actually encode to anything. Inst::Args { .. } | Inst::Rets { .. } | Inst::Unwind { .. } => {} - Inst::TrapIf { - cond, - size, - src1, - src2, - code, - } => { - let label = sink.defer_trap(*code); - - let cur_off = sink.cur_offset(); - sink.use_label_at_offset(cur_off + 3, label, LabelUse::Jump(3)); - - use ir::condcodes::IntCC::*; - use OperandSize::*; - match (cond, size) { - (Equal, Size32) => enc::br_if_xeq32(sink, src1, src2, 0), - (Equal, Size64) => enc::br_if_xeq64(sink, src1, src2, 0), - - (NotEqual, Size32) => enc::br_if_xneq32(sink, src1, src2, 0), - (NotEqual, Size64) => enc::br_if_xneq64(sink, src1, src2, 0), - - (SignedLessThan, Size32) => enc::br_if_xslt32(sink, src1, src2, 0), - (SignedLessThan, Size64) => enc::br_if_xslt64(sink, src1, src2, 0), - - (SignedLessThanOrEqual, Size32) => enc::br_if_xslteq32(sink, src1, src2, 0), - (SignedLessThanOrEqual, Size64) => enc::br_if_xslteq64(sink, src1, src2, 0), - - (UnsignedLessThan, Size32) => enc::br_if_xult32(sink, src1, src2, 0), - (UnsignedLessThan, Size64) => enc::br_if_xult64(sink, src1, src2, 0), - - (UnsignedLessThanOrEqual, Size32) => enc::br_if_xulteq32(sink, src1, src2, 0), - (UnsignedLessThanOrEqual, Size64) => enc::br_if_xulteq64(sink, src1, src2, 0), - - (SignedGreaterThan, Size32) => enc::br_if_xslt32(sink, src2, src1, 0), - (SignedGreaterThan, Size64) => enc::br_if_xslt64(sink, src2, src1, 0), - - (SignedGreaterThanOrEqual, Size32) => enc::br_if_xslteq32(sink, src2, src1, 0), - (SignedGreaterThanOrEqual, Size64) => enc::br_if_xslteq64(sink, src2, src1, 0), - - (UnsignedGreaterThan, Size32) => enc::br_if_xult32(sink, src2, src1, 0), - (UnsignedGreaterThan, Size64) => enc::br_if_xult64(sink, src2, src1, 0), - - (UnsignedGreaterThanOrEqual, Size32) => enc::br_if_xulteq32(sink, src2, src1, 0), - (UnsignedGreaterThanOrEqual, Size64) => enc::br_if_xulteq64(sink, src2, src1, 0), - } + Inst::TrapIf { cond, code } => { + let trap = sink.defer_trap(*code); + let not_trap = sink.get_label(); + + >::from(Inst::BrIf { + cond: cond.clone(), + taken: trap, + not_taken: not_trap, + }) + .emit(sink, emit_info, state); + sink.bind_label(not_trap, &mut state.ctrl_plane); } Inst::Nop => todo!(), @@ -247,142 +213,39 @@ fn pulley_emit

( enc::jump(sink, 0x00000000); } - Inst::BrIf32 { - c, + Inst::BrIf { + cond, taken, not_taken, } => { - // If taken. - let taken_start = *start_offset + 2; - let taken_end = taken_start + 4; - - sink.use_label_at_offset(taken_start, *taken, LabelUse::Jump(2)); + // Encode the inverted form of the branch. Branches always have + // their trailing 4 bytes as the relative offset which is what we're + // going to target here within the `MachBuffer`. let mut inverted = SmallVec::<[u8; 16]>::new(); - enc::br_if_not32(&mut inverted, c, 0x00000000); - debug_assert_eq!( - inverted.len(), - usize::try_from(taken_end - *start_offset).unwrap() - ); - + cond.invert().encode(&mut inverted); + let len = inverted.len() as u32; + debug_assert!(len > 4); + + // Use the `taken` label 4 bytes before the end of the instruction + // we're about to emit as that's the base of `PcRelOffset`. Note + // that the `Jump` here factors in the offset from the start of the + // instruction to the start of the relative offset, hence `len - 4` + // as the factor to adjust by. + let taken_end = *start_offset + len; + sink.use_label_at_offset(taken_end - 4, *taken, LabelUse::Jump(len - 4)); sink.add_cond_branch(*start_offset, taken_end, *taken, &inverted); - enc::br_if32(sink, c, 0x00000000); + cond.encode(sink); debug_assert_eq!(sink.cur_offset(), taken_end); - // If not taken. + // For the not-taken branch use an unconditional jump to the + // relevant label, and we know that the jump instruction is 5 bytes + // long where the final 4 bytes are the offset to jump by. let not_taken_start = taken_end + 1; let not_taken_end = not_taken_start + 4; - sink.use_label_at_offset(not_taken_start, *not_taken, LabelUse::Jump(1)); sink.add_uncond_branch(taken_end, not_taken_end, *not_taken); enc::jump(sink, 0x00000000); - } - - Inst::BrIfXeq32 { - src1, - src2, - taken, - not_taken, - } => { - br_if_cond_helper( - sink, - *start_offset, - *src1, - *src2, - taken, - not_taken, - enc::br_if_xeq32, - enc::br_if_xneq32, - ); - } - - Inst::BrIfXneq32 { - src1, - src2, - taken, - not_taken, - } => { - br_if_cond_helper( - sink, - *start_offset, - *src1, - *src2, - taken, - not_taken, - enc::br_if_xneq32, - enc::br_if_xeq32, - ); - } - - Inst::BrIfXslt32 { - src1, - src2, - taken, - not_taken, - } => { - br_if_cond_helper( - sink, - *start_offset, - *src1, - *src2, - taken, - not_taken, - enc::br_if_xslt32, - |s, src1, src2, x| enc::br_if_xslteq32(s, src2, src1, x), - ); - } - - Inst::BrIfXslteq32 { - src1, - src2, - taken, - not_taken, - } => { - br_if_cond_helper( - sink, - *start_offset, - *src1, - *src2, - taken, - not_taken, - enc::br_if_xslteq32, - |s, src1, src2, x| enc::br_if_xslt32(s, src2, src1, x), - ); - } - - Inst::BrIfXult32 { - src1, - src2, - taken, - not_taken, - } => { - br_if_cond_helper( - sink, - *start_offset, - *src1, - *src2, - taken, - not_taken, - enc::br_if_xult32, - |s, src1, src2, x| enc::br_if_xulteq32(s, src2, src1, x), - ); - } - - Inst::BrIfXulteq32 { - src1, - src2, - taken, - not_taken, - } => { - br_if_cond_helper( - sink, - *start_offset, - *src1, - *src2, - taken, - not_taken, - enc::br_if_xulteq32, - |s, src1, src2, x| enc::br_if_xult32(s, src2, src1, x), - ); + assert_eq!(sink.cur_offset(), not_taken_end); } Inst::LoadAddr { dst, mem } => { @@ -645,40 +508,3 @@ fn pulley_emit

( } } } - -fn br_if_cond_helper

( - sink: &mut MachBuffer>, - start_offset: u32, - src1: XReg, - src2: XReg, - taken: &MachLabel, - not_taken: &MachLabel, - mut enc: impl FnMut(&mut MachBuffer>, XReg, XReg, i32), - mut enc_inverted: impl FnMut(&mut SmallVec<[u8; 16]>, XReg, XReg, i32), -) where - P: PulleyTargetKind, -{ - // If taken. - let taken_start = start_offset + 3; - let taken_end = taken_start + 4; - - sink.use_label_at_offset(taken_start, *taken, LabelUse::Jump(3)); - let mut inverted = SmallVec::<[u8; 16]>::new(); - enc_inverted(&mut inverted, src1, src2, 0x00000000); - debug_assert_eq!( - inverted.len(), - usize::try_from(taken_end - start_offset).unwrap() - ); - - sink.add_cond_branch(start_offset, taken_end, *taken, &inverted); - enc(sink, src1, src2, 0x00000000); - debug_assert_eq!(sink.cur_offset(), taken_end); - - // If not taken. - let not_taken_start = taken_end + 1; - let not_taken_end = not_taken_start + 4; - - sink.use_label_at_offset(not_taken_start, *not_taken, LabelUse::Jump(1)); - sink.add_uncond_branch(taken_end, not_taken_end, *not_taken); - enc::jump(sink, 0x00000000); -} diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs index 41d82c2ed941..e2560639d1f0 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs @@ -113,15 +113,8 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { Inst::Unwind { .. } | Inst::Nop => {} - Inst::TrapIf { - cond: _, - size: _, - src1, - src2, - code: _, - } => { - collector.reg_use(src1); - collector.reg_use(src2); + Inst::TrapIf { cond, code: _ } => { + cond.get_operands(collector); } Inst::GetSpecial { dst, reg } => { @@ -164,52 +157,12 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { Inst::Jump { .. } => {} - Inst::BrIf32 { - c, + Inst::BrIf { + cond, taken: _, not_taken: _, } => { - collector.reg_use(c); - } - - Inst::BrIfXeq32 { - src1, - src2, - taken: _, - not_taken: _, - } - | Inst::BrIfXneq32 { - src1, - src2, - taken: _, - not_taken: _, - } - | Inst::BrIfXslt32 { - src1, - src2, - taken: _, - not_taken: _, - } - | Inst::BrIfXslteq32 { - src1, - src2, - taken: _, - not_taken: _, - } - | Inst::BrIfXult32 { - src1, - src2, - taken: _, - not_taken: _, - } - | Inst::BrIfXulteq32 { - src1, - src2, - taken: _, - not_taken: _, - } => { - collector.reg_use(src1); - collector.reg_use(src2); + cond.get_operands(collector); } Inst::LoadAddr { dst, mem } => { @@ -426,13 +379,7 @@ where } | Inst::Rets { .. } => MachTerminator::Ret, Inst::Jump { .. } => MachTerminator::Uncond, - Inst::BrIf32 { .. } - | Inst::BrIfXeq32 { .. } - | Inst::BrIfXneq32 { .. } - | Inst::BrIfXslt32 { .. } - | Inst::BrIfXslteq32 { .. } - | Inst::BrIfXult32 { .. } - | Inst::BrIfXulteq32 { .. } => MachTerminator::Cond, + Inst::BrIf { .. } => MachTerminator::Cond, Inst::BrTable { .. } => MachTerminator::Indirect, _ => MachTerminator::None, } @@ -611,16 +558,8 @@ impl Inst { Inst::Unwind { inst } => format!("unwind {inst:?}"), - Inst::TrapIf { - cond, - size, - src1, - src2, - code, - } => { - let src1 = format_reg(**src1); - let src2 = format_reg(**src2); - format!("trap_if {cond}, {size:?}, {src1}, {src2} // code = {code:?}") + Inst::TrapIf { cond, code } => { + format!("trap_{cond} // code = {code:?}") } Inst::Nop => format!("nop"), @@ -651,88 +590,14 @@ impl Inst { Inst::Jump { label } => format!("jump {}", label.to_string()), - Inst::BrIf32 { - c, - taken, - not_taken, - } => { - let c = format_reg(**c); - let taken = taken.to_string(); - let not_taken = not_taken.to_string(); - format!("br_if32 {c}, {taken}; jump {not_taken}") - } - - Inst::BrIfXeq32 { - src1, - src2, - taken, - not_taken, - } => { - let src1 = format_reg(**src1); - let src2 = format_reg(**src2); - let taken = taken.to_string(); - let not_taken = not_taken.to_string(); - format!("br_if_xeq32 {src1}, {src2}, {taken}; jump {not_taken}") - } - Inst::BrIfXneq32 { - src1, - src2, - taken, - not_taken, - } => { - let src1 = format_reg(**src1); - let src2 = format_reg(**src2); - let taken = taken.to_string(); - let not_taken = not_taken.to_string(); - format!("br_if_xneq32 {src1}, {src2}, {taken}; jump {not_taken}") - } - Inst::BrIfXslt32 { - src1, - src2, - taken, - not_taken, - } => { - let src1 = format_reg(**src1); - let src2 = format_reg(**src2); - let taken = taken.to_string(); - let not_taken = not_taken.to_string(); - format!("br_if_xslt32 {src1}, {src2}, {taken}; jump {not_taken}") - } - Inst::BrIfXslteq32 { - src1, - src2, - taken, - not_taken, - } => { - let src1 = format_reg(**src1); - let src2 = format_reg(**src2); - let taken = taken.to_string(); - let not_taken = not_taken.to_string(); - format!("br_if_xslteq32 {src1}, {src2}, {taken}; jump {not_taken}") - } - Inst::BrIfXult32 { - src1, - src2, - taken, - not_taken, - } => { - let src1 = format_reg(**src1); - let src2 = format_reg(**src2); - let taken = taken.to_string(); - let not_taken = not_taken.to_string(); - format!("br_if_xult32 {src1}, {src2}, {taken}; jump {not_taken}") - } - Inst::BrIfXulteq32 { - src1, - src2, + Inst::BrIf { + cond, taken, not_taken, } => { - let src1 = format_reg(**src1); - let src2 = format_reg(**src2); let taken = taken.to_string(); let not_taken = not_taken.to_string(); - format!("br_if_xulteq32 {src1}, {src2}, {taken}; jump {not_taken}") + format!("br_{cond}, {taken}; jump {not_taken}") } Inst::LoadAddr { dst, mem } => { diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index 342bcc344d08..ffbfdaa107b4 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -11,14 +11,44 @@ ;; needs to handle situations such as when the `Value` is 64-bits an explicit ;; comparison must be made. Additionally if `Value` is smaller than 32-bits ;; then it must be sign-extended up to at least 32 bits. -(decl lower_cond (Value) XReg) -(rule (lower_cond val @ (value_type $I64)) (pulley_xneq64 val (pulley_xconst8 0))) -(rule (lower_cond val @ (value_type $I32)) val) -(rule (lower_cond val @ (value_type $I16)) (pulley_zext16 val)) -(rule (lower_cond val @ (value_type $I8)) (pulley_zext8 val)) +(decl lower_cond (Value) Cond) +(rule (lower_cond val @ (value_type $I64)) + (Cond.IfXneq64 val (pulley_xconst8 0))) +(rule (lower_cond val @ (value_type $I32)) (Cond.If32 val)) +(rule (lower_cond val @ (value_type $I16)) (Cond.If32 (pulley_zext16 val))) +(rule (lower_cond val @ (value_type $I8)) (Cond.If32 (pulley_zext8 val))) ;; Peel away explicit `uextend` values to take a look at the inner value. (rule 1 (lower_cond (uextend val)) (lower_cond val)) +;; Conditional branches on `icmp`s. +(rule 1 (lower_cond (icmp cc a b @ (value_type $I32))) (lower_cond_icmp32 cc a b)) +(rule 1 (lower_cond (icmp cc a b @ (value_type $I64))) (lower_cond_icmp64 cc a b)) + +(decl lower_cond_icmp32 (IntCC Value Value) Cond) +(rule (lower_cond_icmp32 (IntCC.Equal) a b) (Cond.IfXeq32 a b)) +(rule (lower_cond_icmp32 (IntCC.NotEqual) a b) (Cond.IfXneq32 a b)) +(rule (lower_cond_icmp32 (IntCC.SignedLessThan) a b) (Cond.IfXslt32 a b)) +(rule (lower_cond_icmp32 (IntCC.SignedLessThanOrEqual) a b) (Cond.IfXslteq32 a b)) +(rule (lower_cond_icmp32 (IntCC.UnsignedLessThan) a b) (Cond.IfXult32 a b)) +(rule (lower_cond_icmp32 (IntCC.UnsignedLessThanOrEqual) a b) (Cond.IfXulteq32 a b)) +;; Swap args for conditions pulley doesn't have +(rule (lower_cond_icmp32 (IntCC.SignedGreaterThan) a b) (Cond.IfXslt32 b a)) +(rule (lower_cond_icmp32 (IntCC.SignedGreaterThanOrEqual) a b) (Cond.IfXslteq32 b a)) +(rule (lower_cond_icmp32 (IntCC.UnsignedGreaterThan) a b) (Cond.IfXult32 b a)) +(rule (lower_cond_icmp32 (IntCC.UnsignedGreaterThanOrEqual) a b) (Cond.IfXulteq32 b a)) + +(decl lower_cond_icmp64 (IntCC Value Value) Cond) +(rule (lower_cond_icmp64 (IntCC.Equal) a b) (Cond.IfXeq64 a b)) +(rule (lower_cond_icmp64 (IntCC.NotEqual) a b) (Cond.IfXneq64 a b)) +(rule (lower_cond_icmp64 (IntCC.SignedLessThan) a b) (Cond.IfXslt64 a b)) +(rule (lower_cond_icmp64 (IntCC.SignedLessThanOrEqual) a b) (Cond.IfXslteq64 a b)) +(rule (lower_cond_icmp64 (IntCC.UnsignedLessThan) a b) (Cond.IfXult64 a b)) +(rule (lower_cond_icmp64 (IntCC.UnsignedLessThanOrEqual) a b) (Cond.IfXulteq64 a b)) +;; Swap args for conditions pulley doesn't have +(rule (lower_cond_icmp64 (IntCC.SignedGreaterThan) a b) (Cond.IfXslt64 b a)) +(rule (lower_cond_icmp64 (IntCC.SignedGreaterThanOrEqual) a b) (Cond.IfXslteq64 b a)) +(rule (lower_cond_icmp64 (IntCC.UnsignedGreaterThan) a b) (Cond.IfXult64 b a)) +(rule (lower_cond_icmp64 (IntCC.UnsignedGreaterThanOrEqual) a b) (Cond.IfXulteq64 b a)) ;; The main control-flow-lowering term: takes a control-flow instruction and ;; target(s) and emits the necessary instructions. @@ -30,37 +60,7 @@ ;; Generic case for conditional branches. (rule -1 (lower_branch (brif c _ _) (two_targets then else)) - (emit_side_effect (pulley_br_if32 (lower_cond c) then else))) - -;; Conditional branches on `icmp`s. -(rule (lower_branch (brif (maybe_uextend (icmp cc a b @ (value_type $I32))) _ _) - (two_targets then else)) - (emit_side_effect (lower_brif_of_icmp32 cc a b then else))) - -(decl lower_brif_of_icmp32 (IntCC Value Value MachLabel MachLabel) SideEffectNoResult) -(rule (lower_brif_of_icmp32 (IntCC.Equal) a b then else) - (pulley_br_if_xeq32 a b then else)) -(rule (lower_brif_of_icmp32 (IntCC.NotEqual) a b then else) - (pulley_br_if_xneq32 a b then else)) -(rule (lower_brif_of_icmp32 (IntCC.SignedLessThan) a b then else) - (pulley_br_if_xslt32 a b then else)) -(rule (lower_brif_of_icmp32 (IntCC.SignedLessThanOrEqual) a b then else) - (pulley_br_if_xslteq32 a b then else)) -(rule (lower_brif_of_icmp32 (IntCC.UnsignedLessThan) a b then else) - (pulley_br_if_xult32 a b then else)) -(rule (lower_brif_of_icmp32 (IntCC.UnsignedLessThanOrEqual) a b then else) - (pulley_br_if_xulteq32 a b then else)) - -;; Pulley doesn't have instructions for `>` and `>=`, so we have to reverse the -;; operation. -(rule (lower_brif_of_icmp32 (IntCC.SignedGreaterThan) a b then else) - (lower_brif_of_icmp32 (IntCC.SignedLessThan) b a then else)) -(rule (lower_brif_of_icmp32 (IntCC.SignedGreaterThanOrEqual) a b then else) - (lower_brif_of_icmp32 (IntCC.SignedLessThanOrEqual) b a then else)) -(rule (lower_brif_of_icmp32 (IntCC.UnsignedGreaterThan) a b then else) - (lower_brif_of_icmp32 (IntCC.UnsignedLessThan) b a then else)) -(rule (lower_brif_of_icmp32 (IntCC.UnsignedGreaterThanOrEqual) a b then else) - (lower_brif_of_icmp32 (IntCC.UnsignedLessThanOrEqual) b a then else)) + (emit_side_effect (pulley_br_if (lower_cond c) then else))) ;; Branch tables. (rule (lower_branch (br_table index _) (jump_table_targets default targets)) @@ -73,57 +73,11 @@ ;;;; Rules for `trapz` and `trapnz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (trapz a @ (value_type (ty_32_or_64 ty)) code)) - (let ((zero Reg (pulley_xconst8 0))) - (side_effect (pulley_trap_if (IntCC.Equal) - (ty_to_operand_size ty) - a - zero - code)))) - -(rule (lower (trapnz a @ (value_type (ty_32_or_64 ty)) code)) - (let ((zero Reg (pulley_xconst8 0))) - (side_effect (pulley_trap_if (IntCC.NotEqual) - (ty_to_operand_size ty) - a - zero - code)))) - -;; Fold `(trap[n]z (icmp ...))` together. - -(rule 1 (lower (trapz (icmp cc a b @ (value_type (ty_32_or_64 ty))) code)) - (side_effect (pulley_trap_if (intcc_complement cc) - (ty_to_operand_size ty) - a - b - code))) - -(rule 1 (lower (trapnz (icmp cc a b @ (value_type (ty_32_or_64 ty))) code)) - (side_effect (pulley_trap_if cc - (ty_to_operand_size ty) - a - b - code))) - -;; Fold `(trap[n]z (iconst ...))` together. - -(rule 2 (lower (trapz (iconst (u64_from_imm64 (u64_nonzero _))) code)) - (output_none)) - -(rule 2 (lower (trapnz (iconst (u64_from_imm64 0)) code)) - (output_none)) - -;; TODO: These rules are disabled because they insert a block terminator into -;; the middle of the current block, which leads to regalloc errors. We should -;; ideally be able to lower conditional traps that will always trap into -;; unconditional traps though. This isn't very high priority though because -;; traps, pretty much by definition, are not hot paths. -;; -;; (rule 3 (lower (trapnz (iconst (u64_from_imm64 (u64_nonzero _))) code)) -;; (side_effect (pulley_trap code))) -;; -;; (rule 3 (lower (trapz (iconst (u64_from_imm64 0)) code)) -;; (side_effect (pulley_trap code))) +(rule (lower (trapz cond code)) + (side_effect (pulley_trap_if (cond_invert (lower_cond cond)) code))) + +(rule (lower (trapnz cond code)) + (side_effect (pulley_trap_if (lower_cond cond) code))) ;;;; Rules for `get_stack_pointer` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -470,13 +424,30 @@ ;;;; Rules for `select` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 0 (lower (has_type (ty_int (fits_in_32 _)) (select c a b))) - (pulley_xselect32 (lower_cond c) a b)) + (pulley_xselect32 (emit_cond (lower_cond c)) a b)) (rule 1 (lower (has_type $I64 (select c a b))) - (pulley_xselect64 (lower_cond c) a b)) + (pulley_xselect64 (emit_cond (lower_cond c)) a b)) (rule 1 (lower (has_type $F32 (select c a b))) - (pulley_fselect32 (lower_cond c) a b)) + (pulley_fselect32 (emit_cond (lower_cond c)) a b)) (rule 1 (lower (has_type $F64 (select c a b))) - (pulley_fselect64 (lower_cond c) a b)) + (pulley_fselect64 (emit_cond (lower_cond c)) a b)) + +;; Helper to emit a conditional into a register itself. +(decl emit_cond (Cond) XReg) +(rule (emit_cond (Cond.If32 reg)) reg) +(rule (emit_cond (Cond.IfNot32 reg)) (pulley_xeq32 reg (pulley_xconst8 0))) +(rule (emit_cond (Cond.IfXeq32 src1 src2)) (pulley_xeq32 src1 src2)) +(rule (emit_cond (Cond.IfXneq32 src1 src2)) (pulley_xneq32 src1 src2)) +(rule (emit_cond (Cond.IfXslt32 src1 src2)) (pulley_xslt32 src1 src2)) +(rule (emit_cond (Cond.IfXslteq32 src1 src2)) (pulley_xslteq32 src1 src2)) +(rule (emit_cond (Cond.IfXult32 src1 src2)) (pulley_xult32 src1 src2)) +(rule (emit_cond (Cond.IfXulteq32 src1 src2)) (pulley_xulteq32 src1 src2)) +(rule (emit_cond (Cond.IfXeq64 src1 src2)) (pulley_xeq64 src1 src2)) +(rule (emit_cond (Cond.IfXneq64 src1 src2)) (pulley_xneq64 src1 src2)) +(rule (emit_cond (Cond.IfXslt64 src1 src2)) (pulley_xslt64 src1 src2)) +(rule (emit_cond (Cond.IfXslteq64 src1 src2)) (pulley_xslteq64 src1 src2)) +(rule (emit_cond (Cond.IfXult64 src1 src2)) (pulley_xult64 src1 src2)) +(rule (emit_cond (Cond.IfXulteq64 src1 src2)) (pulley_xulteq64 src1 src2)) diff --git a/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs b/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs index ae61bbc18fbc..d5107b9950da 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs +++ b/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs @@ -10,7 +10,7 @@ use crate::ir::{condcodes::*, immediates::*, types::*, *}; use crate::isa::pulley_shared::{ abi::*, inst::{FReg, OperandSize, VReg, WritableFReg, WritableVReg, WritableXReg, XReg}, - lower::regs, + lower::{regs, Cond}, *, }; use crate::machinst::{ @@ -114,6 +114,10 @@ where fn lr_reg(&mut self) -> XReg { XReg::new(regs::lr_reg()).unwrap() } + + fn cond_invert(&mut self, cond: &Cond) -> Cond { + cond.invert() + } } /// The main entry point for lowering with ISLE. diff --git a/cranelift/filetests/filetests/isa/pulley32/brif.clif b/cranelift/filetests/filetests/isa/pulley32/brif.clif index 73059c7a65d9..3a0bf0bceef8 100644 --- a/cranelift/filetests/filetests/isa/pulley32/brif.clif +++ b/cranelift/filetests/filetests/isa/pulley32/brif.clif @@ -111,8 +111,7 @@ block2: ; VCode: ; block0: ; xconst8 x4, 0 -; xneq64 x6, x0, x4 -; br_if32 x6, label2; jump label1 +; br_if_xneq64 x0, x4, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -122,8 +121,7 @@ block2: ; ; Disassembled: ; xconst8 x4, 0 -; xneq64 x6, x0, x4 -; br_if32 x6, 0xa // target = 0x10 +; br_if_xneq64 x0, x4, 0xb // target = 0xe ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -246,9 +244,7 @@ block2: ; VCode: ; block0: -; xulteq64 x6, x1, x0 -; zext8 x6, x6 -; br_if32 x6, label2; jump label1 +; br_if_xulteq64 x1, x0, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -257,9 +253,7 @@ block2: ; ret ; ; Disassembled: -; xulteq64 x6, x1, x0 -; zext8 x6, x6 -; br_if32 x6, 0xa // target = 0x10 +; br_if_xulteq64 x1, x0, 0xb // target = 0xb ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 diff --git a/cranelift/filetests/filetests/isa/pulley32/trap.clif b/cranelift/filetests/filetests/isa/pulley32/trap.clif index 8d5da4749bf1..f11b1f2de43e 100644 --- a/cranelift/filetests/filetests/isa/pulley32/trap.clif +++ b/cranelift/filetests/filetests/isa/pulley32/trap.clif @@ -24,7 +24,7 @@ block0(v0: i64): ; VCode: ; block0: ; xconst8 x2, 42 -; trap_if eq, Size64, x0, x2 // code = TrapCode(1) +; trap_if_xeq64 x0, x2 // code = TrapCode(1) ; ret ; ; Disassembled: @@ -44,7 +44,7 @@ block0(v0: i64): ; VCode: ; block0: ; xconst8 x2, 42 -; trap_if ne, Size64, x0, x2 // code = TrapCode(1) +; trap_if_xneq64 x0, x2 // code = TrapCode(1) ; ret ; ; Disassembled: @@ -64,7 +64,7 @@ block0(v0: i64): ; VCode: ; block0: ; xconst8 x2, 42 -; trap_if eq, Size64, x0, x2 // code = TrapCode(1) +; trap_if_xeq64 x0, x2 // code = TrapCode(1) ; ret ; ; Disassembled: @@ -84,7 +84,7 @@ block0(v0: i64): ; VCode: ; block0: ; xconst8 x2, 42 -; trap_if ne, Size64, x0, x2 // code = TrapCode(1) +; trap_if_xneq64 x0, x2 // code = TrapCode(1) ; ret ; ; Disassembled: @@ -110,26 +110,31 @@ block2: ; VCode: ; block0: -; xconst8 x4, 0 -; xneq64 x6, x0, x4 -; br_if32 x6, label2; jump label1 +; xconst8 x6, 0 +; br_if_xneq64 x0, x6, label2; jump label1 ; block1: +; xconst8 x7, 0 +; xconst8 x8, 0 +; trap_if_xneq64 x7, x8 // code = TrapCode(1) ; ret ; block2: -; xconst8 x7, 42 -; xconst8 x8, 0 -; trap_if ne, Size64, x7, x8 // code = TrapCode(1) +; xconst8 x9, 42 +; xconst8 x10, 0 +; trap_if_xneq64 x9, x10 // code = TrapCode(1) ; ret ; ; Disassembled: -; xconst8 x4, 0 -; xneq64 x6, x0, x4 -; br_if32 x6, 0x7 // target = 0xd -; ret -; xconst8 x7, 42 +; xconst8 x6, 0 +; br_if_xneq64 x0, x6, 0x15 // target = 0x18 +; xconst8 x7, 0 ; xconst8 x8, 0 -; br_if_xneq64 x7, x8, 0x8 // target = 0x1b +; br_if_xneq64 x7, x8, 0x16 // target = 0x26 ; ret +; xconst8 x9, 42 +; xconst8 x10, 0 +; br_if_xneq64 x9, x10, 0xb // target = 0x29 +; ret +; trap ; trap function %trapz_iconst_fold(i64) { @@ -149,25 +154,30 @@ block2: ; VCode: ; block0: -; xconst8 x4, 0 -; xneq64 x6, x0, x4 -; br_if32 x6, label2; jump label1 -; block1: ; xconst8 x6, 0 +; br_if_xneq64 x0, x6, label2; jump label1 +; block1: ; xconst8 x7, 0 -; trap_if eq, Size64, x6, x7 // code = TrapCode(1) +; xconst8 x8, 0 +; trap_if_xeq64 x7, x8 // code = TrapCode(1) ; ret ; block2: +; xconst8 x9, 42 +; xconst8 x10, 0 +; trap_if_xeq64 x9, x10 // code = TrapCode(1) ; ret ; ; Disassembled: -; xconst8 x4, 0 -; xneq64 x6, x0, x4 -; br_if32 x6, 0x14 // target = 0x1a ; xconst8 x6, 0 +; br_if_xneq64 x0, x6, 0x15 // target = 0x18 ; xconst8 x7, 0 -; br_if_xeq64 x6, x7, 0x9 // target = 0x1b +; xconst8 x8, 0 +; br_if_xeq64 x7, x8, 0x16 // target = 0x26 ; ret +; xconst8 x9, 42 +; xconst8 x10, 0 +; br_if_xeq64 x9, x10, 0xb // target = 0x29 ; ret ; trap +; trap diff --git a/cranelift/filetests/filetests/isa/pulley64/brif.clif b/cranelift/filetests/filetests/isa/pulley64/brif.clif index d8ae5981d49f..9634f0bc25ea 100644 --- a/cranelift/filetests/filetests/isa/pulley64/brif.clif +++ b/cranelift/filetests/filetests/isa/pulley64/brif.clif @@ -111,8 +111,7 @@ block2: ; VCode: ; block0: ; xconst8 x4, 0 -; xneq64 x6, x0, x4 -; br_if32 x6, label2; jump label1 +; br_if_xneq64 x0, x4, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -122,8 +121,7 @@ block2: ; ; Disassembled: ; xconst8 x4, 0 -; xneq64 x6, x0, x4 -; br_if32 x6, 0xa // target = 0x10 +; br_if_xneq64 x0, x4, 0xb // target = 0xe ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 @@ -246,9 +244,7 @@ block2: ; VCode: ; block0: -; xulteq64 x6, x1, x0 -; zext8 x6, x6 -; br_if32 x6, label2; jump label1 +; br_if_xulteq64 x1, x0, label2; jump label1 ; block1: ; xconst8 x0, 0 ; ret @@ -257,9 +253,7 @@ block2: ; ret ; ; Disassembled: -; xulteq64 x6, x1, x0 -; zext8 x6, x6 -; br_if32 x6, 0xa // target = 0x10 +; br_if_xulteq64 x1, x0, 0xb // target = 0xb ; xconst8 x0, 0 ; ret ; xconst8 x0, 1 diff --git a/cranelift/filetests/filetests/isa/pulley64/trap.clif b/cranelift/filetests/filetests/isa/pulley64/trap.clif index ed68dbdf1665..e343de871480 100644 --- a/cranelift/filetests/filetests/isa/pulley64/trap.clif +++ b/cranelift/filetests/filetests/isa/pulley64/trap.clif @@ -24,7 +24,7 @@ block0(v0: i64): ; VCode: ; block0: ; xconst8 x2, 42 -; trap_if eq, Size64, x0, x2 // code = TrapCode(1) +; trap_if_xeq64 x0, x2 // code = TrapCode(1) ; ret ; ; Disassembled: @@ -44,7 +44,7 @@ block0(v0: i64): ; VCode: ; block0: ; xconst8 x2, 42 -; trap_if ne, Size64, x0, x2 // code = TrapCode(1) +; trap_if_xneq64 x0, x2 // code = TrapCode(1) ; ret ; ; Disassembled: @@ -64,7 +64,7 @@ block0(v0: i64): ; VCode: ; block0: ; xconst8 x2, 42 -; trap_if eq, Size64, x0, x2 // code = TrapCode(1) +; trap_if_xeq64 x0, x2 // code = TrapCode(1) ; ret ; ; Disassembled: @@ -84,7 +84,7 @@ block0(v0: i64): ; VCode: ; block0: ; xconst8 x2, 42 -; trap_if ne, Size64, x0, x2 // code = TrapCode(1) +; trap_if_xneq64 x0, x2 // code = TrapCode(1) ; ret ; ; Disassembled: @@ -110,26 +110,31 @@ block2: ; VCode: ; block0: -; xconst8 x4, 0 -; xneq64 x6, x0, x4 -; br_if32 x6, label2; jump label1 +; xconst8 x6, 0 +; br_if_xneq64 x0, x6, label2; jump label1 ; block1: +; xconst8 x7, 0 +; xconst8 x8, 0 +; trap_if_xneq64 x7, x8 // code = TrapCode(1) ; ret ; block2: -; xconst8 x7, 42 -; xconst8 x8, 0 -; trap_if ne, Size64, x7, x8 // code = TrapCode(1) +; xconst8 x9, 42 +; xconst8 x10, 0 +; trap_if_xneq64 x9, x10 // code = TrapCode(1) ; ret ; ; Disassembled: -; xconst8 x4, 0 -; xneq64 x6, x0, x4 -; br_if32 x6, 0x7 // target = 0xd -; ret -; xconst8 x7, 42 +; xconst8 x6, 0 +; br_if_xneq64 x0, x6, 0x15 // target = 0x18 +; xconst8 x7, 0 ; xconst8 x8, 0 -; br_if_xneq64 x7, x8, 0x8 // target = 0x1b +; br_if_xneq64 x7, x8, 0x16 // target = 0x26 ; ret +; xconst8 x9, 42 +; xconst8 x10, 0 +; br_if_xneq64 x9, x10, 0xb // target = 0x29 +; ret +; trap ; trap function %trapz_iconst_fold(i64) { @@ -149,25 +154,30 @@ block2: ; VCode: ; block0: -; xconst8 x4, 0 -; xneq64 x6, x0, x4 -; br_if32 x6, label2; jump label1 -; block1: ; xconst8 x6, 0 +; br_if_xneq64 x0, x6, label2; jump label1 +; block1: ; xconst8 x7, 0 -; trap_if eq, Size64, x6, x7 // code = TrapCode(1) +; xconst8 x8, 0 +; trap_if_xeq64 x7, x8 // code = TrapCode(1) ; ret ; block2: +; xconst8 x9, 42 +; xconst8 x10, 0 +; trap_if_xeq64 x9, x10 // code = TrapCode(1) ; ret ; ; Disassembled: -; xconst8 x4, 0 -; xneq64 x6, x0, x4 -; br_if32 x6, 0x14 // target = 0x1a ; xconst8 x6, 0 +; br_if_xneq64 x0, x6, 0x15 // target = 0x18 ; xconst8 x7, 0 -; br_if_xeq64 x6, x7, 0x9 // target = 0x1b +; xconst8 x8, 0 +; br_if_xeq64 x7, x8, 0x16 // target = 0x26 ; ret +; xconst8 x9, 42 +; xconst8 x10, 0 +; br_if_xeq64 x9, x10, 0xb // target = 0x29 ; ret ; trap +; trap diff --git a/crates/cranelift/src/translate/table.rs b/crates/cranelift/src/translate/table.rs index 8ffe21624a95..9fa7ce8e39ba 100644 --- a/crates/cranelift/src/translate/table.rs +++ b/crates/cranelift/src/translate/table.rs @@ -65,6 +65,9 @@ impl TableData { ) -> (ir::Value, ir::MemFlags) { let index_ty = pos.func.dfg.value_type(index); let addr_ty = env.pointer_type(); + let spectre_mitigations_enabled = + env.isa().flags().enable_table_access_spectre_mitigation() + && env.clif_memory_traps_enabled(); // Start with the bounds check. Trap if `index + 1 > bound`. let bound = self.bound.bound(env.isa(), pos.cursor(), index_ty); @@ -74,7 +77,7 @@ impl TableData { .ins() .icmp(IntCC::UnsignedGreaterThanOrEqual, index, bound); - if !env.isa().flags().enable_table_access_spectre_mitigation() { + if !spectre_mitigations_enabled { env.trapnz(pos, oob, crate::TRAP_TABLE_OUT_OF_BOUNDS); } @@ -101,7 +104,7 @@ impl TableData { let base_flags = ir::MemFlags::new() .with_aligned() .with_alias_region(Some(ir::AliasRegion::Table)); - if env.isa().flags().enable_table_access_spectre_mitigation() { + if spectre_mitigations_enabled { // Short-circuit the computed table element address to a null pointer // when out-of-bounds. The consumer of this address will trap when // trying to access it. diff --git a/crates/wasmtime/Cargo.toml b/crates/wasmtime/Cargo.toml index 53497575eac2..0641b29e4339 100644 --- a/crates/wasmtime/Cargo.toml +++ b/crates/wasmtime/Cargo.toml @@ -315,6 +315,7 @@ std = [ 'object/std', 'once_cell', 'wasmtime-fiber?/std', + 'pulley-interpreter?/std', # technically this isn't necessary but once you have the standard library you # probably want things to go fast in which case you've probably got signal # handlers and such so implicitly enable this. This also helps reduce the diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index 7cf2c05a2631..666adaf8d393 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -395,7 +395,6 @@ impl WastTest { // features in Pulley are implemented. if config.compiler == Compiler::CraneliftPulley { let unsupported = [ - "misc_testsuite/call_indirect.wast", "misc_testsuite/component-model/fused.wast", "misc_testsuite/component-model/strings.wast", "misc_testsuite/embenchen_fannkuch.wast", @@ -403,27 +402,15 @@ impl WastTest { "misc_testsuite/embenchen_ifs.wast", "misc_testsuite/embenchen_primes.wast", "misc_testsuite/float-round-doesnt-load-too-much.wast", - "misc_testsuite/function-references/call_indirect.wast", - "misc_testsuite/function-references/instance.wast", - "misc_testsuite/function-references/table_fill.wast", - "misc_testsuite/function-references/table_get.wast", - "misc_testsuite/function-references/table_grow.wast", - "misc_testsuite/function-references/table_set.wast", - "misc_testsuite/gc/anyref_that_is_i31_barriers.wast", - "misc_testsuite/gc/i31ref-of-global-initializers.wast", - "misc_testsuite/gc/i31ref-tables.wast", "misc_testsuite/int-to-float-splat.wast", "misc_testsuite/issue1809.wast", "misc_testsuite/issue4840.wast", "misc_testsuite/issue4890.wast", "misc_testsuite/issue6562.wast", - "misc_testsuite/many_table_gets_lead_to_gc.wast", "misc_testsuite/memory-combos.wast", "misc_testsuite/memory64/simd.wast", "misc_testsuite/memory64/threads.wast", "misc_testsuite/misc_traps.wast", - "misc_testsuite/no-panic.wast", - "misc_testsuite/partial-init-table-segment.wast", "misc_testsuite/rust_fannkuch.wast", "misc_testsuite/simd/almost-extmul.wast", "misc_testsuite/simd/canonicalize-nan.wast", @@ -438,8 +425,6 @@ impl WastTest { "misc_testsuite/simd/spillslot-size-fuzzbug.wast", "misc_testsuite/simd/unaligned-load.wast", "misc_testsuite/simd/v128-select.wast", - "misc_testsuite/table_copy.wast", - "misc_testsuite/table_copy_on_imported_tables.wast", "misc_testsuite/threads/LB_atomic.wast", "misc_testsuite/threads/MP_atomic.wast", "misc_testsuite/threads/MP_wait.wast", @@ -452,14 +437,9 @@ impl WastTest { "misc_testsuite/winch/_simd_store.wast", "misc_testsuite/winch/global.wast", "misc_testsuite/winch/select.wast", - "misc_testsuite/winch/table_fill.wast", - "misc_testsuite/winch/table_get.wast", - "misc_testsuite/winch/table_set.wast", - "spec_testsuite/bulk.wast", "spec_testsuite/call.wast", "spec_testsuite/call_indirect.wast", "spec_testsuite/conversions.wast", - "spec_testsuite/elem.wast", "spec_testsuite/endianness.wast", "spec_testsuite/f32.wast", "spec_testsuite/f32_bitwise.wast", @@ -471,7 +451,6 @@ impl WastTest { "spec_testsuite/float_exprs.wast", "spec_testsuite/float_literals.wast", "spec_testsuite/float_misc.wast", - "spec_testsuite/func_ptrs.wast", "spec_testsuite/global.wast", "spec_testsuite/i32.wast", "spec_testsuite/i64.wast", @@ -479,27 +458,17 @@ impl WastTest { "spec_testsuite/imports.wast", "spec_testsuite/int_exprs.wast", "spec_testsuite/labels.wast", - "spec_testsuite/left-to-right.wast", - "spec_testsuite/linking.wast", - "spec_testsuite/load.wast", "spec_testsuite/local_get.wast", "spec_testsuite/local_set.wast", "spec_testsuite/local_tee.wast", "spec_testsuite/loop.wast", "spec_testsuite/memory.wast", - "spec_testsuite/memory_grow.wast", "spec_testsuite/proposals/annotations/simd_lane.wast", - "spec_testsuite/proposals/extended-const/elem.wast", "spec_testsuite/proposals/extended-const/global.wast", "spec_testsuite/proposals/multi-memory/float_exprs0.wast", "spec_testsuite/proposals/multi-memory/float_exprs1.wast", "spec_testsuite/proposals/multi-memory/imports.wast", - "spec_testsuite/proposals/multi-memory/linking0.wast", - "spec_testsuite/proposals/multi-memory/linking3.wast", - "spec_testsuite/proposals/multi-memory/load.wast", - "spec_testsuite/proposals/multi-memory/load2.wast", "spec_testsuite/proposals/multi-memory/memory.wast", - "spec_testsuite/proposals/multi-memory/memory_grow.wast", "spec_testsuite/proposals/multi-memory/simd_memory-multi.wast", "spec_testsuite/proposals/relaxed-simd/i16x8_relaxed_q15mulr_s.wast", "spec_testsuite/proposals/relaxed-simd/i32x4_relaxed_trunc.wast", @@ -511,8 +480,6 @@ impl WastTest { "spec_testsuite/proposals/threads/atomic.wast", "spec_testsuite/proposals/threads/imports.wast", "spec_testsuite/proposals/threads/memory.wast", - "spec_testsuite/ref_func.wast", - "spec_testsuite/ref_is_null.wast", "spec_testsuite/select.wast", "spec_testsuite/simd_address.wast", "spec_testsuite/simd_align.wast", @@ -572,12 +539,6 @@ impl WastTest { "spec_testsuite/simd_store8_lane.wast", "spec_testsuite/stack.wast", "spec_testsuite/switch.wast", - "spec_testsuite/table_copy.wast", - "spec_testsuite/table_fill.wast", - "spec_testsuite/table_get.wast", - "spec_testsuite/table_grow.wast", - "spec_testsuite/table_init.wast", - "spec_testsuite/table_set.wast", "spec_testsuite/traps.wast", ]; diff --git a/tests/disas/pulley/epoch-simple.wat b/tests/disas/pulley/epoch-simple.wat index 687ada74d2f1..7cf6a2e0afeb 100644 --- a/tests/disas/pulley/epoch-simple.wat +++ b/tests/disas/pulley/epoch-simple.wat @@ -7,14 +7,12 @@ ) ;; wasm[0]::function[0]: ;; push_frame -;; xload64le_offset32 x8, x0, 8 -;; xload64le_offset32 x9, x0, 32 -;; xload64le_offset32 x9, x9, 0 -;; xload64le_offset32 x8, x8, 8 -;; xulteq64 x8, x8, x9 -;; zext8 x8, x8 -;; br_if32 x8, 0x8 // target = 0x2b -;; 29: pop_frame +;; xload64le_offset32 x6, x0, 8 +;; xload64le_offset32 x7, x0, 32 +;; xload64le_offset32 x7, x7, 0 +;; xload64le_offset32 x6, x6, 8 +;; br_if_xulteq64 x6, x7, 0x9 // target = 0x26 +;; 24: pop_frame ;; ret -;; 2b: call 0xa2 // target = 0xcd -;; 30: jump 0xfffffffffffffff9 // target = 0x29 +;; 26: call 0xbd // target = 0xe3 +;; 2b: jump 0xfffffffffffffff9 // target = 0x24