Skip to content

Commit

Permalink
pulley: Support big-endian targets (bytecodealliance#9759)
Browse files Browse the repository at this point in the history
* pulley: Support big-endian targets

This commit fixes the Pulley interpreter and Cranelift backend to
properly support big-endian targets. The problem beforehand was that
loads/stores in Pulley were defined as the native endianness which means
that big and little-endian platforms differed. Additionally the
previously set of understood pulley targets were implicitly always
little-endian which was not appropriate for a big-endian host where JIT
data structures are stored in big-endian format.

This commit fixes all of these issues with a few changes:

* Pulley loads/stores are always little-endian now.
* Pulley now has bswap32/bswap64 instructions.
* Wasmtime/Cranelift now understand big-endian pulley targets (e.g.
  `pulley32be`).
* CLIF translation of loads/stores now properly handles the endianness
  flags on `MemFlags`.

In the future if necessary we could natively add a macro-op for
big-endian loads/stores to Pulley but for now it's more minimal to just
have `bswap32` and `bswap64`.

The result of this commit is that Pulley tests are now run and executed
on s390x like all other platforms.

* Fix fuzz build

* Review comments

* Fix fuzz build
  • Loading branch information
alexcrichton authored Dec 6, 2024
1 parent 019078a commit 0a894cb
Show file tree
Hide file tree
Showing 24 changed files with 413 additions and 160 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions cranelift/codegen/meta/src/isa/pulley.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,11 @@ pub(crate) fn define() -> TargetIsa {
* 'pointer64'\n",
vec!["pointer32", "pointer64"],
);
settings.add_bool(
"big_endian",
"Whether this is a big-endian target",
"Whether this is a big-endian target",
false,
);
TargetIsa::new("pulley", settings.build())
}
8 changes: 6 additions & 2 deletions cranelift/codegen/src/isa/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,12 @@ pub fn lookup(triple: Triple) -> Result<Builder, LookupError> {
Architecture::Aarch64 { .. } => isa_builder!(aarch64, (feature = "arm64"), triple),
Architecture::S390x { .. } => isa_builder!(s390x, (feature = "s390x"), triple),
Architecture::Riscv64 { .. } => isa_builder!(riscv64, (feature = "riscv64"), triple),
Architecture::Pulley32 => isa_builder!(pulley32, (feature = "pulley"), triple),
Architecture::Pulley64 => isa_builder!(pulley64, (feature = "pulley"), triple),
Architecture::Pulley32 | Architecture::Pulley32be => {
isa_builder!(pulley32, (feature = "pulley"), triple)
}
Architecture::Pulley64 | Architecture::Pulley64be => {
isa_builder!(pulley64, (feature = "pulley"), triple)
}
_ => Err(LookupError::Unsupported),
}
}
Expand Down
16 changes: 16 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,10 @@
(Sext8 (dst WritableXReg) (src XReg))
(Sext16 (dst WritableXReg) (src XReg))
(Sext32 (dst WritableXReg) (src XReg))

;; Byte swaps.
(Bswap32 (dst WritableXReg) (src XReg))
(Bswap64 (dst WritableXReg) (src XReg))
)
)

Expand Down Expand Up @@ -624,6 +628,18 @@
(_ Unit (emit (MInst.Sext32 dst src))))
dst))

(decl pulley_bswap32 (XReg) XReg)
(rule (pulley_bswap32 src)
(let ((dst WritableXReg (temp_writable_xreg))
(_ Unit (emit (MInst.Bswap32 dst src))))
dst))

(decl pulley_bswap64 (XReg) XReg)
(rule (pulley_bswap64 src)
(let ((dst WritableXReg (temp_writable_xreg))
(_ Unit (emit (MInst.Bswap64 dst src))))
dst))

;;;; Helpers for Emitting Calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(decl gen_call (SigRef ExternalName RelocDistance ValueSlice) InstOutput)
Expand Down
2 changes: 2 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,8 @@ fn pulley_emit<P>(
Inst::Sext8 { dst, src } => enc::sext8(sink, dst, src),
Inst::Sext16 { dst, src } => enc::sext16(sink, dst, src),
Inst::Sext32 { dst, src } => enc::sext32(sink, dst, src),
Inst::Bswap32 { dst, src } => enc::bswap32(sink, dst, src),
Inst::Bswap64 { dst, src } => enc::bswap64(sink, dst, src),
}
}

Expand Down
14 changes: 13 additions & 1 deletion cranelift/codegen/src/isa/pulley_shared/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,9 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
| Inst::Zext32 { dst, src }
| Inst::Sext8 { dst, src }
| Inst::Sext16 { dst, src }
| Inst::Sext32 { dst, src } => {
| Inst::Sext32 { dst, src }
| Inst::Bswap32 { dst, src }
| Inst::Bswap64 { dst, src } => {
collector.reg_use(src);
collector.reg_def(dst);
}
Expand Down Expand Up @@ -915,6 +917,16 @@ impl Inst {
let src = format_reg(**src);
format!("sext32 {dst}, {src}")
}
Inst::Bswap32 { dst, src } => {
let dst = format_reg(*dst.to_reg());
let src = format_reg(**src);
format!("bswap32 {dst}, {src}")
}
Inst::Bswap64 { dst, src } => {
let dst = format_reg(*dst.to_reg());
let src = format_reg(**src);
format!("bswap64 {dst}, {src}")
}
}
}
}
Expand Down
27 changes: 22 additions & 5 deletions cranelift/codegen/src/isa/pulley_shared/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -226,19 +226,36 @@
;;;; Rules for `load` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty (load flags addr (offset32 offset))))
(pulley_load (Amode.RegOffset addr (i32_as_i64 offset))
ty
flags
(ExtKind.Zero)))
(let ((le Reg (pulley_load (Amode.RegOffset addr (i32_as_i64 offset))
ty
flags
(ExtKind.Zero))))
(xswap_if_be le ty flags)))


;;;; Rules for `store` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(type Endianness extern (enum Big Little))
(decl pure endianness (MemFlags) Endianness)
(extern constructor endianness endianness)

(rule (lower (store flags src @ (value_type ty) addr (offset32 offset)))
(side_effect (pulley_store (Amode.RegOffset addr (i32_as_i64 offset))
src
(xswap_if_be src ty flags)
ty
flags)))

(decl xswap_if_be (XReg Type MemFlags) XReg)
(rule (xswap_if_be val _ty flags)
(if-let (Endianness.Little) (endianness flags))
val)
(rule (xswap_if_be val $I32 flags)
(if-let (Endianness.Big) (endianness flags))
(pulley_bswap32 val))
(rule (xswap_if_be val $I64 flags)
(if-let (Endianness.Big) (endianness flags))
(pulley_bswap64 val))

;;;; Rules for `stack_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (stack_addr stack_slot offset))
Expand Down
4 changes: 4 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ where
fn lr_reg(&mut self) -> XReg {
XReg::new(regs::lr_reg()).unwrap()
}

fn endianness(&mut self, flags: MemFlags) -> Endianness {
flags.endianness(self.backend.target_endianness())
}
}

/// The main entry point for lowering with ISLE.
Expand Down
25 changes: 18 additions & 7 deletions cranelift/codegen/src/isa/pulley_shared/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ mod settings;

use self::inst::EmitInfo;
use super::{Builder as IsaBuilder, FunctionAlignment};
use crate::ir::Endianness;
use crate::{
dominator_tree::DominatorTree,
ir,
Expand Down Expand Up @@ -124,6 +125,14 @@ where
let abi = abi::PulleyCallee::new(func, self, &self.isa_flags, &sigs)?;
machinst::compile::<Self>(func, domtree, self, abi, emit_info, sigs, ctrl_plane)
}

pub fn target_endianness(&self) -> Endianness {
if self.isa_flags.big_endian() {
Endianness::Big
} else {
Endianness::Little
}
}
}

impl<P> TargetIsa for PulleyBackend<P>
Expand Down Expand Up @@ -234,14 +243,10 @@ where

/// Create a new Pulley ISA builder.
pub fn isa_builder(triple: Triple) -> IsaBuilder {
assert!(matches!(
triple.architecture,
Architecture::Pulley32 | Architecture::Pulley64
));
let constructor = match triple.architecture {
Architecture::Pulley32 => isa_constructor_32,
Architecture::Pulley64 => isa_constructor_64,
_ => unreachable!(),
Architecture::Pulley32 | Architecture::Pulley32be => isa_constructor_32,
Architecture::Pulley64 | Architecture::Pulley64be => isa_constructor_64,
other => panic!("unexpected architecture {other:?}"),
};
IsaBuilder {
triple,
Expand All @@ -258,6 +263,9 @@ fn isa_constructor_32(
use crate::settings::Configurable;
let mut builder = builder.clone();
builder.set("pointer_width", "pointer32").unwrap();
if triple.endianness().unwrap() == target_lexicon::Endianness::Big {
builder.enable("big_endian").unwrap();
}
let isa_flags = PulleyFlags::new(&shared_flags, &builder);

let backend =
Expand All @@ -273,6 +281,9 @@ fn isa_constructor_64(
use crate::settings::Configurable;
let mut builder = builder.clone();
builder.set("pointer_width", "pointer64").unwrap();
if triple.endianness().unwrap() == target_lexicon::Endianness::Big {
builder.enable("big_endian").unwrap();
}
let isa_flags = PulleyFlags::new(&shared_flags, &builder);

let backend =
Expand Down
71 changes: 71 additions & 0 deletions cranelift/filetests/filetests/isa/pulley64/loadbe.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
test compile precise-output
target pulley64 big_endian

function %load_i32(i64) -> i32 {
block0(v0: i64):
v1 = load.i32 v0
return v1
}

; VCode:
; block0:
; x2 = load32_u x0+0 // flags =
; bswap32 x0, x2
; ret
;
; Disassembled:
; load32_u x2, x0
; bswap32 x0, x2
; ret

function %load_i64(i64) -> i64 {
block0(v0: i64):
v1 = load.i64 v0
return v1
}

; VCode:
; block0:
; x2 = load64_u x0+0 // flags =
; bswap64 x0, x2
; ret
;
; Disassembled:
; load64 x2, x0
; bswap64 x0, x2
; ret

function %load_i32_with_offset(i64) -> i32 {
block0(v0: i64):
v1 = load.i32 v0+4
return v1
}

; VCode:
; block0:
; x2 = load32_u x0+4 // flags =
; bswap32 x0, x2
; ret
;
; Disassembled:
; load32_u_offset8 x2, x0, 4
; bswap32 x0, x2
; ret

function %load_i64_with_offset(i64) -> i64 {
block0(v0: i64):
v1 = load.i64 v0+8
return v1
}

; VCode:
; block0:
; x2 = load64_u x0+8 // flags =
; bswap64 x0, x2
; ret
;
; Disassembled:
; load64_offset8 x2, x0, 8
; bswap64 x0, x2
; ret

71 changes: 71 additions & 0 deletions cranelift/filetests/filetests/isa/pulley64/storebe.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
test compile precise-output
target pulley64 big_endian

function %store_i32(i32, i64) {
block0(v0: i32, v1: i64):
store v0, v1
return
}

; VCode:
; block0:
; bswap32 x3, x0
; store32 x1+0, x3 // flags =
; ret
;
; Disassembled:
; bswap32 x3, x0
; store32 x1, x3
; ret

function %store_i64(i64, i64) {
block0(v0: i64, v1: i64):
store v0, v1
return
}

; VCode:
; block0:
; bswap64 x3, x0
; store64 x1+0, x3 // flags =
; ret
;
; Disassembled:
; bswap64 x3, x0
; store64 x1, x3
; ret

function %store_i32_with_offset(i32, i64) {
block0(v0: i32, v1: i64):
store v0, v1+4
return
}

; VCode:
; block0:
; bswap32 x3, x0
; store32 x1+4, x3 // flags =
; ret
;
; Disassembled:
; bswap32 x3, x0
; store32_offset8 x1, 4, x3
; ret

function %store_i64_with_offset(i64, i64) {
block0(v0: i64, v1: i64):
store v0, v1+8
return
}

; VCode:
; block0:
; bswap64 x3, x0
; store64 x1+8, x3 // flags =
; ret
;
; Disassembled:
; bswap64 x3, x0
; store64_offset8 x1, 8, x3
; ret

Loading

0 comments on commit 0a894cb

Please sign in to comment.