From 6c781d75ba4915ab3c65bc73cad6e4cf0261a139 Mon Sep 17 00:00:00 2001
From: Afonso Bordado
Date: Tue, 11 Jun 2024 18:44:15 +0100
Subject: [PATCH] aarch64: Add support for `load+extends` patterns

This commit adds support for merging a load with a `{u,s}extend`
instruction. On AArch64 the narrow load instructions already
zero-extend by default, and sign-extending variants exist, so we can
encode the extend directly in the load.

See also #8765, which does a similar thing for RISC-V.
---
 cranelift/codegen/src/isa/aarch64/lower.isle |  22 ++
 .../filetests/isa/aarch64/load-extends.clif  | 208 ++++++++++++++++++
 2 files changed, 230 insertions(+)
 create mode 100644 cranelift/filetests/filetests/isa/aarch64/load-extends.clif

diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle
index 6b5c22bec341..dc7ed36e4b69 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -1139,6 +1139,17 @@
                                                     (u8_from_uimm8 lane)))))
   (value_regs (mov_from_vec (put_in_reg vec) lane (lane_size in))
               (imm $I64 (ImmExtend.Zero) 0)))
+;; Zero extensions from a load can be encoded in the load itself
+(rule (lower (has_type (fits_in_64 _) (uextend x @ (has_type in_ty (load flags address offset)))))
+  (if-let inst (is_sinkable_inst x))
+  (let ((_ Unit (sink_inst inst)))
+    (aarch64_uload in_ty (amode in_ty address offset) flags)))
+
+(decl aarch64_uload (Type AMode MemFlags) Reg)
+(rule (aarch64_uload $I8 amode flags) (aarch64_uload8 amode flags))
+(rule (aarch64_uload $I16 amode flags) (aarch64_uload16 amode flags))
+(rule (aarch64_uload $I32 amode flags) (aarch64_uload32 amode flags))
+
 ;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; General rule for extending input to an output which fits in a single
@@ -1187,6 +1198,17 @@
         (hi Reg (asr_imm $I64 lo (imm_shift_from_u8 63))))
     (value_regs lo hi)))
 
+;; Signed extensions from a load can be encoded in the load itself
+(rule (lower (has_type (fits_in_64 _) (sextend x @ (has_type in_ty (load flags address offset)))))
+  (if-let inst (is_sinkable_inst x))
+  (let ((_ Unit (sink_inst inst)))
+    (aarch64_sload in_ty (amode in_ty address offset) flags)))
+
+(decl aarch64_sload (Type AMode MemFlags) Reg)
+(rule (aarch64_sload $I8 amode flags) (aarch64_sload8 amode flags))
+(rule (aarch64_sload $I16 amode flags) (aarch64_sload16 amode flags))
+(rule (aarch64_sload $I32 amode flags) (aarch64_sload32 amode flags))
+
 ;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; Base case using `orn` between two registers.
diff --git a/cranelift/filetests/filetests/isa/aarch64/load-extends.clif b/cranelift/filetests/filetests/isa/aarch64/load-extends.clif
new file mode 100644
index 000000000000..de9240eb39bf
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/aarch64/load-extends.clif
@@ -0,0 +1,208 @@
+test compile precise-output
+set unwind_info=false
+target aarch64
+
+function %load_uextend_i8_i16(i64) -> i16 {
+block0(v0: i64):
+    v1 = load.i8 v0
+    v2 = uextend.i16 v1
+    return v2
+}
+
+; VCode:
+; block0:
+;   ldrb w0, [x0]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldrb w0, [x0] ; trap: heap_oob
+;   ret
+
+function %load_uextend_i8_i32(i64) -> i32 {
+block0(v0: i64):
+    v1 = load.i8 v0
+    v2 = uextend.i32 v1
+    return v2
+}
+
+; VCode:
+; block0:
+;   ldrb w0, [x0]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldrb w0, [x0] ; trap: heap_oob
+;   ret
+
+function %load_uextend_i8_i64(i64) -> i64 {
+block0(v0: i64):
+    v1 = load.i8 v0
+    v2 = uextend.i64 v1
+    return v2
+}
+
+; VCode:
+; block0:
+;   ldrb w0, [x0]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldrb w0, [x0] ; trap: heap_oob
+;   ret
+
+function %load_uextend_i16_i32(i64) -> i32 {
+block0(v0: i64):
+    v1 = load.i16 v0
+    v2 = uextend.i32 v1
+    return v2
+}
+
+; VCode:
+; block0:
+;   ldrh w0, [x0]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldrh w0, [x0] ; trap: heap_oob
+;   ret
+
+function %load_uextend_i16_i64(i64) -> i64 {
+block0(v0: i64):
+    v1 = load.i16 v0
+    v2 = uextend.i64 v1
+    return v2
+}
+
+; VCode:
+; block0:
+;   ldrh w0, [x0]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldrh w0, [x0] ; trap: heap_oob
+;   ret
+
+function %load_uextend_i32_i64(i64) -> i64 {
+block0(v0: i64):
+    v1 = load.i32 v0
+    v2 = uextend.i64 v1
+    return v2
+}
+
+; VCode:
+; block0:
+;   ldr w0, [x0]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldr w0, [x0] ; trap: heap_oob
+;   ret
+
+function %load_sextend_i8_i16(i64) -> i16 {
+block0(v0: i64):
+    v1 = load.i8 v0
+    v2 = sextend.i16 v1
+    return v2
+}
+
+; VCode:
+; block0:
+;   ldrsb x0, [x0]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldrsb x0, [x0] ; trap: heap_oob
+;   ret
+
+function %load_sextend_i8_i32(i64) -> i32 {
+block0(v0: i64):
+    v1 = load.i8 v0
+    v2 = sextend.i32 v1
+    return v2
+}
+
+; VCode:
+; block0:
+;   ldrsb x0, [x0]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldrsb x0, [x0] ; trap: heap_oob
+;   ret
+
+function %load_sextend_i8_i64(i64) -> i64 {
+block0(v0: i64):
+    v1 = load.i8 v0
+    v2 = sextend.i64 v1
+    return v2
+}
+
+; VCode:
+; block0:
+;   ldrsb x0, [x0]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldrsb x0, [x0] ; trap: heap_oob
+;   ret
+
+function %load_sextend_i16_i32(i64) -> i32 {
+block0(v0: i64):
+    v1 = load.i16 v0
+    v2 = sextend.i32 v1
+    return v2
+}
+
+; VCode:
+; block0:
+;   ldrsh x0, [x0]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldrsh x0, [x0] ; trap: heap_oob
+;   ret
+
+function %load_sextend_i16_i64(i64) -> i64 {
+block0(v0: i64):
+    v1 = load.i16 v0
+    v2 = sextend.i64 v1
+    return v2
+}
+
+; VCode:
+; block0:
+;   ldrsh x0, [x0]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldrsh x0, [x0] ; trap: heap_oob
+;   ret
+
+function %load_sextend_i32_i64(i64) -> i64 {
+block0(v0: i64):
+    v1 = load.i32 v0
+    v2 = sextend.i64 v1
+    return v2
+}
+
+; VCode:
+; block0:
+;   ldrsw x0, [x0]
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   ldrsw x0, [x0] ; trap: heap_oob
+;   ret
+
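
Note, outside the patch itself: these rules only fire when the load can be
sunk into the extend. As I understand `is_sinkable_inst`, it only matches
when the extend is the load's sole use, so a hypothetical function like the
sketch below (not one of the tests above) would still lower the load and the
extend separately, since v1 is also returned directly:

function %load_two_uses(i64) -> i64, i8 {
block0(v0: i64):
    v1 = load.i8 v0          ; two uses: the uextend and the return
    v2 = uextend.i64 v1
    return v2, v1
}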