From c5ed0acd19d81102bb0a3968f6a3c0b7b1f31f47 Mon Sep 17 00:00:00 2001 From: Fan Yang Date: Wed, 15 Feb 2023 09:14:40 -0800 Subject: [PATCH 01/15] Initial change to make intrinsics work with mini JIT on arm64 --- src/mono/mono/mini/cpu-arm64.mdesc | 5 +++++ src/mono/mono/mini/mini-arm64.c | 24 ++++++++++++++++++++++++ src/mono/mono/mini/mini.h | 8 -------- src/mono/mono/mini/simd-intrinsics.c | 11 ++++++++++- 4 files changed, 39 insertions(+), 9 deletions(-) diff --git a/src/mono/mono/mini/cpu-arm64.mdesc b/src/mono/mono/mini/cpu-arm64.mdesc index f47a6296363d9..8758cfcbe9bd6 100644 --- a/src/mono/mono/mini/cpu-arm64.mdesc +++ b/src/mono/mono/mini/cpu-arm64.mdesc @@ -122,6 +122,7 @@ r8const: dest:f len:20 label: len:0 store_membase_imm: dest:b len:20 store_membase_reg: dest:b src1:i len:20 +storex_membase: dest:b src1:x len:4 storei1_membase_imm: dest:b len:20 storei1_membase_reg: dest:b src1:i len:12 storei2_membase_imm: dest:b len:20 @@ -135,6 +136,7 @@ storei1_memindex: dest:b src1:i src2:i len:4 storei2_memindex: dest:b src1:i src2:i len:4 storei4_memindex: dest:b src1:i src2:i len:4 load_membase: dest:i src1:b len:20 +loadx_membase: dest:x src1:b len:4 loadi1_membase: dest:i src1:b len:32 loadu1_membase: dest:i src1:b len:32 loadi2_membase: dest:i src1:b len:32 @@ -493,6 +495,9 @@ atomic_store_i8: dest:b src1:i len:20 atomic_store_u8: dest:b src1:i len:20 atomic_store_r4: dest:b src1:f len:28 atomic_store_r8: dest:b src1:f len:24 +xbinop: dest:x src1:x src2:x len:4 +xzero: dest:x len:4 +xmove: dest:x src1:x len:4 generic_class_init: src1:a len:44 clob:c gc_safe_point: src1:i len:12 clob:c diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c index 0e2b70b5f12a9..75e25005cd98f 100644 --- a/src/mono/mono/mini/mini-arm64.c +++ b/src/mono/mono/mini/mini-arm64.c @@ -3412,6 +3412,18 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } break; } + case OP_STOREX_MEMBASE: + arm_strfpq (code, sreg1, dreg, ins->inst_offset); + break; + case OP_LOADX_MEMBASE: + arm_ldrfpq (code, dreg, sreg1, ins->inst_offset); + break; + case OP_XZERO: + arm_neon_eor_16b (code, dreg, dreg, dreg); + break; + case OP_XMOVE: + arm_movw (code, dreg, sreg1); + break; /* BRANCH */ case OP_BR: @@ -3484,6 +3496,18 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) mono_add_patch_info_rel (cfg, offset, MONO_PATCH_INFO_BB, ins->inst_true_bb, MONO_R_ARM64_CBZ); arm_cbnzx (code, sreg1, 0); break; + case OP_XBINOP: + switch (ins->inst_c0) { + case OP_IADD: + arm_neon_add (code, VREG_FULL, SIZE_2, dreg, sreg1, sreg2); + break; + case OP_ISUB: + arm_neon_sub_8h (code, dreg, sreg1, sreg2); + break; + default: + g_assert_not_reached (); + } + break; /* ALU */ case OP_IADD: arm_addw (code, dreg, sreg1, sreg2); diff --git a/src/mono/mono/mini/mini.h b/src/mono/mono/mini/mini.h index 245e334fe2e5a..3c33ecee4b300 100644 --- a/src/mono/mono/mini/mini.h +++ b/src/mono/mono/mini/mini.h @@ -303,15 +303,7 @@ enum { #define MONO_IS_REAL_MOVE(ins) (((ins)->opcode == OP_MOVE) || ((ins)->opcode == OP_FMOVE) || ((ins)->opcode == OP_XMOVE) || ((ins)->opcode == OP_RMOVE)) #define MONO_IS_ZERO(ins) (((ins)->opcode == OP_VZERO) || ((ins)->opcode == OP_XZERO)) -#ifdef TARGET_ARM64 -/* - * SIMD is only supported on arm64 when using the LLVM backend. When not using - * the LLVM backend, treat SIMD datatypes as regular value types. - */ -#define MONO_CLASS_IS_SIMD(cfg, klass) (((cfg)->opt & MONO_OPT_SIMD) && COMPILE_LLVM (cfg) && m_class_is_simd_type (klass)) -#else #define MONO_CLASS_IS_SIMD(cfg, klass) (((cfg)->opt & MONO_OPT_SIMD) && m_class_is_simd_type (klass) && (COMPILE_LLVM (cfg) || mono_type_size (m_class_get_byval_arg (klass), NULL) == 16)) -#endif #else diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index a1dc7c3683f16..7af70cde8ead1 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1192,9 +1192,11 @@ is_element_type_primitive (MonoType *vector_type) static MonoInst* emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args) { +#if defined(TARGET_AMD64) || defined(TARGET_WASM) if (!COMPILE_LLVM (cfg)) return NULL; - +#endif +// FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64 #ifdef TARGET_ARM64 if (!(cfg->compile_aot && cfg->full_aot && !cfg->interp)) return NULL; @@ -1208,6 +1210,13 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi if (!strcmp (m_class_get_name (cfg->method->klass), "Vector256")) return NULL; // TODO: Fix Vector256.WithUpper/WithLower + +// FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64 +#ifdef TARGET_ARM64 + if (!COMPILE_LLVM (cfg) && id != SN_Add) + return NULL; +#endif + MonoClass *klass = cmethod->klass; MonoTypeEnum arg0_type = fsig->param_count > 0 ? get_underlying_type (fsig->params [0]) : MONO_TYPE_VOID; From 03055a1b3911ffbd4dd880c915ba08059b600cc3 Mon Sep 17 00:00:00 2001 From: Fan Yang Date: Mon, 20 Feb 2023 22:44:40 -0800 Subject: [PATCH 02/15] Fix alignment issue and add size getter functions --- src/mono/mono/mini/mini-arm64.c | 63 +++++++++++++++++++++++++--- src/mono/mono/mini/simd-intrinsics.c | 8 ++-- 2 files changed, 62 insertions(+), 9 deletions(-) diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c index 75e25005cd98f..8fed1454f5f20 100644 --- a/src/mono/mono/mini/mini-arm64.c +++ b/src/mono/mono/mini/mini-arm64.c @@ -2209,8 +2209,14 @@ mono_arch_allocate_vars (MonoCompile *cfg) cfg->ret->dreg = cinfo->ret.reg; break; case ArgVtypeInIRegs: - case ArgHFA: + case ArgHFA: { /* Allocate a local to hold the result, the epilog will copy it to the correct place */ + MonoType *ret_type = mini_get_underlying_type (sig->ret); + if (MONO_CLASS_IS_SIMD (cfg, mono_class_from_mono_type_internal (ret_type))) { + int align = 16; + offset = (offset + (align - 1)) & ~(align -1); + } + cfg->ret->opcode = OP_REGOFFSET; cfg->ret->inst_basereg = cfg->frame_reg; cfg->ret->inst_offset = offset; @@ -2220,6 +2226,7 @@ mono_arch_allocate_vars (MonoCompile *cfg) else offset += 16; break; + } case ArgVtypeByRef: /* This variable will be initialized in the prolog from R8 */ cfg->vret_addr->opcode = OP_REGOFFSET; @@ -2377,7 +2384,7 @@ mono_arch_allocate_vars (MonoCompile *cfg) ins->opcode = OP_REGOFFSET; ins->inst_basereg = cfg->frame_reg; ins->inst_offset = offset + offsets [i]; - //printf ("allocated local %d to ", i); mono_print_tree_nl (ins); + //printf ("allocated local %d to ", i); mono_print_ins (ins); } } offset += locals_stack_size; @@ -3235,6 +3242,52 @@ emit_branch_island (MonoCompile *cfg, guint8 *code, int start_offset) return code; } +static int +get_vector_size_macro (MonoInst *ins) +{ + int size = mono_class_value_size (ins->klass, NULL); + switch (size) { + case 16: + return VREG_FULL; + case 8: + return VREG_LOW; + default: + g_assert_not_reached (); + } +} + +static int +get_type_size_macro (MonoTypeEnum type) +{ + switch (type) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return TYPE_I8; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return TYPE_I16; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return TYPE_I32; + case MONO_TYPE_I8: + case MONO_TYPE_U8: + return TYPE_I64; + case MONO_TYPE_I: + case MONO_TYPE_U: +#if TARGET_SIZEOF_VOID_P == 8 + return TYPE_I64; +#else + return TYPE_I32; +#endif + case MONO_TYPE_R4: + return TYPE_F32; + case MONO_TYPE_R8: + return TYPE_F64; + default: + g_assert_not_reached (); + } +} + void mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) { @@ -3499,10 +3552,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_XBINOP: switch (ins->inst_c0) { case OP_IADD: - arm_neon_add (code, VREG_FULL, SIZE_2, dreg, sreg1, sreg2); + arm_neon_add (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2); break; - case OP_ISUB: - arm_neon_sub_8h (code, dreg, sreg1, sreg2); + case OP_FADD: + arm_neon_fadd (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2); break; default: g_assert_not_reached (); diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 7af70cde8ead1..bc0c595e9efad 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1197,10 +1197,10 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return NULL; #endif // FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64 -#ifdef TARGET_ARM64 - if (!(cfg->compile_aot && cfg->full_aot && !cfg->interp)) - return NULL; -#endif +// #ifdef TARGET_ARM64 +// if (!(cfg->compile_aot && cfg->full_aot && !cfg->interp)) +// return NULL; +// #endif int id = lookup_intrins (sri_vector_methods, sizeof (sri_vector_methods), cmethod); if (id == -1) { From 2ebb20d733802441209870cdc26228e6b0e91b19 Mon Sep 17 00:00:00 2001 From: Fan Yang Date: Tue, 21 Feb 2023 14:27:30 -0800 Subject: [PATCH 03/15] Review feedback and refactor --- src/mono/mono/arch/arm64/arm64-codegen.h | 6 ++-- src/mono/mono/mini/mini-arm64.c | 39 +++++++++++++++++++++--- src/mono/mono/mini/mini.c | 9 ++---- 3 files changed, 40 insertions(+), 14 deletions(-) diff --git a/src/mono/mono/arch/arm64/arm64-codegen.h b/src/mono/mono/arch/arm64/arm64-codegen.h index a1d87bbb6de29..70fd23da27b8e 100644 --- a/src/mono/mono/arch/arm64/arm64-codegen.h +++ b/src/mono/mono/arch/arm64/arm64-codegen.h @@ -456,11 +456,11 @@ arm_encode_imm7 (int imm, int size) #define arm_format_ldrfp_imm(p, size, opc, rt, rn, pimm, scale) arm_emit ((p), ((size) << 30) | (0xf << 26) | (0x1 << 24) | ((opc) << 22) | (arm_encode_pimm12 ((pimm), (scale)) << 10) | ((rn) << 5) | ((rt) << 0)) /* Load double */ -#define arm_ldrfpx(p, dt, xn, simm) arm_format_ldrfp_imm ((p), ARMSIZE_X, 0x1, dt, xn, simm, 8) +#define arm_ldrfpx(p, dt, xn, simm) arm_format_ldrfp_imm ((p), ARMSIZE_X, 0x1, (dt), (xn), (simm), 8) /* Load single */ -#define arm_ldrfpw(p, dt, xn, simm) arm_format_ldrfp_imm ((p), ARMSIZE_W, 0x1, dt, xn, simm, 4) +#define arm_ldrfpw(p, dt, xn, simm) arm_format_ldrfp_imm ((p), ARMSIZE_W, 0x1, (dt), (xn), (simm), 4) /* Load 128 bit */ -#define arm_ldrfpq(p, qt, xn, simm) arm_format_ldrfp_imm ((p), 0, 0x3, qt, xn, simm, 16) +#define arm_ldrfpq(p, qt, xn, simm) arm_format_ldrfp_imm ((p), 0x0, 0x3, (qt), (xn), (simm), 16) /* Arithmetic (immediate) */ static G_GNUC_UNUSED inline guint32 diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c index 8fed1454f5f20..62671da2618bc 100644 --- a/src/mono/mono/mini/mini-arm64.c +++ b/src/mono/mono/mini/mini-arm64.c @@ -579,6 +579,20 @@ emit_strfpx (guint8 *code, int rt, int rn, int imm) return code; } +static WARN_UNUSED_RESULT guint8* +emit_strfpq (guint8 *code, int rt, int rn, int imm) +{ + if (arm_is_strx_imm (imm)) { + arm_strfpq (code, rt, rn, imm); + } else { + g_assert (rn != ARMREG_IP0); + code = emit_imm (code, ARMREG_IP0, imm); + arm_addx (code, ARMREG_IP0, rn, ARMREG_IP0); + arm_strfpq (code, rt, ARMREG_IP0, 0); + } + return code; +} + static WARN_UNUSED_RESULT guint8* emit_strx (guint8 *code, int rt, int rn, int imm) { @@ -717,6 +731,20 @@ emit_ldrfpx (guint8 *code, int rt, int rn, int imm) return code; } +static WARN_UNUSED_RESULT guint8* +emit_ldrfpq (guint8 *code, int rt, int rn, int imm) +{ + if (arm_is_pimm12_scaled (imm, 8)) { + arm_ldrfpq (code, rt, rn, imm); + } else { + g_assert (rn != ARMREG_IP0); + code = emit_imm (code, ARMREG_IP0, imm); + arm_addx (code, ARMREG_IP0, rn, ARMREG_IP0); + arm_ldrfpq (code, rt, ARMREG_IP0, 0); + } + return code; +} + guint8* mono_arm_emit_ldrx (guint8 *code, int rt, int rn, int imm) { @@ -2212,9 +2240,10 @@ mono_arch_allocate_vars (MonoCompile *cfg) case ArgHFA: { /* Allocate a local to hold the result, the epilog will copy it to the correct place */ MonoType *ret_type = mini_get_underlying_type (sig->ret); - if (MONO_CLASS_IS_SIMD (cfg, mono_class_from_mono_type_internal (ret_type))) { - int align = 16; - offset = (offset + (align - 1)) & ~(align -1); + MonoClass *klass = mono_class_from_mono_type_internal (ret_type); + if (MONO_CLASS_IS_SIMD (cfg, klass)) { + int align = mono_type_size (m_class_get_byval_arg (klass), NULL); + offset = ALIGN_TO (offset, align); } cfg->ret->opcode = OP_REGOFFSET; @@ -3466,10 +3495,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) break; } case OP_STOREX_MEMBASE: - arm_strfpq (code, sreg1, dreg, ins->inst_offset); + code = emit_strfpq (code, sreg1, dreg, ins->inst_offset); break; case OP_LOADX_MEMBASE: - arm_ldrfpq (code, dreg, sreg1, ins->inst_offset); + code = emit_ldrfpq (code, dreg, sreg1, ins->inst_offset); break; case OP_XZERO: arm_neon_eor_16b (code, dreg, dreg, dreg); diff --git a/src/mono/mono/mini/mini.c b/src/mono/mono/mini/mini.c index 8d95874df53df..45d0a4696c8bf 100644 --- a/src/mono/mono/mini/mini.c +++ b/src/mono/mono/mini/mini.c @@ -1502,18 +1502,15 @@ mono_allocate_stack_slots (MonoCompile *cfg, gboolean backward, guint32 *stack_s * Align the size too so the code generated for passing vtypes in * registers doesn't overwrite random locals. */ - size = (size + (align - 1)) & ~(align -1); + size = ALIGN_TO (size, align); } if (backward) { - offset += size; - offset += align - 1; - offset &= ~(align - 1); + offset = ALIGN_TO (size, align); slot = offset; } else { - offset += align - 1; - offset &= ~(align - 1); + offset = ALIGN_TO (0, align); slot = offset; offset += size; } From 6be92a016508964bea6b1b01d52db806399d6008 Mon Sep 17 00:00:00 2001 From: Fan Yang Date: Wed, 22 Feb 2023 06:40:07 -0800 Subject: [PATCH 04/15] Fix align --- src/mono/mono/mini/mini.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mono/mono/mini/mini.c b/src/mono/mono/mini/mini.c index 45d0a4696c8bf..d927e38cf481a 100644 --- a/src/mono/mono/mini/mini.c +++ b/src/mono/mono/mini/mini.c @@ -1506,11 +1506,11 @@ mono_allocate_stack_slots (MonoCompile *cfg, gboolean backward, guint32 *stack_s } if (backward) { - offset = ALIGN_TO (size, align); + offset = ALIGN_TO (offset + size, align); slot = offset; } else { - offset = ALIGN_TO (0, align); + offset = ALIGN_TO (offset, align); slot = offset; offset += size; } From 60531dd61938629028a60c3c26c128556983bb37 Mon Sep 17 00:00:00 2001 From: Fan Yang Date: Wed, 22 Feb 2023 09:33:50 -0800 Subject: [PATCH 05/15] Fix issue with big offset --- src/mono/mono/mini/cpu-arm64.mdesc | 4 ++-- src/mono/mono/mini/mini-arm64.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mono/mono/mini/cpu-arm64.mdesc b/src/mono/mono/mini/cpu-arm64.mdesc index 8758cfcbe9bd6..0093e653bc5ce 100644 --- a/src/mono/mono/mini/cpu-arm64.mdesc +++ b/src/mono/mono/mini/cpu-arm64.mdesc @@ -122,7 +122,7 @@ r8const: dest:f len:20 label: len:0 store_membase_imm: dest:b len:20 store_membase_reg: dest:b src1:i len:20 -storex_membase: dest:b src1:x len:4 +storex_membase: dest:b src1:x len:12 storei1_membase_imm: dest:b len:20 storei1_membase_reg: dest:b src1:i len:12 storei2_membase_imm: dest:b len:20 @@ -136,7 +136,7 @@ storei1_memindex: dest:b src1:i src2:i len:4 storei2_memindex: dest:b src1:i src2:i len:4 storei4_memindex: dest:b src1:i src2:i len:4 load_membase: dest:i src1:b len:20 -loadx_membase: dest:x src1:b len:4 +loadx_membase: dest:x src1:b len:12 loadi1_membase: dest:i src1:b len:32 loadu1_membase: dest:i src1:b len:32 loadi2_membase: dest:i src1:b len:32 diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c index 62671da2618bc..1701c804809cf 100644 --- a/src/mono/mono/mini/mini-arm64.c +++ b/src/mono/mono/mini/mini-arm64.c @@ -582,7 +582,7 @@ emit_strfpx (guint8 *code, int rt, int rn, int imm) static WARN_UNUSED_RESULT guint8* emit_strfpq (guint8 *code, int rt, int rn, int imm) { - if (arm_is_strx_imm (imm)) { + if (arm_is_pimm12_scaled (imm, 16)) { arm_strfpq (code, rt, rn, imm); } else { g_assert (rn != ARMREG_IP0); @@ -734,7 +734,7 @@ emit_ldrfpx (guint8 *code, int rt, int rn, int imm) static WARN_UNUSED_RESULT guint8* emit_ldrfpq (guint8 *code, int rt, int rn, int imm) { - if (arm_is_pimm12_scaled (imm, 8)) { + if (arm_is_pimm12_scaled (imm, 16)) { arm_ldrfpq (code, rt, rn, imm); } else { g_assert (rn != ARMREG_IP0); From 371aa57acca7f95f5aa35340ef51732ad7f0bf7d Mon Sep 17 00:00:00 2001 From: Fan Yang Date: Wed, 22 Feb 2023 10:20:52 -0800 Subject: [PATCH 06/15] Fix build warning --- src/mono/mono/mini/mini-arm64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c index 1701c804809cf..e64959da37fc6 100644 --- a/src/mono/mono/mini/mini-arm64.c +++ b/src/mono/mono/mini/mini-arm64.c @@ -2242,8 +2242,8 @@ mono_arch_allocate_vars (MonoCompile *cfg) MonoType *ret_type = mini_get_underlying_type (sig->ret); MonoClass *klass = mono_class_from_mono_type_internal (ret_type); if (MONO_CLASS_IS_SIMD (cfg, klass)) { - int align = mono_type_size (m_class_get_byval_arg (klass), NULL); - offset = ALIGN_TO (offset, align); + int align_simd = mono_type_size (m_class_get_byval_arg (klass), NULL); + offset = ALIGN_TO (offset, align_simd); } cfg->ret->opcode = OP_REGOFFSET; From 4ea433a51dfec0c875781100f8223a8041b4fb8c Mon Sep 17 00:00:00 2001 From: Fan Yang Date: Thu, 23 Feb 2023 20:41:40 -0800 Subject: [PATCH 07/15] Add intrinsics for get_One --- src/mono/mono/arch/arm64/arm64-codegen.h | 8 +++ src/mono/mono/mini/mini-arm64.c | 47 ++++++++++++++ src/mono/mono/mini/simd-intrinsics.c | 83 ++++++++++++++++++++++++ 3 files changed, 138 insertions(+) diff --git a/src/mono/mono/arch/arm64/arm64-codegen.h b/src/mono/mono/arch/arm64/arm64-codegen.h index 70fd23da27b8e..92130016fb734 100644 --- a/src/mono/mono/arch/arm64/arm64-codegen.h +++ b/src/mono/mono/arch/arm64/arm64-codegen.h @@ -462,6 +462,14 @@ arm_encode_imm7 (int imm, int size) /* Load 128 bit */ #define arm_ldrfpq(p, qt, xn, simm) arm_format_ldrfp_imm ((p), 0x0, 0x3, (qt), (xn), (simm), 16) +/* LDR (literal, SIMD&FP) PC-relative*/ +/* Load single */ +#define arm_neon_ldrs_lit(p, rd, target) arm_emit ((p), 0b00011100000000000000000000000000 | (0b00 << 30) | (arm_get_disp19 ((p), (target)) << 5) | (rd)) +/* Load double */ +#define arm_neon_ldrd_lit(p, rd, target) arm_emit ((p), 0b00011100000000000000000000000000 | (0b01 << 30) | (arm_get_disp19 ((p), (target)) << 5) | (rd)) +/* Load 128 bit */ +#define arm_neon_ldrq_lit(p, rd, target) arm_emit ((p), 0b00011100000000000000000000000000 | (0b10 << 30) | (arm_get_disp19 ((p), (target)) << 5) | (rd)) + /* Arithmetic (immediate) */ static G_GNUC_UNUSED inline guint32 arm_encode_arith_imm (int imm, guint32 *shift) diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c index e64959da37fc6..5a280d849ec68 100644 --- a/src/mono/mono/mini/mini-arm64.c +++ b/src/mono/mono/mini/mini-arm64.c @@ -3506,6 +3506,17 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_XMOVE: arm_movw (code, dreg, sreg1); break; + case OP_XCONST: { + if (cfg->compile_aot && cfg->code_exec_only) { + mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_X128_GOT, ins->inst_p0); + arm_ldrx_lit (code, ARMREG_IP0, 0); + arm_ldrfpq (code, ins->dreg, ARMREG_IP0, 0); + } else { + mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_X128, ins->inst_p0); + arm_neon_ldrq_lit (code, ins->dreg, 0); + } + break; + } /* BRANCH */ case OP_BR: @@ -5371,6 +5382,8 @@ mono_arch_emit_exceptions (MonoCompile *cfg) size += 32; exc_throw_found [i] = TRUE; } + } else if (ji->type == MONO_PATCH_INFO_X128) { + size += 16 + 15; /* sizeof (Vector128) + alignment */ } } @@ -5412,6 +5425,40 @@ mono_arch_emit_exceptions (MonoCompile *cfg) set_code_cursor (cfg, code); } + /* Handle relocations with RIP relative addressing */ + for (ji = cfg->patch_info; ji; ji = ji->next) { + gboolean remove = FALSE; + + if (ji->type == MONO_PATCH_INFO_X128) { + guint8 *pos, *patch_pos; + guint32 target_pos; + + code = (guint8*)ALIGN_TO (code, 16); + pos = cfg->native_code + ji->ip.i; + patch_pos = pos + 3; + target_pos = GPTRDIFF_TO_UINT32 (code - pos - 4); + memcpy (code, ji->data.target, 16); + code += 16; + + *(guint32*)(patch_pos) = target_pos; + + remove = TRUE; + } + + if (remove) { + if (ji == cfg->patch_info) + cfg->patch_info = ji->next; + else { + MonoJumpInfo *tmp; + + for (tmp = cfg->patch_info; tmp->next != ji; tmp = tmp->next) + ; + tmp->next = ji->next; + } + } + set_code_cursor (cfg, code); + } + set_code_cursor (cfg, code); } diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index bc0c595e9efad..a11998e2be9c9 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1807,6 +1807,7 @@ static guint16 vector64_vector128_t_methods [] = { SN_get_AllBitsSet, SN_get_Count, SN_get_IsSupported, + SN_get_One, SN_get_Zero, SN_op_Addition, SN_op_BitwiseAnd, @@ -1860,8 +1861,16 @@ emit_vector64_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign break; } +#if defined(TARGET_AMD64) || defined(TARGET_WASM) if (!COMPILE_LLVM (cfg)) return NULL; +#endif + +// FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64 +#ifdef TARGET_ARM64 + if (!COMPILE_LLVM (cfg) && id != SN_get_One && id != SN_get_Zero) + return NULL; +#endif switch (id) { case SN_get_Count: { @@ -1877,6 +1886,80 @@ emit_vector64_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign case SN_get_AllBitsSet: { return emit_xones (cfg, klass); } + case SN_get_One: { + if (size != 16) + return NULL; + switch (etype->type) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: { + guint8 value[16]; + + for (int i = 0; i < len; ++i) { + value [i] = 1; + } + + return emit_xconst_v128 (cfg, klass, value); + } + case MONO_TYPE_I2: + case MONO_TYPE_U2: { + guint16 value[8]; + + for (int i = 0; i < len; ++i) { + value [i] = 1; + } + + return emit_xconst_v128 (cfg, klass, (guint8*)value); + } +#if TARGET_SIZEOF_VOID_P == 4 + case MONO_TYPE_I: + case MONO_TYPE_U: +#endif + case MONO_TYPE_I4: + case MONO_TYPE_U4: { + guint32 value[4]; + + for (int i = 0; i < len; ++i) { + value [i] = 1; + } + + return emit_xconst_v128 (cfg, klass, (guint8*)value); + } +#if TARGET_SIZEOF_VOID_P == 8 + case MONO_TYPE_I: + case MONO_TYPE_U: +#endif + case MONO_TYPE_I8: + case MONO_TYPE_U8: { + guint64 value[2]; + + for (int i = 0; i < len; ++i) { + value [i] = 1; + } + + return emit_xconst_v128 (cfg, klass, (guint8*)value); + } + case MONO_TYPE_R4: { + float value[4]; + + for (int i = 0; i < len; ++i) { + value [i] = 1.0f; + } + + return emit_xconst_v128 (cfg, klass, (guint8*)value); + } + case MONO_TYPE_R8: { + double value[2]; + + for (int i = 0; i < len; ++i) { + value [i] = 1.0; + } + + return emit_xconst_v128 (cfg, klass, (guint8*)value); + } + default: + g_assert_not_reached (); + } + } case SN_op_Addition: case SN_op_BitwiseAnd: case SN_op_BitwiseOr: From 6b0400bb46b410d7e43fe204a982c8110cae32c2 Mon Sep 17 00:00:00 2001 From: Fan Yang Date: Fri, 24 Feb 2023 03:48:45 -0800 Subject: [PATCH 08/15] Add xconst to mdesc --- src/mono/mono/mini/cpu-arm64.mdesc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mono/mono/mini/cpu-arm64.mdesc b/src/mono/mono/mini/cpu-arm64.mdesc index 0093e653bc5ce..43d8dd639de7d 100644 --- a/src/mono/mono/mini/cpu-arm64.mdesc +++ b/src/mono/mono/mini/cpu-arm64.mdesc @@ -498,6 +498,7 @@ atomic_store_r8: dest:b src1:f len:24 xbinop: dest:x src1:x src2:x len:4 xzero: dest:x len:4 xmove: dest:x src1:x len:4 +xconst: dest:x len:10 generic_class_init: src1:a len:44 clob:c gc_safe_point: src1:i len:12 clob:c From 7df8b208184578dc4f4b2e5004fd4e96d10220c2 Mon Sep 17 00:00:00 2001 From: Fan Yang Date: Fri, 24 Feb 2023 04:31:36 -0800 Subject: [PATCH 09/15] Fix MONO_PATCH_INFO_X128 --- src/mono/mono/arch/arm64/arm64-codegen.h | 1 + src/mono/mono/mini/mini-arm64.c | 8 ++------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/mono/mono/arch/arm64/arm64-codegen.h b/src/mono/mono/arch/arm64/arm64-codegen.h index 92130016fb734..85aa2de9cc49d 100644 --- a/src/mono/mono/arch/arm64/arm64-codegen.h +++ b/src/mono/mono/arch/arm64/arm64-codegen.h @@ -469,6 +469,7 @@ arm_encode_imm7 (int imm, int size) #define arm_neon_ldrd_lit(p, rd, target) arm_emit ((p), 0b00011100000000000000000000000000 | (0b01 << 30) | (arm_get_disp19 ((p), (target)) << 5) | (rd)) /* Load 128 bit */ #define arm_neon_ldrq_lit(p, rd, target) arm_emit ((p), 0b00011100000000000000000000000000 | (0b10 << 30) | (arm_get_disp19 ((p), (target)) << 5) | (rd)) +#define arm_neon_ldrq_lit_fixup(p, target) *((guint32*)p) = (*((guint32*)p) & 0xff00001f) | (arm_get_disp19 ((p), (target)) << 5) /* Arithmetic (immediate) */ static G_GNUC_UNUSED inline guint32 diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c index 5a280d849ec68..b366bbfedc915 100644 --- a/src/mono/mono/mini/mini-arm64.c +++ b/src/mono/mono/mini/mini-arm64.c @@ -5430,18 +5430,14 @@ mono_arch_emit_exceptions (MonoCompile *cfg) gboolean remove = FALSE; if (ji->type == MONO_PATCH_INFO_X128) { - guint8 *pos, *patch_pos; - guint32 target_pos; + guint8 *pos; code = (guint8*)ALIGN_TO (code, 16); pos = cfg->native_code + ji->ip.i; - patch_pos = pos + 3; - target_pos = GPTRDIFF_TO_UINT32 (code - pos - 4); + arm_neon_ldrq_lit_fixup (pos, code); memcpy (code, ji->data.target, 16); code += 16; - *(guint32*)(patch_pos) = target_pos; - remove = TRUE; } From dab419552cddf000986aef761f2bf68e726c9f30 Mon Sep 17 00:00:00 2001 From: Fan Yang Date: Wed, 1 Mar 2023 07:14:20 -0800 Subject: [PATCH 10/15] Stop the simd and fp registers sharing --- src/mono/mono/mini/mini-arm64.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mono/mono/mini/mini-arm64.h b/src/mono/mono/mini/mini-arm64.h index 33c3a29466056..7dfe8eed08c06 100644 --- a/src/mono/mono/mini/mini-arm64.h +++ b/src/mono/mono/mini/mini-arm64.h @@ -23,6 +23,8 @@ #if !defined(DISABLE_SIMD) #define MONO_ARCH_SIMD_INTRINSICS 1 +#define MONO_ARCH_NEED_SIMD_BANK 1 +#define MONO_ARCH_USE_SHARED_FP_SIMD_BANK 1 #endif #define MONO_CONTEXT_SET_LLVM_EXC_REG(ctx, exc) do { (ctx)->regs [0] = (gsize)exc; } while (0) @@ -52,7 +54,7 @@ /* v8..v15 */ #define MONO_ARCH_CALLEE_SAVED_FREGS 0xff00 -#define MONO_ARCH_CALLEE_SAVED_XREGS 0 +#define MONO_ARCH_CALLEE_SAVED_XREGS MONO_ARCH_CALLEE_SAVED_FREGS #define MONO_ARCH_CALLEE_XREGS MONO_ARCH_CALLEE_FREGS From 8b4355da7a5f8d840d39c6ef436ac306cbbe783f Mon Sep 17 00:00:00 2001 From: Fan Yang Date: Thu, 2 Mar 2023 12:17:39 -0800 Subject: [PATCH 11/15] Stop v64 from emitting simd intrinsics --- src/mono/mono/mini/simd-intrinsics.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index a11998e2be9c9..217d0b1feaf93 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1213,7 +1213,10 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi // FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64 #ifdef TARGET_ARM64 - if (!COMPILE_LLVM (cfg) && id != SN_Add) + int class_size = mono_class_value_size (cmethod->klass, NULL); + if (!COMPILE_LLVM (cfg) && class_size !=16) + return NULL; + if (id != SN_Add) return NULL; #endif @@ -1262,7 +1265,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi #else return NULL; #endif -} + } case SN_Add: case SN_BitwiseAnd: case SN_BitwiseOr: @@ -1868,7 +1871,9 @@ emit_vector64_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign // FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64 #ifdef TARGET_ARM64 - if (!COMPILE_LLVM (cfg) && id != SN_get_One && id != SN_get_Zero) + if (!COMPILE_LLVM (cfg) && size != 16) + return NULL; + if (!(id == SN_get_One || id == SN_get_Zero)) return NULL; #endif From 0ed6de6e2a2fad5ab6486ba98e76a3c19cae72df Mon Sep 17 00:00:00 2001 From: Fan Yang Date: Sun, 5 Mar 2023 08:30:49 -0800 Subject: [PATCH 12/15] Move between SIMD registers --- src/mono/mono/arch/arm64/arm64-codegen.h | 3 +++ src/mono/mono/mini/mini-arm64.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/mono/mono/arch/arm64/arm64-codegen.h b/src/mono/mono/arch/arm64/arm64-codegen.h index 85aa2de9cc49d..334a550da3e23 100644 --- a/src/mono/mono/arch/arm64/arm64-codegen.h +++ b/src/mono/mono/arch/arm64/arm64-codegen.h @@ -1009,6 +1009,9 @@ arm_encode_arith_imm (int imm, guint32 *shift) #define TYPE_F32 0 #define TYPE_F64 1 +/* NEON :: move SIMD register*/ +#define arm_neon_mov(p, rd, rn) arm_neon_orr ((p), VREG_FULL, (rd), (rn), (rn)) + /* NEON :: AES */ #define arm_neon_aes_opcode(p, size, opcode, rd, rn) arm_neon_opcode_2reg ((p), VREG_FULL, 0b00001110001010000000100000000000 | (size) << 22 | (opcode) << 12, (rd), (rn)) #define arm_neon_aese(p, rd, rn) arm_neon_aes_opcode ((p), 0b00, 0b00100, (rd), (rn)) diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c index b366bbfedc915..843e18aa5cda5 100644 --- a/src/mono/mono/mini/mini-arm64.c +++ b/src/mono/mono/mini/mini-arm64.c @@ -3504,7 +3504,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) arm_neon_eor_16b (code, dreg, dreg, dreg); break; case OP_XMOVE: - arm_movw (code, dreg, sreg1); + arm_neon_mov (code, dreg, sreg1); break; case OP_XCONST: { if (cfg->compile_aot && cfg->code_exec_only) { From b5380daaee8dec73b684cf21cd6b3ee9c665a07a Mon Sep 17 00:00:00 2001 From: Fan Yang Date: Sun, 5 Mar 2023 22:10:21 -0800 Subject: [PATCH 13/15] Adjust filter logic --- src/mono/mono/mini/simd-intrinsics.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 217d0b1feaf93..c87eb03ce769b 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1213,14 +1213,14 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi // FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64 #ifdef TARGET_ARM64 - int class_size = mono_class_value_size (cmethod->klass, NULL); - if (!COMPILE_LLVM (cfg) && class_size !=16) + if (!COMPILE_LLVM (cfg) && id != SN_Add) return NULL; - if (id != SN_Add) + MonoClass *arg0_class = mono_class_from_mono_type_internal (fsig->params [0]); + int class_size = mono_class_value_size (arg0_class, NULL); + if (!COMPILE_LLVM (cfg) && class_size != 16) return NULL; #endif - - + MonoClass *klass = cmethod->klass; MonoTypeEnum arg0_type = fsig->param_count > 0 ? get_underlying_type (fsig->params [0]) : MONO_TYPE_VOID; @@ -1873,7 +1873,7 @@ emit_vector64_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign #ifdef TARGET_ARM64 if (!COMPILE_LLVM (cfg) && size != 16) return NULL; - if (!(id == SN_get_One || id == SN_get_Zero)) + if (!COMPILE_LLVM (cfg) && !(id == SN_get_One || id == SN_get_Zero)) return NULL; #endif From 618797c9e9d6784798f55077cd32743a63e31d60 Mon Sep 17 00:00:00 2001 From: Fan Yang Date: Mon, 6 Mar 2023 08:30:50 -0800 Subject: [PATCH 14/15] Keep the filter logic under non-llvm --- src/mono/mono/mini/simd-intrinsics.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index c87eb03ce769b..0f51776554c68 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1213,12 +1213,14 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi // FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64 #ifdef TARGET_ARM64 - if (!COMPILE_LLVM (cfg) && id != SN_Add) - return NULL; - MonoClass *arg0_class = mono_class_from_mono_type_internal (fsig->params [0]); - int class_size = mono_class_value_size (arg0_class, NULL); - if (!COMPILE_LLVM (cfg) && class_size != 16) - return NULL; + if (!COMPILE_LLVM (cfg)) { + if (id != SN_Add) + return NULL; + MonoClass *arg0_class = mono_class_from_mono_type_internal (fsig->params [0]); + int class_size = mono_class_value_size (arg0_class, NULL); + if (class_size != 16) + return NULL; + } #endif MonoClass *klass = cmethod->klass; @@ -1871,10 +1873,12 @@ emit_vector64_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign // FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64 #ifdef TARGET_ARM64 - if (!COMPILE_LLVM (cfg) && size != 16) - return NULL; - if (!COMPILE_LLVM (cfg) && !(id == SN_get_One || id == SN_get_Zero)) - return NULL; + if (!COMPILE_LLVM (cfg)) { + if (size != 16) + return NULL; + if (!(id == SN_get_One || id == SN_get_Zero)) + return NULL; + } #endif switch (id) { From 31a9c6c66c20af132d517baf1d5780d58979058a Mon Sep 17 00:00:00 2001 From: Fan Yang Date: Mon, 6 Mar 2023 10:51:41 -0800 Subject: [PATCH 15/15] Uncomment --- src/mono/mono/mini/simd-intrinsics.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 0f51776554c68..b7136f78092e1 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1197,10 +1197,10 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return NULL; #endif // FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64 -// #ifdef TARGET_ARM64 -// if (!(cfg->compile_aot && cfg->full_aot && !cfg->interp)) -// return NULL; -// #endif +#ifdef TARGET_ARM64 + if (!(cfg->compile_aot && cfg->full_aot && !cfg->interp)) + return NULL; +#endif int id = lookup_intrins (sri_vector_methods, sizeof (sri_vector_methods), cmethod); if (id == -1) {