diff --git a/src/mono/mono/arch/arm64/arm64-codegen.h b/src/mono/mono/arch/arm64/arm64-codegen.h index a1d87bbb6de29..334a550da3e23 100644 --- a/src/mono/mono/arch/arm64/arm64-codegen.h +++ b/src/mono/mono/arch/arm64/arm64-codegen.h @@ -456,11 +456,20 @@ arm_encode_imm7 (int imm, int size) #define arm_format_ldrfp_imm(p, size, opc, rt, rn, pimm, scale) arm_emit ((p), ((size) << 30) | (0xf << 26) | (0x1 << 24) | ((opc) << 22) | (arm_encode_pimm12 ((pimm), (scale)) << 10) | ((rn) << 5) | ((rt) << 0)) /* Load double */ -#define arm_ldrfpx(p, dt, xn, simm) arm_format_ldrfp_imm ((p), ARMSIZE_X, 0x1, dt, xn, simm, 8) +#define arm_ldrfpx(p, dt, xn, simm) arm_format_ldrfp_imm ((p), ARMSIZE_X, 0x1, (dt), (xn), (simm), 8) /* Load single */ -#define arm_ldrfpw(p, dt, xn, simm) arm_format_ldrfp_imm ((p), ARMSIZE_W, 0x1, dt, xn, simm, 4) +#define arm_ldrfpw(p, dt, xn, simm) arm_format_ldrfp_imm ((p), ARMSIZE_W, 0x1, (dt), (xn), (simm), 4) /* Load 128 bit */ -#define arm_ldrfpq(p, qt, xn, simm) arm_format_ldrfp_imm ((p), 0, 0x3, qt, xn, simm, 16) +#define arm_ldrfpq(p, qt, xn, simm) arm_format_ldrfp_imm ((p), 0x0, 0x3, (qt), (xn), (simm), 16) + +/* LDR (literal, SIMD&FP), PC-relative: bits 31:30 select the access size, bits 23:5 hold the 19-bit displacement to target */ +/* Load single */ +#define arm_neon_ldrs_lit(p, rd, target) arm_emit ((p), 0b00011100000000000000000000000000 | (0b00 << 30) | (arm_get_disp19 ((p), (target)) << 5) | (rd)) +/* Load double */ +#define arm_neon_ldrd_lit(p, rd, target) arm_emit ((p), 0b00011100000000000000000000000000 | (0b01 << 30) | (arm_get_disp19 ((p), (target)) << 5) | (rd)) +/* Load 128 bit */ +#define arm_neon_ldrq_lit(p, rd, target) arm_emit ((p), 0b00011100000000000000000000000000 | (0b10 << 30) | (arm_get_disp19 ((p), (target)) << 5) | (rd)) +#define arm_neon_ldrq_lit_fixup(p, target) (*((guint32*)(p)) = (*((guint32*)(p)) & 0xff00001f) | (arm_get_disp19 ((p), (target)) << 5)) /* Re-point the already emitted literal load at (p): bits 31:24 and 4:0 are kept, the displacement field is replaced. (p) is evaluated more than once. */ /* Arithmetic (immediate) */ static G_GNUC_UNUSED inline guint32 @@ -1000,6 +1009,9 @@ arm_encode_arith_imm (int imm, guint32 *shift) #define TYPE_F32 0 
#define TYPE_F64 1 +/* NEON :: move SIMD register (emitted as ORR rd, rn, rn on the full 128-bit register) */ +#define arm_neon_mov(p, rd, rn) arm_neon_orr ((p), VREG_FULL, (rd), (rn), (rn)) + /* NEON :: AES */ #define arm_neon_aes_opcode(p, size, opcode, rd, rn) arm_neon_opcode_2reg ((p), VREG_FULL, 0b00001110001010000000100000000000 | (size) << 22 | (opcode) << 12, (rd), (rn)) #define arm_neon_aese(p, rd, rn) arm_neon_aes_opcode ((p), 0b00, 0b00100, (rd), (rn)) diff --git a/src/mono/mono/mini/cpu-arm64.mdesc b/src/mono/mono/mini/cpu-arm64.mdesc index f47a6296363d9..43d8dd639de7d 100644 --- a/src/mono/mono/mini/cpu-arm64.mdesc +++ b/src/mono/mono/mini/cpu-arm64.mdesc @@ -122,6 +122,7 @@ r8const: dest:f len:20 label: len:0 store_membase_imm: dest:b len:20 store_membase_reg: dest:b src1:i len:20 +storex_membase: dest:b src1:x len:20 storei1_membase_imm: dest:b len:20 storei1_membase_reg: dest:b src1:i len:12 storei2_membase_imm: dest:b len:20 @@ -135,6 +136,7 @@ storei1_memindex: dest:b src1:i src2:i len:4 storei2_memindex: dest:b src1:i src2:i len:4 storei4_memindex: dest:b src1:i src2:i len:4 load_membase: dest:i src1:b len:20 +loadx_membase: dest:x src1:b len:20 loadi1_membase: dest:i src1:b len:32 loadu1_membase: dest:i src1:b len:32 loadi2_membase: dest:i src1:b len:32 @@ -493,6 +495,10 @@ atomic_store_i8: dest:b src1:i len:20 atomic_store_u8: dest:b src1:i len:20 atomic_store_r4: dest:b src1:f len:28 atomic_store_r8: dest:b src1:f len:24 +xbinop: dest:x src1:x src2:x len:4 +xzero: dest:x len:4 +xmove: dest:x src1:x len:4 +xconst: dest:x len:10 generic_class_init: src1:a len:44 clob:c gc_safe_point: src1:i len:12 clob:c diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c index 0e2b70b5f12a9..843e18aa5cda5 100644 --- a/src/mono/mono/mini/mini-arm64.c +++ b/src/mono/mono/mini/mini-arm64.c @@ -579,6 +579,20 @@ emit_strfpx (guint8 *code, int rt, int rn, int imm) return code; } +static WARN_UNUSED_RESULT guint8* +emit_strfpq (guint8 *code, int rt, int rn, int imm) +{ + if (arm_is_pimm12_scaled 
(imm, 16)) { + arm_strfpq (code, rt, rn, imm); + } else { /* imm rejected by arm_is_pimm12_scaled: form rn + imm in IP0 and store through it */ + g_assert (rn != ARMREG_IP0); /* IP0 is overwritten by emit_imm before rn is read */ + code = emit_imm (code, ARMREG_IP0, imm); + arm_addx (code, ARMREG_IP0, rn, ARMREG_IP0); + arm_strfpq (code, rt, ARMREG_IP0, 0); + } + return code; +} + static WARN_UNUSED_RESULT guint8* emit_strx (guint8 *code, int rt, int rn, int imm) { @@ -717,6 +731,20 @@ emit_ldrfpx (guint8 *code, int rt, int rn, int imm) return code; } +static WARN_UNUSED_RESULT guint8* +emit_ldrfpq (guint8 *code, int rt, int rn, int imm) /* 128-bit load of rt from [rn + imm]; returns the advanced code pointer */ +{ + if (arm_is_pimm12_scaled (imm, 16)) { + arm_ldrfpq (code, rt, rn, imm); + } else { /* imm rejected by arm_is_pimm12_scaled: form rn + imm in IP0 and load through it */ + g_assert (rn != ARMREG_IP0); /* IP0 is overwritten by emit_imm before rn is read */ + code = emit_imm (code, ARMREG_IP0, imm); + arm_addx (code, ARMREG_IP0, rn, ARMREG_IP0); + arm_ldrfpq (code, rt, ARMREG_IP0, 0); + } + return code; +} + guint8* mono_arm_emit_ldrx (guint8 *code, int rt, int rn, int imm) { @@ -2209,8 +2237,15 @@ mono_arch_allocate_vars (MonoCompile *cfg) cfg->ret->dreg = cinfo->ret.reg; break; case ArgVtypeInIRegs: - case ArgHFA: + case ArgHFA: { /* Allocate a local to hold the result, the epilog will copy it to the correct place */ + MonoType *ret_type = mini_get_underlying_type (sig->ret); + MonoClass *klass = mono_class_from_mono_type_internal (ret_type); + if (MONO_CLASS_IS_SIMD (cfg, klass)) { /* align the return-value slot to the size of the SIMD type */ + int align_simd = mono_type_size (m_class_get_byval_arg (klass), NULL); + offset = ALIGN_TO (offset, align_simd); + } + cfg->ret->opcode = OP_REGOFFSET; cfg->ret->inst_basereg = cfg->frame_reg; cfg->ret->inst_offset = offset; @@ -2220,6 +2255,7 @@ mono_arch_allocate_vars (MonoCompile *cfg) else offset += 16; break; + } case ArgVtypeByRef: /* This variable will be initialized in the prolog from R8 */ cfg->vret_addr->opcode = OP_REGOFFSET; @@ -2377,7 +2413,7 @@ mono_arch_allocate_vars (MonoCompile *cfg) ins->opcode = OP_REGOFFSET; ins->inst_basereg = cfg->frame_reg; ins->inst_offset = offset + offsets [i]; - //printf ("allocated local %d to ", i); mono_print_tree_nl (ins); + //printf ("allocated local %d to ", i); 
mono_print_ins (ins); } } offset += locals_stack_size; @@ -3235,6 +3271,52 @@ emit_branch_island (MonoCompile *cfg, guint8 *code, int start_offset) return code; } +static int +get_vector_size_macro (MonoInst *ins) +{ + int size = mono_class_value_size (ins->klass, NULL); /* only 16 (VREG_FULL) and 8 (VREG_LOW) are accepted below; any other size asserts */ + switch (size) { + case 16: + return VREG_FULL; + case 8: + return VREG_LOW; + default: + g_assert_not_reached (); + } +} + +static int +get_type_size_macro (MonoTypeEnum type) +{ + switch (type) { /* map an element type to its TYPE_* code; signed and unsigned integers share a code, unhandled types assert */ + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return TYPE_I8; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return TYPE_I16; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return TYPE_I32; + case MONO_TYPE_I8: + case MONO_TYPE_U8: + return TYPE_I64; + case MONO_TYPE_I: + case MONO_TYPE_U: /* native-sized integers follow TARGET_SIZEOF_VOID_P */ +#if TARGET_SIZEOF_VOID_P == 8 + return TYPE_I64; +#else + return TYPE_I32; +#endif + case MONO_TYPE_R4: + return TYPE_F32; + case MONO_TYPE_R8: + return TYPE_F64; + default: + g_assert_not_reached (); + } +} + void mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) { @@ -3412,6 +3494,29 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } break; } + case OP_STOREX_MEMBASE: + code = emit_strfpq (code, sreg1, dreg, ins->inst_offset); + break; + case OP_LOADX_MEMBASE: + code = emit_ldrfpq (code, dreg, sreg1, ins->inst_offset); + break; + case OP_XZERO: + arm_neon_eor_16b (code, dreg, dreg, dreg); /* EOR of a register with itself yields zero */ + break; + case OP_XMOVE: + arm_neon_mov (code, dreg, sreg1); + break; + case OP_XCONST: { + if (cfg->compile_aot && cfg->code_exec_only) { + mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_X128_GOT, ins->inst_p0); + arm_ldrx_lit (code, ARMREG_IP0, 0); + arm_ldrfpq (code, ins->dreg, ARMREG_IP0, 0); + } else { + mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_X128, ins->inst_p0); + arm_neon_ldrq_lit (code, ins->dreg, 0); /* target 0 is a placeholder; the MONO_PATCH_INFO_X128 entry added above records this load's offset */ + } + break; + } /* BRANCH */ case OP_BR: @@ -3484,6 +3589,18 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) mono_add_patch_info_rel (cfg, offset, 
MONO_PATCH_INFO_BB, ins->inst_true_bb, MONO_R_ARM64_CBZ); arm_cbnzx (code, sreg1, 0); break; + case OP_XBINOP: + switch (ins->inst_c0) { + case OP_IADD: + arm_neon_add (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2); + break; + case OP_FADD: + arm_neon_fadd (code, get_vector_size_macro (ins), get_type_size_macro (ins->inst_c1), dreg, sreg1, sreg2); + break; + default: + g_assert_not_reached (); + } + break; /* ALU */ case OP_IADD: arm_addw (code, dreg, sreg1, sreg2); @@ -5265,6 +5382,8 @@ mono_arch_emit_exceptions (MonoCompile *cfg) size += 32; exc_throw_found [i] = TRUE; } + } else if (ji->type == MONO_PATCH_INFO_X128) { + size += 16 + 15; /* 16 bytes of constant data + worst-case padding to reach 16-byte alignment */ } } @@ -5306,6 +5425,36 @@ mono_arch_emit_exceptions (MonoCompile *cfg) set_code_cursor (cfg, code); } + /* Emit the 16-byte data of each X128 constant at a 16-byte aligned address and patch the PC-relative literal load that reads it */ + for (ji = cfg->patch_info; ji; ji = ji->next) { + gboolean remove = FALSE; + + if (ji->type == MONO_PATCH_INFO_X128) { + guint8 *pos; + + code = (guint8*)ALIGN_TO (code, 16); + pos = cfg->native_code + ji->ip.i; + arm_neon_ldrq_lit_fixup (pos, code); + memcpy (code, ji->data.target, 16); + code += 16; + + remove = TRUE; + } + + if (remove) { /* unlink ji from cfg->patch_info; ji->next is left intact so the outer loop can continue */ + if (ji == cfg->patch_info) + cfg->patch_info = ji->next; + else { + MonoJumpInfo *tmp; + + for (tmp = cfg->patch_info; tmp->next != ji; tmp = tmp->next) + ; + tmp->next = ji->next; + } + } + set_code_cursor (cfg, code); + } + set_code_cursor (cfg, code); } diff --git a/src/mono/mono/mini/mini-arm64.h b/src/mono/mono/mini/mini-arm64.h index 33c3a29466056..7dfe8eed08c06 100644 --- a/src/mono/mono/mini/mini-arm64.h +++ b/src/mono/mono/mini/mini-arm64.h @@ -23,6 +23,8 @@ #if !defined(DISABLE_SIMD) #define MONO_ARCH_SIMD_INTRINSICS 1 +#define MONO_ARCH_NEED_SIMD_BANK 1 +#define MONO_ARCH_USE_SHARED_FP_SIMD_BANK 1 #endif #define MONO_CONTEXT_SET_LLVM_EXC_REG(ctx, exc) do { (ctx)->regs [0] = (gsize)exc; } while (0) @@ -52,7 +54,7 @@ /* v8..v15 */ #define 
MONO_ARCH_CALLEE_SAVED_FREGS 0xff00 -#define MONO_ARCH_CALLEE_SAVED_XREGS 0 +#define MONO_ARCH_CALLEE_SAVED_XREGS MONO_ARCH_CALLEE_SAVED_FREGS /* NOTE(review): AAPCS64 appears to require callees to preserve only the low 64 bits of v8..v15 - confirm 128-bit values are never kept live in these registers across calls */ #define MONO_ARCH_CALLEE_XREGS MONO_ARCH_CALLEE_FREGS diff --git a/src/mono/mono/mini/mini.c b/src/mono/mono/mini/mini.c index 8d95874df53df..d927e38cf481a 100644 --- a/src/mono/mono/mini/mini.c +++ b/src/mono/mono/mini/mini.c @@ -1502,18 +1502,15 @@ mono_allocate_stack_slots (MonoCompile *cfg, gboolean backward, guint32 *stack_s * Align the size too so the code generated for passing vtypes in * registers doesn't overwrite random locals. */ - size = (size + (align - 1)) & ~(align -1); + size = ALIGN_TO (size, align); } if (backward) { - offset += size; - offset += align - 1; - offset &= ~(align - 1); + offset = ALIGN_TO (offset + size, align); slot = offset; } else { - offset += align - 1; - offset &= ~(align - 1); + offset = ALIGN_TO (offset, align); slot = offset; offset += size; } diff --git a/src/mono/mono/mini/mini.h b/src/mono/mono/mini/mini.h index 245e334fe2e5a..3c33ecee4b300 100644 --- a/src/mono/mono/mini/mini.h +++ b/src/mono/mono/mini/mini.h @@ -303,15 +303,7 @@ enum { #define MONO_IS_REAL_MOVE(ins) (((ins)->opcode == OP_MOVE) || ((ins)->opcode == OP_FMOVE) || ((ins)->opcode == OP_XMOVE) || ((ins)->opcode == OP_RMOVE)) #define MONO_IS_ZERO(ins) (((ins)->opcode == OP_VZERO) || ((ins)->opcode == OP_XZERO)) -#ifdef TARGET_ARM64 -/* - * SIMD is only supported on arm64 when using the LLVM backend. When not using - * the LLVM backend, treat SIMD datatypes as regular value types. 
- */ -#define MONO_CLASS_IS_SIMD(cfg, klass) (((cfg)->opt & MONO_OPT_SIMD) && COMPILE_LLVM (cfg) && m_class_is_simd_type (klass)) -#else #define MONO_CLASS_IS_SIMD(cfg, klass) (((cfg)->opt & MONO_OPT_SIMD) && m_class_is_simd_type (klass) && (COMPILE_LLVM (cfg) || mono_type_size (m_class_get_byval_arg (klass), NULL) == 16)) -#endif #else diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index a1dc7c3683f16..b7136f78092e1 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1192,9 +1192,11 @@ is_element_type_primitive (MonoType *vector_type) static MonoInst* emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsig, MonoInst **args) { +#if defined(TARGET_AMD64) || defined(TARGET_WASM) if (!COMPILE_LLVM (cfg)) return NULL; - +#endif +// FIXME: This limitation can be removed once everything here is supported by the mini JIT on arm64 #ifdef TARGET_ARM64 if (!(cfg->compile_aot && cfg->full_aot && !cfg->interp)) return NULL; @@ -1208,7 +1210,19 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi if (!strcmp (m_class_get_name (cfg->method->klass), "Vector256")) return NULL; // TODO: Fix Vector256.WithUpper/WithLower - + +// FIXME: This limitation can be removed once everything here is supported by the mini JIT on arm64 +#ifdef TARGET_ARM64 + if (!COMPILE_LLVM (cfg)) { + if (id != SN_Add) + return NULL; + MonoClass *arg0_class = mono_class_from_mono_type_internal (fsig->params [0]); + int class_size = mono_class_value_size (arg0_class, NULL); + if (class_size != 16) + return NULL; + } +#endif + MonoClass *klass = cmethod->klass; MonoTypeEnum arg0_type = fsig->param_count > 0 ? 
get_underlying_type (fsig->params [0]) : MONO_TYPE_VOID; @@ -1253,7 +1267,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi #else return NULL; #endif -} + } case SN_Add: case SN_BitwiseAnd: case SN_BitwiseOr: @@ -1798,6 +1812,7 @@ static guint16 vector64_vector128_t_methods [] = { SN_get_AllBitsSet, SN_get_Count, SN_get_IsSupported, + SN_get_One, SN_get_Zero, SN_op_Addition, SN_op_BitwiseAnd, @@ -1851,8 +1866,20 @@ emit_vector64_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign break; } +#if defined(TARGET_AMD64) || defined(TARGET_WASM) if (!COMPILE_LLVM (cfg)) return NULL; +#endif + +// FIXME: This limitation can be removed once everything here is supported by the mini JIT on arm64 +#ifdef TARGET_ARM64 + if (!COMPILE_LLVM (cfg)) { + if (size != 16) + return NULL; + if (!(id == SN_get_One || id == SN_get_Zero)) + return NULL; + } +#endif switch (id) { case SN_get_Count: { @@ -1868,6 +1895,80 @@ emit_vector64_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign case SN_get_AllBitsSet: { return emit_xones (cfg, klass); } + case SN_get_One: { /* build a 16-byte constant with every element set to 1 and pass it to emit_xconst_v128; len is defined outside this hunk, presumably the element count - TODO confirm */ + if (size != 16) + return NULL; + switch (etype->type) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: { + guint8 value[16]; + + for (int i = 0; i < len; ++i) { + value [i] = 1; + } + + return emit_xconst_v128 (cfg, klass, value); + } + case MONO_TYPE_I2: + case MONO_TYPE_U2: { + guint16 value[8]; + + for (int i = 0; i < len; ++i) { + value [i] = 1; + } + + return emit_xconst_v128 (cfg, klass, (guint8*)value); + } +#if TARGET_SIZEOF_VOID_P == 4 + case MONO_TYPE_I: + case MONO_TYPE_U: +#endif + case MONO_TYPE_I4: + case MONO_TYPE_U4: { + guint32 value[4]; + + for (int i = 0; i < len; ++i) { + value [i] = 1; + } + + return emit_xconst_v128 (cfg, klass, (guint8*)value); + } +#if TARGET_SIZEOF_VOID_P == 8 + case MONO_TYPE_I: + case MONO_TYPE_U: +#endif + case MONO_TYPE_I8: + case MONO_TYPE_U8: { + guint64 value[2]; + + for (int i = 0; i < len; ++i) { + value [i] = 1; + } 
+ + return emit_xconst_v128 (cfg, klass, (guint8*)value); + } + case MONO_TYPE_R4: { /* floating-point elements are filled with the value 1.0, not the integer bit pattern 1 */ + float value[4]; + + for (int i = 0; i < len; ++i) { + value [i] = 1.0f; + } + + return emit_xconst_v128 (cfg, klass, (guint8*)value); + } + case MONO_TYPE_R8: { + double value[2]; + + for (int i = 0; i < len; ++i) { + value [i] = 1.0; + } + + return emit_xconst_v128 (cfg, klass, (guint8*)value); + } + default: + g_assert_not_reached (); + } + } case SN_op_Addition: case SN_op_BitwiseAnd: case SN_op_BitwiseOr: