Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Mono] Add initial arm64 hardware intrinsics support for mini JIT #82420

Merged
merged 15 commits into from
Mar 6, 2023
Merged
6 changes: 3 additions & 3 deletions src/mono/mono/arch/arm64/arm64-codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -456,11 +456,11 @@ arm_encode_imm7 (int imm, int size)
/*
 * Emit a load of an FP/SIMD register RT from base register RN plus an
 * immediate offset. The instruction word is built from SIZE (shifted to bit 30),
 * OPC (shifted to bit 22), the result of arm_encode_pimm12 (PIMM, SCALE)
 * (shifted to bit 10), RN (shifted to bit 5) and RT (bit 0).
 */
#define arm_format_ldrfp_imm(p, size, opc, rt, rn, pimm, scale) arm_emit ((p), ((size) << 30) | (0xf << 26) | (0x1 << 24) | ((opc) << 22) | (arm_encode_pimm12 ((pimm), (scale)) << 10) | ((rn) << 5) | ((rt) << 0))

/*
 * NOTE(review): each of the three load macros below appears twice, first with
 * bare arguments and then with parenthesized arguments. This looks like the
 * removed/added lines of a diff; confirm that only the parenthesized form is kept.
 */
/* Load double: ARMSIZE_X, offset passed with a scale of 8 */
#define arm_ldrfpx(p, dt, xn, simm) arm_format_ldrfp_imm ((p), ARMSIZE_X, 0x1, dt, xn, simm, 8)
#define arm_ldrfpx(p, dt, xn, simm) arm_format_ldrfp_imm ((p), ARMSIZE_X, 0x1, (dt), (xn), (simm), 8)
/* Load single: ARMSIZE_W, offset passed with a scale of 4 */
#define arm_ldrfpw(p, dt, xn, simm) arm_format_ldrfp_imm ((p), ARMSIZE_W, 0x1, dt, xn, simm, 4)
#define arm_ldrfpw(p, dt, xn, simm) arm_format_ldrfp_imm ((p), ARMSIZE_W, 0x1, (dt), (xn), (simm), 4)
/* Load 128 bit: size field 0 with opc 0x3, offset passed with a scale of 16 */
#define arm_ldrfpq(p, qt, xn, simm) arm_format_ldrfp_imm ((p), 0, 0x3, qt, xn, simm, 16)
#define arm_ldrfpq(p, qt, xn, simm) arm_format_ldrfp_imm ((p), 0x0, 0x3, (qt), (xn), (simm), 16)

/* Arithmetic (immediate) */
static G_GNUC_UNUSED inline guint32
Expand Down
39 changes: 34 additions & 5 deletions src/mono/mono/mini/mini-arm64.c
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,20 @@ emit_strfpx (guint8 *code, int rt, int rn, int imm)
return code;
}

/*
 * emit_strfpq:
 *
 *   Emit code to store the 128 bit FP/SIMD register RT to the address RN + IMM.
 * When IMM can be used directly as the immediate of the store, a single
 * instruction is emitted. Otherwise the address is computed into ARMREG_IP0
 * first, so RN must not be ARMREG_IP0 in that case.
 *
 * Returns the updated code pointer.
 */
static WARN_UNUSED_RESULT guint8*
emit_strfpq (guint8 *code, int rt, int rn, int imm)
{
	/*
	 * The range check has to use the scale of the 128 bit store, not the
	 * 64 bit one tested by arm_is_strx_imm ().
	 * NOTE(review): assumes arm_strfpq encodes its offset with a scale of 16,
	 * mirroring arm_ldrfpq - confirm against arm64-codegen.h.
	 */
	if (arm_is_pimm12_scaled (imm, 16)) {
		arm_strfpq (code, rt, rn, imm);
	} else {
		/* IP0 is used as a scratch register to hold RN + IMM */
		g_assert (rn != ARMREG_IP0);
		code = emit_imm (code, ARMREG_IP0, imm);
		arm_addx (code, ARMREG_IP0, rn, ARMREG_IP0);
		arm_strfpq (code, rt, ARMREG_IP0, 0);
	}
	return code;
}

static WARN_UNUSED_RESULT guint8*
emit_strx (guint8 *code, int rt, int rn, int imm)
{
Expand Down Expand Up @@ -717,6 +731,20 @@ emit_ldrfpx (guint8 *code, int rt, int rn, int imm)
return code;
}

/*
 * emit_ldrfpq:
 *
 *   Emit code to load the 128 bit FP/SIMD register RT from the address RN + IMM.
 * When IMM can be used directly as the immediate of the load, a single
 * instruction is emitted. Otherwise the address is computed into ARMREG_IP0
 * first, so RN must not be ARMREG_IP0 in that case.
 *
 * Returns the updated code pointer.
 */
static WARN_UNUSED_RESULT guint8*
emit_ldrfpq (guint8 *code, int rt, int rn, int imm)
{
	/*
	 * arm_ldrfpq passes a scale of 16 to the immediate encoder, so the range
	 * check must use the same scale. Checking with 8 would let offsets which
	 * are a multiple of 8 but not of 16 reach the scale-16 encoder.
	 */
	if (arm_is_pimm12_scaled (imm, 16)) {
		arm_ldrfpq (code, rt, rn, imm);
	} else {
		/* IP0 is used as a scratch register to hold RN + IMM */
		g_assert (rn != ARMREG_IP0);
		code = emit_imm (code, ARMREG_IP0, imm);
		arm_addx (code, ARMREG_IP0, rn, ARMREG_IP0);
		arm_ldrfpq (code, rt, ARMREG_IP0, 0);
	}
	return code;
}

guint8*
mono_arm_emit_ldrx (guint8 *code, int rt, int rn, int imm)
{
Expand Down Expand Up @@ -2212,9 +2240,10 @@ mono_arch_allocate_vars (MonoCompile *cfg)
case ArgHFA: {
/* Allocate a local to hold the result, the epilog will copy it to the correct place */
MonoType *ret_type = mini_get_underlying_type (sig->ret);
if (MONO_CLASS_IS_SIMD (cfg, mono_class_from_mono_type_internal (ret_type))) {
int align = 16;
offset = (offset + (align - 1)) & ~(align -1);
MonoClass *klass = mono_class_from_mono_type_internal (ret_type);
if (MONO_CLASS_IS_SIMD (cfg, klass)) {
int align = mono_type_size (m_class_get_byval_arg (klass), NULL);
offset = ALIGN_TO (offset, align);
}

cfg->ret->opcode = OP_REGOFFSET;
Expand Down Expand Up @@ -3466,10 +3495,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
break;
}
case OP_STOREX_MEMBASE:
arm_strfpq (code, sreg1, dreg, ins->inst_offset);
code = emit_strfpq (code, sreg1, dreg, ins->inst_offset);
break;
case OP_LOADX_MEMBASE:
arm_ldrfpq (code, dreg, sreg1, ins->inst_offset);
code = emit_ldrfpq (code, dreg, sreg1, ins->inst_offset);
break;
case OP_XZERO:
arm_neon_eor_16b (code, dreg, dreg, dreg);
Expand Down
9 changes: 3 additions & 6 deletions src/mono/mono/mini/mini.c
Original file line number Diff line number Diff line change
Expand Up @@ -1502,18 +1502,15 @@ mono_allocate_stack_slots (MonoCompile *cfg, gboolean backward, guint32 *stack_s
* Align the size too so the code generated for passing vtypes in
* registers doesn't overwrite random locals.
*/
size = (size + (align - 1)) & ~(align -1);
size = ALIGN_TO (size, align);
}

if (backward) {
offset += size;
offset += align - 1;
offset &= ~(align - 1);
offset = ALIGN_TO (size, align);
fanyang-mono marked this conversation as resolved.
Show resolved Hide resolved
slot = offset;
}
else {
offset += align - 1;
offset &= ~(align - 1);
offset = ALIGN_TO (0, align);
slot = offset;
fanyang-mono marked this conversation as resolved.
Show resolved Hide resolved
offset += size;
}
Expand Down