Skip to content

Commit

Permalink
Implement complete PackedSimd feature set in interpreter
Browse files Browse the repository at this point in the history
Custom PackedSimd.Shuffle in jiterp
Simplify definition of intrinsics with custom C implementations
Clean up dummy interp dregs
Many other commits squashed
  • Loading branch information
kg committed Jul 1, 2023
1 parent 1db4357 commit 1b237a2
Show file tree
Hide file tree
Showing 9 changed files with 966 additions and 314 deletions.
387 changes: 293 additions & 94 deletions src/mono/mono/mini/interp/interp-simd-intrins.def

Large diffs are not rendered by default.

261 changes: 249 additions & 12 deletions src/mono/mono/mini/interp/interp-simd.c
Original file line number Diff line number Diff line change
Expand Up @@ -585,42 +585,276 @@ _interp_wasm_simd_assert_not_reached (v128_t lhs, v128_t rhs) {
g_assert_not_reached ();
}

// Number of lanes of type lane_type that fit in one v128_t vector.
#define LANE_COUNT(lane_type) (sizeof(v128_t) / sizeof(lane_type))

// ensure the lane is valid by wrapping it (in AOT it would fail to compile)
// Only the first byte at lane_ptr is read; the value is masked with
// LANE_COUNT(lane_type) - 1, so the result is always a valid lane index as
// long as the lane count is a power of two.
// The expansion and the pointer argument are fully parenthesized so the macro
// can be used inside larger expressions.
#define WRAP_LANE(lane_type, lane_ptr) \
	(*((unsigned char *)(lane_ptr)) & (LANE_COUNT(lane_type) - 1))

// Function-body helper: expects `res`, `vec` and `lane` to be in scope.
// Reads the lane_type element selected by the wrapped lane index from the
// vector at vec and stores it at res converted to result_type.
#define EXTRACT_LANE(result_type, lane_type) \
	do { \
		const int lane_index = WRAP_LANE(lane_type, lane); \
		const lane_type *source_lanes = (const lane_type *)vec; \
		*((result_type *)res) = source_lanes[lane_index]; \
	} while (0)

// Function-body helper: expects `res`, `vec`, `lane` and `value` to be in scope.
// Produces a copy of the vector at vec in which the lane selected by the
// wrapped lane index is replaced by the lane_type read from value.
#define REPLACE_LANE(lane_type) \
	do { \
		const int lane_index = WRAP_LANE(lane_type, lane); \
		/* work on a local copy and publish it with a single store to res */ \
		v128_t updated = *((v128_t *)vec); \
		((lane_type *)&updated)[lane_index] = *(lane_type *)value; \
		*((v128_t *)res) = updated; \
	} while (0)

// Reads the gint8 lane selected by *lane (wrapped to the lane count) from the
// vector at vec and stores it at res as a gint32 (sign-extending conversion).
static void
interp_packedsimd_extractlane_i1 (gpointer res, gpointer vec, gpointer lane) {
EXTRACT_LANE(gint32, gint8);
}

// Reads the guint8 lane selected by *lane (wrapped to the lane count) from the
// vector at vec and stores it at res as a gint32 (zero-extending conversion).
static void
interp_packedsimd_extractlane_u1 (gpointer res, gpointer vec, gpointer lane) {
EXTRACT_LANE(gint32, guint8);
}

// Reads the gint16 lane selected by *lane (wrapped to the lane count) from the
// vector at vec and stores it at res as a gint32 (sign-extending conversion).
static void
interp_packedsimd_extractlane_i2 (gpointer res, gpointer vec, gpointer lane) {
EXTRACT_LANE(gint32, gint16);
}

// Reads the guint16 lane selected by *lane (wrapped to the lane count) from the
// vector at vec and stores it at res as a gint32 (zero-extending conversion).
static void
interp_packedsimd_extractlane_u2 (gpointer res, gpointer vec, gpointer lane) {
EXTRACT_LANE(gint32, guint16);
}

// Reads the gint32 lane selected by *lane (wrapped to the lane count) from the
// vector at vec and stores it unchanged at res.
static void
interp_packedsimd_extractlane_i4 (gpointer res, gpointer vec, gpointer lane) {
EXTRACT_LANE(gint32, gint32);
}

// Reads the gint64 lane selected by *lane (wrapped to the lane count) from the
// vector at vec and stores it unchanged at res (res receives 8 bytes).
static void
interp_packedsimd_extractlane_i8 (gpointer res, gpointer vec, gpointer lane) {
EXTRACT_LANE(gint64, gint64);
}

// Reads the float lane selected by *lane (wrapped to the lane count) from the
// vector at vec and stores it unchanged at res.
static void
interp_packedsimd_extractlane_r4 (gpointer res, gpointer vec, gpointer lane) {
EXTRACT_LANE(float, float);
}

// Reads the double lane selected by *lane (wrapped to the lane count) from the
// vector at vec and stores it unchanged at res (res receives 8 bytes).
static void
interp_packedsimd_extractlane_r8 (gpointer res, gpointer vec, gpointer lane) {
EXTRACT_LANE(double, double);
}

// Stores at res a copy of the vector at vec whose gint8 lane selected by *lane
// (wrapped to the lane count) is replaced with the gint8 read from value.
static void
interp_packedsimd_replacelane_i1 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
REPLACE_LANE(gint8);
}

// Stores at res a copy of the vector at vec whose gint16 lane selected by *lane
// (wrapped to the lane count) is replaced with the gint16 read from value.
static void
interp_packedsimd_replacelane_i2 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
REPLACE_LANE(gint16);
}

// Stores at res a copy of the vector at vec whose gint32 lane selected by *lane
// (wrapped to the lane count) is replaced with the gint32 read from value.
static void
interp_packedsimd_replacelane_i4 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
REPLACE_LANE(gint32);
}

// Stores at res a copy of the vector at vec whose gint64 lane selected by *lane
// (wrapped to the lane count) is replaced with the gint64 read from value.
static void
interp_packedsimd_replacelane_i8 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
REPLACE_LANE(gint64);
}

// Stores at res a copy of the vector at vec whose float lane selected by *lane
// (wrapped to the lane count) is replaced with the float read from value.
static void
interp_packedsimd_replacelane_r4 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
REPLACE_LANE(float);
}

// Stores at res a copy of the vector at vec whose double lane selected by *lane
// (wrapped to the lane count) is replaced with the double read from value.
static void
interp_packedsimd_replacelane_r8 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
REPLACE_LANE(double);
}

// Element-wise shuffle across two source vectors with run-time selectors.
// For each of the 16 output elements the matching selector is masked to
// 0..31: values 0..15 pick from the vector at _lower, 16..31 pick element
// (selector - 16) from the vector at _upper. All three inputs are copied into
// locals before res is written.
// NOTE(review): v128_i1 is declared elsewhere; presumably a 16 x 8-bit vector
// type - confirm against its typedef.
static void
interp_packedsimd_shuffle (gpointer res, gpointer _lower, gpointer _upper, gpointer _indices) {
	v128_i1 selectors = *((v128_i1 *)_indices);
	v128_i1 low_source = *((v128_i1 *)_lower);
	v128_i1 high_source = *((v128_i1 *)_upper);
	v128_i1 shuffled = { 0 };

	for (int out = 0; out < 16; out++) {
		// only the low five bits of a selector are significant
		int pick = selectors[out] & 31;
		if (pick <= 15)
			shuffled[out] = low_source[pick];
		else
			shuffled[out] = high_source[pick - 16];
	}

	*((v128_i1 *)res) = shuffled;
}

// Function-body helper: expects `res` and `addr_of_addr` to be in scope.
// addr_of_addr points at a pointer; that pointer is passed to fn and the
// v128_t it returns is stored at res.
#define INDIRECT_LOAD(fn) \
	do { \
		void *source_address = *(void **)addr_of_addr; \
		*(v128_t *)res = fn (source_address); \
	} while (0)

// Calls wasm_v128_load on the address held in *addr_of_addr (a full 128-bit
// load) and stores the resulting vector at res.
static void
interp_packedsimd_load128 (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_v128_load);
}

// Calls wasm_v128_load32_zero on the address held in *addr_of_addr and stores
// the resulting vector at res (per the intrinsic: 32 bits loaded into the low
// lane, remaining lanes zeroed).
static void
interp_packedsimd_load32_zero (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_v128_load32_zero);
}

// Calls wasm_v128_load64_zero on the address held in *addr_of_addr and stores
// the resulting vector at res (per the intrinsic: 64 bits loaded into the low
// lane, remaining lane zeroed).
static void
interp_packedsimd_load64_zero (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_v128_load64_zero);
}

// Calls wasm_v128_load8_splat on the address held in *addr_of_addr and stores
// the resulting vector at res (per the intrinsic: one 8-bit value replicated
// to every lane).
static void
interp_packedsimd_load8_splat (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_v128_load8_splat);
}

// Calls wasm_v128_load16_splat on the address held in *addr_of_addr and stores
// the resulting vector at res (per the intrinsic: one 16-bit value replicated
// to every lane).
static void
interp_packedsimd_load16_splat (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_v128_load16_splat);
}

// Calls wasm_v128_load32_splat on the address held in *addr_of_addr and stores
// the resulting vector at res (per the intrinsic: one 32-bit value replicated
// to every lane).
static void
interp_packedsimd_load32_splat (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_v128_load32_splat);
}

// Calls wasm_v128_load64_splat on the address held in *addr_of_addr and stores
// the resulting vector at res (per the intrinsic: one 64-bit value replicated
// to both lanes).
static void
interp_packedsimd_load64_splat (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_v128_load64_splat);
}

// Calls wasm_i16x8_load8x8 on the address held in *addr_of_addr and stores the
// resulting vector at res (per the intrinsic: eight 8-bit values sign-extended
// to 16-bit lanes).
static void
interp_packedsimd_load8x8_s (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_i16x8_load8x8);
}

// Calls wasm_u16x8_load8x8 on the address held in *addr_of_addr and stores the
// resulting vector at res (per the intrinsic: eight 8-bit values zero-extended
// to 16-bit lanes).
static void
interp_packedsimd_load8x8_u (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_u16x8_load8x8);
}

// Calls wasm_i32x4_load16x4 on the address held in *addr_of_addr and stores the
// resulting vector at res (per the intrinsic: four 16-bit values sign-extended
// to 32-bit lanes).
static void
interp_packedsimd_load16x4_s (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_i32x4_load16x4);
}

// Calls wasm_u32x4_load16x4 on the address held in *addr_of_addr and stores the
// resulting vector at res (per the intrinsic: four 16-bit values zero-extended
// to 32-bit lanes).
static void
interp_packedsimd_load16x4_u (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_u32x4_load16x4);
}

// Calls wasm_i64x2_load32x2 on the address held in *addr_of_addr and stores the
// resulting vector at res (per the intrinsic: two 32-bit values sign-extended
// to 64-bit lanes).
static void
interp_packedsimd_load32x2_s (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_i64x2_load32x2);
}

// Calls wasm_u64x2_load32x2 on the address held in *addr_of_addr and stores the
// resulting vector at res (per the intrinsic: two 32-bit values zero-extended
// to 64-bit lanes).
static void
interp_packedsimd_load32x2_u (gpointer res, gpointer addr_of_addr) {
INDIRECT_LOAD(wasm_u64x2_load32x2);
}

// Stores the 128-bit vector at vec to the address held in *addr_of_addr.
// The store goes through wasm_v128_store (the counterpart of the
// wasm_v128_load used by interp_packedsimd_load128) rather than a plain
// v128_t dereference, so the caller-supplied destination is not assumed to be
// 16-byte aligned.
static void
interp_packedsimd_store (gpointer res, gpointer addr_of_addr, gpointer vec) {
	// HACK: Result is unused because Store has a void return value
	void *destination = *(void **)addr_of_addr;
	wasm_v128_store (destination, *(v128_t *)vec);
}

// Function-body helper: expects `addr_of_addr`, `vec` and `lane` to be in scope.
// Copies the lane_type lane selected by the wrapped lane index out of the
// vector at vec and writes it to the address held in *addr_of_addr.
// The write uses memcpy so the destination address does not have to be
// naturally aligned for lane_type.
#define INDIRECT_STORE_LANE(lane_type) \
	do { \
		int _lane = WRAP_LANE(lane_type, lane); \
		lane_type _lane_value = ((lane_type *)vec)[_lane]; \
		memcpy (*(void **)addr_of_addr, &_lane_value, sizeof (_lane_value)); \
	} while (0)

// Writes the guint8 lane of the vector at vec selected by *lane (wrapped to the
// lane count) to the address held in *addr_of_addr. res is not written.
static void
interp_packedsimd_store8_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
INDIRECT_STORE_LANE(guint8);
}

// Writes the guint16 lane of the vector at vec selected by *lane (wrapped to the
// lane count) to the address held in *addr_of_addr. res is not written.
static void
interp_packedsimd_store16_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
INDIRECT_STORE_LANE(guint16);
}

// Writes the guint32 lane of the vector at vec selected by *lane (wrapped to the
// lane count) to the address held in *addr_of_addr. res is not written.
static void
interp_packedsimd_store32_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
INDIRECT_STORE_LANE(guint32);
}

// Writes the guint64 lane of the vector at vec selected by *lane (wrapped to the
// lane count) to the address held in *addr_of_addr. res is not written.
static void
interp_packedsimd_store64_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
INDIRECT_STORE_LANE(guint64);
}

// Function-body helper: expects `res`, `addr_of_addr`, `vec` and `lane` to be
// in scope. Produces a copy of the vector at vec in which the lane selected by
// the wrapped lane index is replaced by one lane_type value read from the
// address held in *addr_of_addr.
// The scalar is read with memcpy so the source address does not have to be
// naturally aligned for lane_type, and the vector copies are sized from the
// temporary array instead of a literal byte count.
#define INDIRECT_LOAD_LANE(lane_type) \
	do { \
		int _lane = WRAP_LANE(lane_type, lane); \
		/* we need temporary storage to do this since res may be the same as vec, addr_of_addr, or lane */ \
		lane_type lanes[LANE_COUNT(lane_type)]; \
		memcpy (lanes, vec, sizeof (lanes)); \
		memcpy (&lanes[_lane], *(void **)addr_of_addr, sizeof (lane_type)); \
		memcpy (res, lanes, sizeof (lanes)); \
	} while (0)

// Stores at res a copy of the vector at vec whose guint8 lane selected by *lane
// (wrapped to the lane count) is replaced by the guint8 read from the address
// held in *addr_of_addr.
static void
interp_packedsimd_load8_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
INDIRECT_LOAD_LANE(guint8);
}

// Stores at res a copy of the vector at vec whose guint16 lane selected by *lane
// (wrapped to the lane count) is replaced by the guint16 read from the address
// held in *addr_of_addr.
static void
interp_packedsimd_load16_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
INDIRECT_LOAD_LANE(guint16);
}

// Stores at res a copy of the vector at vec whose guint32 lane selected by *lane
// (wrapped to the lane count) is replaced by the guint32 read from the address
// held in *addr_of_addr.
static void
interp_packedsimd_load32_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
INDIRECT_LOAD_LANE(guint32);
}

// Stores at res a copy of the vector at vec whose guint64 lane selected by *lane
// (wrapped to the lane count) is replaced by the guint64 read from the address
// held in *addr_of_addr.
static void
interp_packedsimd_load64_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
INDIRECT_LOAD_LANE(guint64);
}

// Generates `_mono_interp_simd_<c_intrinsic> (res, v1)`: passes the raw
// pointer v1 to c_intrinsic and stores the v128_t it returns at res.
// name, arg1 and wasm_opcode are accepted but not used by this expansion.
#define INTERP_WASM_SIMD_INTRINSIC_V_P(name, arg1, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1) { \
	*((v128_t *)res) = c_intrinsic (v1); \
}

// Generates `_mono_interp_simd_<c_intrinsic> (res, v1)`: calls c_intrinsic
// with the v128_t read from v1 and stores the v128_t it returns at res.
// name, arg1 and wasm_opcode are accepted but not used by this expansion.
#define INTERP_WASM_SIMD_INTRINSIC_V_V(name, arg1, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1) { \
	*((v128_t *)res) = c_intrinsic (*((v128_t *)v1)); \
}

// Generates `_mono_interp_simd_<c_intrinsic> (res, v1)`: calls c_intrinsic
// with the v128_t read from v1 and stores its result at res as an int32_t.
// name, arg1 and wasm_opcode are accepted but not used by this expansion.
#define INTERP_WASM_SIMD_INTRINSIC_I_V(name, arg1, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1) { \
	*((int32_t *)res) = c_intrinsic (*((v128_t *)v1)); \
}

// Generates `_mono_interp_simd_<c_intrinsic> (res, v1, v2)`: calls c_intrinsic
// with the v128_t values read from v1 and v2 and stores the v128_t it returns
// at res.
// name, arg1 and wasm_opcode are accepted but not used by this expansion.
#define INTERP_WASM_SIMD_INTRINSIC_V_VV(name, arg1, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1, gpointer v2) { \
	*((v128_t *)res) = c_intrinsic (*((v128_t *)v1), *((v128_t *)v2)); \
}

// Generates `_mono_interp_simd_<c_intrinsic> (res, v1, v2)`: calls c_intrinsic
// with the v128_t read from v1 and the int read from v2, and stores the v128_t
// it returns at res.
// name, arg1 and wasm_opcode are accepted but not used by this expansion.
#define INTERP_WASM_SIMD_INTRINSIC_V_VI(name, arg1, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1, gpointer v2) { \
	*((v128_t *)res) = c_intrinsic (*((v128_t *)v1), *((int *)v2)); \
}

// Generates `_mono_interp_simd_<c_intrinsic> (res, v1, v2, v3)`: calls
// c_intrinsic with the v128_t values read from v1, v2 and v3 and stores the
// v128_t it returns at res.
// name, arg1 and wasm_opcode are accepted but not used by this expansion.
#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(name, arg1, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1, gpointer v2, gpointer v3) { \
	*((v128_t *)res) = c_intrinsic (*((v128_t *)v1), *((v128_t *)v2), *((v128_t *)v3)); \
}

// The V_C1/V_C2/V_C3 entries expand to nothing in this pass, so no wrapper
// function is generated for them here.
// NOTE(review): presumably these entries name hand-written C functions (the
// parameter is called c_function) taking 1, 2 or 3 arguments - confirm against
// interp-simd-intrins.def.
#define INTERP_WASM_SIMD_INTRINSIC_V_C1(name, arg1, c_function, wasm_opcode)
#define INTERP_WASM_SIMD_INTRINSIC_V_C2(name, arg1, c_function, wasm_opcode)
#define INTERP_WASM_SIMD_INTRINSIC_V_C3(name, arg1, c_function, wasm_opcode)

#include "interp-simd-intrins.def"

#undef INTERP_WASM_SIMD_INTRINSIC_V_P
Expand All @@ -629,6 +863,9 @@ _mono_interp_simd_ ## id (gpointer res, gpointer v1, gpointer v2, gpointer v3) {
#undef INTERP_WASM_SIMD_INTRINSIC_V_VV
#undef INTERP_WASM_SIMD_INTRINSIC_V_VI
#undef INTERP_WASM_SIMD_INTRINSIC_V_VVV
#undef INTERP_WASM_SIMD_INTRINSIC_V_C1
#undef INTERP_WASM_SIMD_INTRINSIC_V_C2
#undef INTERP_WASM_SIMD_INTRINSIC_V_C3

// Now generate the wasm opcode tables for the intrinsics

Expand Down
Loading

0 comments on commit 1b237a2

Please sign in to comment.