diff --git a/src/mono/mono/mini/interp/mintops.h b/src/mono/mono/mini/interp/mintops.h index 03fbf93f931ea..b20733dcb0b90 100644 --- a/src/mono/mono/mini/interp/mintops.h +++ b/src/mono/mono/mini/interp/mintops.h @@ -233,7 +233,7 @@ typedef enum { #define MINT_IS_SIMD_CREATE(op) ((op) >= MINT_SIMD_V128_I1_CREATE && (op) <= MINT_SIMD_V128_I8_CREATE) // TODO Add more -#define MINT_NO_SIDE_EFFECTS(op) (MINT_IS_MOV (op) || MINT_IS_LDC_I4 (op) || MINT_IS_LDC_I8 (op) || op == MINT_LDPTR || op == MINT_BOX) +#define MINT_NO_SIDE_EFFECTS(op) (MINT_IS_MOV (op) || MINT_IS_LDC_I4 (op) || MINT_IS_LDC_I8 (op) || op == MINT_LDC_R4 || op == MINT_LDC_R8 || op == MINT_LDPTR || op == MINT_BOX) #define MINT_CALL_ARGS 2 #define MINT_CALL_ARGS_SREG -2 diff --git a/src/mono/mono/mini/interp/simd-methods.def b/src/mono/mono/mini/interp/simd-methods.def index 4eb76e178558b..bde00dbab9be9 100644 --- a/src/mono/mono/mini/interp/simd-methods.def +++ b/src/mono/mono/mini/interp/simd-methods.def @@ -1,3 +1,4 @@ +SIMD_METHOD2(".ctor", ctor) SIMD_METHOD(get_Count) SIMD_METHOD(get_AllBitsSet) SIMD_METHOD(get_IsHardwareAccelerated) diff --git a/src/mono/mono/mini/interp/transform-simd.c b/src/mono/mono/mini/interp/transform-simd.c index cbdf7ccfc2cfe..41e49405ef7e0 100644 --- a/src/mono/mono/mini/interp/transform-simd.c +++ b/src/mono/mono/mini/interp/transform-simd.c @@ -14,16 +14,21 @@ #define MSGSTRFIELD1(line) str##line static const struct msgstr_t { #define SIMD_METHOD(name) char MSGSTRFIELD(__LINE__) [sizeof (#name)]; +#define SIMD_METHOD2(str,name) char MSGSTRFIELD(__LINE__) [sizeof (str)]; #include "simd-methods.def" #undef SIMD_METHOD +#undef SIMD_METHOD2 } method_names = { #define SIMD_METHOD(name) #name, +#define SIMD_METHOD2(str,name) str, #include "simd-methods.def" #undef SIMD_METHOD +#undef SIMD_METHOD2 }; enum { #define SIMD_METHOD(name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)), +#define SIMD_METHOD2(str,name) SN_ ## name = offsetof (struct msgstr_t, MSGSTRFIELD(__LINE__)), #include "simd-methods.def" }; @@ -91,6 +96,7 @@ static guint16 sri_vector128_t_methods [] = { }; static guint16 sn_vector_t_methods [] = { + SN_ctor, SN_get_AllBitsSet, SN_get_Count, SN_get_One, @@ -157,6 +163,12 @@ emit_common_simd_operations (TransformData *td, int id, int atype, int vector_si for (int i = 0; i < vector_size / arg_size; i++) data [i] = 1; return TRUE; + } else if (atype == MONO_TYPE_R4) { + interp_add_ins (td, MINT_SIMD_V128_LDC); + float *data = (float*)&td->last_ins->data [0]; + for (int i = 0; i < vector_size / arg_size; i++) + data [i] = 1.0f; + return TRUE; } break; case SN_get_Zero: @@ -310,6 +322,31 @@ emit_common_simd_epilogue (TransformData *td, MonoClass *vector_klass, MonoMetho td->ip += 5; } +static void +emit_vector_create (TransformData *td, MonoMethodSignature *csignature, MonoClass *vector_klass, int vector_size) +{ + int num_args = csignature->param_count; + if (num_args == 16) interp_add_ins (td, MINT_SIMD_V128_I1_CREATE); + else if (num_args == 8) interp_add_ins (td, MINT_SIMD_V128_I2_CREATE); + else if (num_args == 4) interp_add_ins (td, MINT_SIMD_V128_I4_CREATE); + else if (num_args == 2) interp_add_ins (td, MINT_SIMD_V128_I8_CREATE); + else g_assert_not_reached (); + + // We use call args machinery since we have too many args + interp_ins_set_sreg (td->last_ins, MINT_CALL_ARGS_SREG); + int *call_args = (int*)mono_mempool_alloc (td->mempool, (num_args + 1) * sizeof (int)); + td->sp -= csignature->param_count; + for (int i = 0; i < num_args; i++) + call_args [i] = td->sp [i].local; + call_args [num_args] = -1; + init_last_ins_call (td); + td->last_ins->info.call_info->call_args = call_args; + if (!td->optimized) + td->last_ins->info.call_info->call_offset = get_tos_offset (td); + push_type_vt (td, vector_klass, vector_size); + interp_ins_set_dreg (td->last_ins, td->sp [-1].local); +} + static gboolean emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature) { @@ -352,26 +389,7 @@ emit_sri_vector128 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature else if (arg_size == 4) simd_intrins = INTERP_SIMD_INTRINSIC_V128_I4_CREATE; else if (arg_size == 8) simd_intrins = INTERP_SIMD_INTRINSIC_V128_I8_CREATE; } else if (csignature->param_count == vector_size / arg_size && atype == csignature->params [0]->type) { - int num_args = csignature->param_count; - if (num_args == 16) interp_add_ins (td, MINT_SIMD_V128_I1_CREATE); - else if (num_args == 8) interp_add_ins (td, MINT_SIMD_V128_I2_CREATE); - else if (num_args == 4) interp_add_ins (td, MINT_SIMD_V128_I4_CREATE); - else if (num_args == 2) interp_add_ins (td, MINT_SIMD_V128_I8_CREATE); - else g_assert_not_reached (); - - // We use call args machinery since we have too many args - interp_ins_set_sreg (td->last_ins, MINT_CALL_ARGS_SREG); - int *call_args = (int*)mono_mempool_alloc (td->mempool, (num_args + 1) * sizeof (int)); - td->sp -= csignature->param_count; - for (int i = 0; i < num_args; i++) - call_args [i] = td->sp [i].local; - call_args [num_args] = -1; - init_last_ins_call (td); - td->last_ins->info.call_info->call_args = call_args; - if (!td->optimized) - td->last_ins->info.call_info->call_offset = get_tos_offset (td); - push_type_vt (td, vector_klass, vector_size); - interp_ins_set_dreg (td->last_ins, td->sp [-1].local); + emit_vector_create (td, csignature, vector_klass, vector_size); td->ip += 5; return TRUE; } @@ -507,7 +525,7 @@ emit_sri_vector128_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignatur } static gboolean -emit_sn_vector_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature) +emit_sn_vector_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature, gboolean newobj) { int id = lookup_intrins (sn_vector_t_methods, sizeof (sn_vector_t_methods), cmethod); if (id == -1) @@ -518,14 +536,74 @@ emit_sn_vector_t (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *c // First argument is always vector MonoClass *vector_klass = cmethod->klass; + if (!m_class_is_simd_type (vector_klass)) + return FALSE; MonoTypeEnum atype; int vector_size, arg_size, scalar_arg; if (!get_common_simd_info (vector_klass, csignature, &atype, &vector_size, &arg_size, &scalar_arg)) return FALSE; - if (emit_common_simd_operations (td, id, atype, vector_size, arg_size, scalar_arg, &simd_opcode, &simd_intrins)) + if (emit_common_simd_operations (td, id, atype, vector_size, arg_size, scalar_arg, &simd_opcode, &simd_intrins)) { + goto opcode_added; + } else if (id == SN_ctor) { + if (csignature->param_count == vector_size / arg_size && atype == csignature->params [0]->type) { + emit_vector_create (td, csignature, vector_klass, vector_size); + if (!newobj) { + // If the ctor is called explicitly, then we need to store to the passed `this` + interp_emit_stobj (td, vector_klass, FALSE); + td->ip += 5; + } + return TRUE; + } + } + + if (simd_opcode == -1 || simd_intrins == -1) + return FALSE; + + interp_add_ins (td, simd_opcode); + td->last_ins->data [0] = simd_intrins; + +opcode_added: + emit_common_simd_epilogue (td, vector_klass, csignature, vector_size, FALSE); + return TRUE; +} + +static gboolean +emit_sn_vector4 (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature, gboolean newobj) +{ + int id = lookup_intrins (sn_vector_t_methods, sizeof (sn_vector_t_methods), cmethod); + if (id == -1) + return FALSE; + + gint16 simd_opcode = -1; + gint16 simd_intrins = -1; + + // First argument is always vector + MonoClass *vector_klass = cmethod->klass; + + MonoTypeEnum atype = MONO_TYPE_R4; + int vector_size = SIZEOF_V128; + int arg_size = sizeof (float); + int scalar_arg = -1; + for (int i = 0; i < csignature->param_count; i++) { + if (csignature->params [i]->type != MONO_TYPE_GENERICINST) + scalar_arg = i; + } + + if (emit_common_simd_operations (td, id, atype, vector_size, arg_size, scalar_arg, &simd_opcode, &simd_intrins)) { goto opcode_added; + } else if (id == SN_ctor) { + if (csignature->param_count == vector_size / arg_size && atype == csignature->params [0]->type) { + emit_vector_create (td, csignature, vector_klass, vector_size); + if (!newobj) { + // If the ctor is called explicitly, then we need to store to the passed `this` + interp_emit_stobj (td, vector_klass, FALSE); + td->ip += 5; + } + return TRUE; + } + } if (simd_opcode == -1 || simd_intrins == -1) return FALSE; @@ -805,7 +883,7 @@ emit_sri_packedsimd (TransformData *td, MonoMethod *cmethod, MonoMethodSignature } static gboolean -interp_emit_simd_intrinsics (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature) +interp_emit_simd_intrinsics (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature, gboolean newobj) { const char *class_name; const char *class_ns; @@ -824,7 +902,9 @@ interp_emit_simd_intrinsics (TransformData *td, MonoMethod *cmethod, MonoMethodS return emit_sri_vector128_t (td, cmethod, csignature); } else if (!strcmp (class_ns, "System.Numerics")) { if (!strcmp (class_name, "Vector`1")) - return emit_sn_vector_t (td, cmethod, csignature); + return emit_sn_vector_t (td, cmethod, csignature, newobj); + else if (!strcmp (class_name, "Vector4")) + return emit_sn_vector4 (td, cmethod, csignature, newobj); } else if (!strcmp (class_ns, "System.Runtime.Intrinsics.Wasm")) { if (!strcmp (class_name, "PackedSimd")) return emit_sri_packedsimd (td, cmethod, csignature); diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index ce047b2677bcf..4898c5968f6f8 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -1981,7 +1981,7 @@ interp_handle_intrinsics (TransformData *td, MonoMethod *target_method, MonoClas const char *klass_name = m_class_get_name (target_method->klass); #ifdef INTERP_ENABLE_SIMD - if ((mono_interp_opt & INTERP_OPT_SIMD) && interp_emit_simd_intrinsics (td, target_method, csignature)) + if ((mono_interp_opt & INTERP_OPT_SIMD) && interp_emit_simd_intrinsics (td, target_method, csignature, FALSE)) return TRUE; #endif @@ -6289,6 +6289,10 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header, init_last_ins_call (td); td->last_ins->info.call_info->call_offset = call_offset; } else { +#ifdef INTERP_ENABLE_SIMD + if ((mono_interp_opt & INTERP_OPT_SIMD) && interp_emit_simd_intrinsics (td, m, csignature, TRUE)) + break; +#endif td->sp -= csignature->param_count; // Move params types in temporary buffer @@ -9362,7 +9366,7 @@ write_v128_element (gpointer v128_addr, LocalValue *val, int index, int el_size) switch (el_size) { case 1: *(gint8*)el_addr = (gint8)val->i; break; case 2: *(gint16*)el_addr = (gint16)val->i; break; - case 4: *(gint32*)el_addr = val->i; break; + case 4: *(gint32*)el_addr = val->i; break; // this also handles r4 case 8: *(gint64*)el_addr = val->l; break; default: g_assert_not_reached (); @@ -9379,7 +9383,7 @@ interp_fold_simd_create (TransformData *td, InterpBasicBlock *cbb, LocalValue *l int var = args [index]; while (var != -1) { LocalValue *val = &local_defs [var]; - if (val->type != LOCAL_VALUE_I4 && val->type != LOCAL_VALUE_I8) + if (val->type != LOCAL_VALUE_I4 && val->type != LOCAL_VALUE_I8 && val->type != LOCAL_VALUE_R4) return ins; index++; var = args [index]; @@ -9654,6 +9658,11 @@ interp_cprop (TransformData *td) } else if (MINT_IS_LDC_I8 (opcode)) { local_defs [dreg].type = LOCAL_VALUE_I8; local_defs [dreg].l = interp_get_const_from_ldc_i8 (ins); + } else if (opcode == MINT_LDC_R4) { + guint32 val_u = READ32 (&ins->data [0]); + float f = *(float*)(&val_u); + local_defs [dreg].type = LOCAL_VALUE_R4; + local_defs [dreg].f = f; } else if (ins->opcode == MINT_LDPTR) { #if SIZEOF_VOID_P == 8 local_defs [dreg].type = LOCAL_VALUE_I8; @@ -9824,6 +9833,26 @@ interp_cprop (TransformData *td) dump_interp_inst (ins, td->data_items); } } + } else if (opcode == MINT_STOBJ_VT || opcode == MINT_STOBJ_VT_NOREF) { + InterpInst *ldloca = local_defs [sregs [0]].ins; + if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) { + int stsize = ins->data [0]; + int local = ldloca->sregs [0]; + + if (stsize == td->locals [local].size) { + // Replace LDLOCA + STOBJ_VT with MOV_VT + local_ref_count [sregs [0]]--; + ins->opcode = MINT_MOV_VT; + sregs [0] = sregs [1]; + ins->dreg = local; + needs_retry = TRUE; + + if (td->verbose_level) { + g_print ("Replace ldloca/stobj_vt pair :\n\t"); + dump_interp_inst (ins, td->data_items); + } + } + } } else if (MINT_IS_STIND (opcode)) { InterpInst *ldloca = local_defs [sregs [0]].ins; if (ldloca != NULL && ldloca->opcode == MINT_LDLOCA_S) { diff --git a/src/mono/mono/mini/interp/transform.h b/src/mono/mono/mini/interp/transform.h index 7ff3f503a9fd4..7abf6da76837f 100644 --- a/src/mono/mono/mini/interp/transform.h +++ b/src/mono/mono/mini/interp/transform.h @@ -50,7 +50,8 @@ typedef struct #define LOCAL_VALUE_LOCAL 1 #define LOCAL_VALUE_I4 2 #define LOCAL_VALUE_I8 3 -#define LOCAL_VALUE_NON_NULL 4 +#define LOCAL_VALUE_R4 4 +#define LOCAL_VALUE_NON_NULL 5 // LocalValue contains data to construct an InterpInst that is equivalent with the contents // of the stack slot / local / argument. @@ -62,6 +63,7 @@ typedef struct { int local; gint32 i; gint64 l; + float f; }; // The instruction that writes this local. InterpInst *ins; @@ -381,6 +383,6 @@ mono_interp_print_td_code (TransformData *td); /* Forward definitions for simd methods */ static gboolean -interp_emit_simd_intrinsics (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature); +interp_emit_simd_intrinsics (TransformData *td, MonoMethod *cmethod, MonoMethodSignature *csignature, gboolean newobj); #endif /* __MONO_MINI_INTERP_TRANSFORM_H__ */