Skip to content

Commit

Permalink
Generate a bitset in the interpreter's codegen that identifies which …
Browse files Browse the repository at this point in the history
…spaces in the stack have had their address taken

Use the interpreter bitset for jiterpreter null check optimization
Enable null check optimization for all traces
  • Loading branch information
kg committed Mar 30, 2023
1 parent 451d84e commit 808ac5b
Show file tree
Hide file tree
Showing 8 changed files with 88 additions and 51 deletions.
1 change: 1 addition & 0 deletions src/mono/mono/mini/interp/interp-internals.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ struct InterpMethod {
unsigned int contains_traces : 1;
guint16 *backward_branch_offsets;
unsigned int backward_branch_offsets_count;
MonoBitSet *global_variable_bits;
#endif
#if PROFILE_INTERP
long calls;
Expand Down
34 changes: 34 additions & 0 deletions src/mono/mono/mini/interp/jiterpreter.c
Original file line number Diff line number Diff line change
Expand Up @@ -1033,6 +1033,30 @@ jiterp_insert_entry_points (void *_imethod, void *_td)
// it's big enough we'll be able to insert another entry point right away.
instruction_count += bb->in_count;
}

if (imethod->contains_traces) {
// Fill the bitset that identifies ldloca targets for the jiterpreter null check optimization
// FIXME: Remove this once the null check optimization analysis is moved into the interpreter
guint32 bitset_size = td->total_locals_size / MINT_STACK_SLOT_SIZE;
for (unsigned int i = 0; i < td->locals_size; i++) {
InterpLocal *loc = &(td->locals [i]);
if ((loc->flags & INTERP_LOCAL_FLAG_ADDRESS_TAKEN) != INTERP_LOCAL_FLAG_ADDRESS_TAKEN)
continue;

// Allocate on demand so if a method contains no ldlocas we don't allocate the bitset
if (!imethod->global_variable_bits)
imethod->global_variable_bits = mono_bitset_new (bitset_size, 0);

// Ensure that every bit in the global variable set corresponding to space occupied
// by this local is set, so that large locals (structs etc) being ldloca'd properly
// sets the whole range covered by the struct as a no-go for optimization.
// FIXME: Do this per slot instead of per byte.
for (int j = 0; j < loc->size; j++) {
guint32 b = (loc->offset + j) / MINT_STACK_SLOT_SIZE;
mono_bitset_set (imethod->global_variable_bits, b);
}
}
}
}

EMSCRIPTEN_KEEPALIVE double
Expand Down Expand Up @@ -1446,6 +1470,16 @@ mono_jiterp_boost_back_branch_target (guint16 *ip) {
*/
}

EMSCRIPTEN_KEEPALIVE int
mono_jiterp_is_imethod_var_global (InterpMethod *imethod, int offset) {
g_assert (imethod);
g_assert (offset >= 0);
if (!imethod->global_variable_bits)
return FALSE;

return mono_bitset_test (imethod->global_variable_bits, offset / MINT_STACK_SLOT_SIZE);
}

// HACK: fix C4206
EMSCRIPTEN_KEEPALIVE
#endif // HOST_BROWSER
Expand Down
1 change: 1 addition & 0 deletions src/mono/mono/mini/interp/transform.c
Original file line number Diff line number Diff line change
Expand Up @@ -10315,6 +10315,7 @@ initialize_global_vars (TransformData *td)
alloc_global_var_offset (td, var);
td->locals [var].flags |= INTERP_LOCAL_FLAG_GLOBAL;
}
td->locals [var].flags |= INTERP_LOCAL_FLAG_ADDRESS_TAKEN;
}
foreach_local_var (td, ins, (gpointer)(gsize)bb->index, initialize_global_var_cb);
}
Expand Down
2 changes: 2 additions & 0 deletions src/mono/mono/mini/interp/transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
#define INTERP_LOCAL_FLAG_LOCAL_ONLY 64
// We use this flag to avoid addition of align field in InterpLocal, for now
#define INTERP_LOCAL_FLAG_SIMD 128
// This flag is set for all locals targeted by a LDLOCA opcode, used by jiterpreter
#define INTERP_LOCAL_FLAG_ADDRESS_TAKEN 256

typedef struct _InterpInst InterpInst;
typedef struct _InterpBasicBlock InterpBasicBlock;
Expand Down
2 changes: 2 additions & 0 deletions src/mono/wasm/runtime/cwraps.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ const fn_signatures: SigLine[] = [
[true, "mono_jiterp_get_polling_required_address", "number", []],
[true, "mono_jiterp_get_rejected_trace_count", "number", []],
[true, "mono_jiterp_boost_back_branch_target", "void", ["number"]],
[true, "mono_jiterp_is_imethod_var_global", "number", ["number", "number"]],
...legacy_interop_cwraps
];

Expand Down Expand Up @@ -242,6 +243,7 @@ export interface t_Cwraps {
mono_jiterp_write_number_unaligned(destination: VoidPtr, value: number, mode: number): void;
mono_jiterp_get_rejected_trace_count(): number;
mono_jiterp_boost_back_branch_target(destination: number): void;
mono_jiterp_is_imethod_var_global(imethod: VoidPtr, offsetBytes: number): number;
}

const wrapped_c_functions: t_Cwraps = <any>{};
Expand Down
1 change: 1 addition & 0 deletions src/mono/wasm/runtime/jiterpreter-support.ts
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ export class WasmBuilder {
argumentCount!: number;
activeBlocks!: number;
base!: MintOpcodePtr;
frame: NativePointer = <any>0;
traceBuf: Array<string> = [];
branchTargets = new Set<MintOpcodePtr>();
options!: JiterpreterOptions;
Expand Down
82 changes: 43 additions & 39 deletions src/mono/wasm/runtime/jiterpreter-trace-generator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,18 +106,22 @@ function getArgF64 (ip: MintOpcodePtr, indexPlusOne: number) {
return getF64_unaligned(src);
}

function get_imethod_data (frame: NativePointer, index: number) {
function get_imethod (frame: NativePointer) {
// FIXME: Encoding this data directly into the trace will prevent trace reuse
const iMethod = getU32_unaligned(<any>frame + getMemberOffset(JiterpMember.Imethod));
const pData = getU32_unaligned(iMethod + getMemberOffset(JiterpMember.DataItems));
return iMethod;
}

function get_imethod_data (frame: NativePointer, index: number) {
// FIXME: Encoding this data directly into the trace will prevent trace reuse
const pData = getU32_unaligned(get_imethod(frame) + getMemberOffset(JiterpMember.DataItems));
const dataOffset = pData + (index * sizeOfDataItem);
return getU32_unaligned(dataOffset);
}

function get_imethod_clause_data_offset (frame: NativePointer, index: number) {
// FIXME: Encoding this data directly into the trace will prevent trace reuse
const iMethod = getU32_unaligned(<any>frame + getMemberOffset(JiterpMember.Imethod));
const pData = getU32_unaligned(iMethod + getMemberOffset(JiterpMember.ClauseDataOffsets));
const pData = getU32_unaligned(get_imethod(frame) + getMemberOffset(JiterpMember.ClauseDataOffsets));
const dataOffset = pData + (index * sizeOfDataItem);
return getU32_unaligned(dataOffset);
}
Expand All @@ -138,7 +142,7 @@ function is_backward_branch_target (
return false;
}

export function generate_wasm_body (
export function generateWasmBody (
frame: NativePointer, traceName: string, ip: MintOpcodePtr,
startOfBody: MintOpcodePtr, endOfBody: MintOpcodePtr,
builder: WasmBuilder, instrumentedTraceId: number,
Expand All @@ -152,7 +156,6 @@ export function generate_wasm_body (
conditionalOpcodeCounter = 0;
const traceIp = ip;

addressTakenLocals.clear();
eraseInferredState();

// Skip over the enter opcode
Expand Down Expand Up @@ -487,23 +490,23 @@ export function generate_wasm_body (
}
case MintOpcode.MINT_LDOBJ_VT: {
const size = getArgU16(ip, 3);
append_ldloca(builder, getArgU16(ip, 1), size, true);
append_ldloca(builder, getArgU16(ip, 1), size);
append_ldloc_cknull(builder, getArgU16(ip, 2), ip, true);
append_memmove_dest_src(builder, size);
break;
}
case MintOpcode.MINT_STOBJ_VT: {
const klass = get_imethod_data(frame, getArgU16(ip, 3));
append_ldloc(builder, getArgU16(ip, 1), WasmOpcode.i32_load);
append_ldloca(builder, getArgU16(ip, 2), 0, true);
append_ldloca(builder, getArgU16(ip, 2), 0);
builder.ptr_const(klass);
builder.callImport("value_copy");
break;
}
case MintOpcode.MINT_STOBJ_VT_NOREF: {
const sizeBytes = getArgU16(ip, 3);
append_ldloc(builder, getArgU16(ip, 1), WasmOpcode.i32_load);
append_ldloca(builder, getArgU16(ip, 2), 0, true);
append_ldloca(builder, getArgU16(ip, 2), 0);
append_memmove_dest_src(builder, sizeBytes);
break;
}
Expand Down Expand Up @@ -627,7 +630,7 @@ export function generate_wasm_body (
append_bailout(builder, ip, BailoutReason.SpanOperationFailed);
builder.endBlock();
// gpointer span = locals + ip [1];
append_ldloca(builder, getArgU16(ip, 1), 16, true);
append_ldloca(builder, getArgU16(ip, 1), 16);
builder.local("math_lhs32", WasmOpcode.tee_local);
// *(gpointer*)span = ptr;
append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i32_load);
Expand All @@ -643,13 +646,13 @@ export function generate_wasm_body (

case MintOpcode.MINT_LD_DELEGATE_METHOD_PTR: {
// FIXME: ldloca invalidation size
append_ldloca(builder, getArgU16(ip, 1), 8, true);
append_ldloca(builder, getArgU16(ip, 2), 8, true);
append_ldloca(builder, getArgU16(ip, 1), 8);
append_ldloca(builder, getArgU16(ip, 2), 8);
builder.callImport("ld_del_ptr");
break;
}
case MintOpcode.MINT_LDTSFLDA: {
append_ldloca(builder, getArgU16(ip, 1), 4, true);
append_ldloca(builder, getArgU16(ip, 1), 4);
// This value is unsigned but I32 is probably right
builder.ptr_const(getArgI32(ip, 2));
builder.callImport("ldtsflda");
Expand All @@ -658,7 +661,7 @@ export function generate_wasm_body (
case MintOpcode.MINT_INTRINS_GET_TYPE:
builder.block();
// dest, src
append_ldloca(builder, getArgU16(ip, 1), 4, true);
append_ldloca(builder, getArgU16(ip, 1), 4);
append_ldloca(builder, getArgU16(ip, 2), 0);
builder.callImport("gettype");
// bailout if gettype failed
Expand All @@ -670,7 +673,7 @@ export function generate_wasm_body (
case MintOpcode.MINT_INTRINS_ENUM_HASFLAG: {
const klass = get_imethod_data(frame, getArgU16(ip, 4));
builder.ptr_const(klass);
append_ldloca(builder, getArgU16(ip, 1), 4, true);
append_ldloca(builder, getArgU16(ip, 1), 4);
append_ldloca(builder, getArgU16(ip, 2), 0);
append_ldloca(builder, getArgU16(ip, 3), 0);
builder.callImport("hasflag");
Expand Down Expand Up @@ -738,7 +741,7 @@ export function generate_wasm_body (
case MintOpcode.MINT_ARRAY_ELEMENT_SIZE: {
builder.block();
// dest, src
append_ldloca(builder, getArgU16(ip, 1), 4, true);
append_ldloca(builder, getArgU16(ip, 1), 4);
append_ldloca(builder, getArgU16(ip, 2), 0);
builder.callImport(opcode === MintOpcode.MINT_ARRAY_RANK ? "array_rank" : "a_elesize");
// If the array was null we will bail out, otherwise continue
Expand All @@ -757,7 +760,7 @@ export function generate_wasm_body (
case MintOpcode.MINT_ISINST_INTERFACE: {
builder.block();
// dest, src
append_ldloca(builder, getArgU16(ip, 1), 4, true);
append_ldloca(builder, getArgU16(ip, 1), 4);
append_ldloca(builder, getArgU16(ip, 2), 0);
// klass
builder.ptr_const(get_imethod_data(frame, getArgU16(ip, 3)));
Expand All @@ -777,7 +780,7 @@ export function generate_wasm_body (
// MonoVTable *vtable = (MonoVTable*)frame->imethod->data_items [ip [3]];
builder.ptr_const(get_imethod_data(frame, getArgU16(ip, 3)));
// dest, src
append_ldloca(builder, getArgU16(ip, 1), 4, true);
append_ldloca(builder, getArgU16(ip, 1), 4);
append_ldloca(builder, getArgU16(ip, 2), 0);
builder.i32_const(opcode === MintOpcode.MINT_BOX_VT ? 1 : 0);
builder.callImport("box");
Expand All @@ -788,7 +791,7 @@ export function generate_wasm_body (
// MonoClass *c = (MonoClass*)frame->imethod->data_items [ip [3]];
builder.ptr_const(get_imethod_data(frame, getArgU16(ip, 3)));
// dest, src
append_ldloca(builder, getArgU16(ip, 1), 4, true);
append_ldloca(builder, getArgU16(ip, 1), 4);
append_ldloca(builder, getArgU16(ip, 2), 0);
builder.callImport("try_unbox");
// If the unbox operation succeeded, continue, otherwise bailout
Expand All @@ -801,7 +804,7 @@ export function generate_wasm_body (

case MintOpcode.MINT_NEWSTR: {
builder.block();
append_ldloca(builder, getArgU16(ip, 1), 4, true);
append_ldloca(builder, getArgU16(ip, 1), 4);
append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i32_load);
builder.callImport("newstr");
// If the newstr operation succeeded, continue, otherwise bailout
Expand All @@ -817,7 +820,7 @@ export function generate_wasm_body (
case MintOpcode.MINT_NEWOBJ_INLINED: {
builder.block();
// MonoObject *o = mono_gc_alloc_obj (vtable, m_class_get_instance_size (vtable->klass));
append_ldloca(builder, getArgU16(ip, 1), 4, true);
append_ldloca(builder, getArgU16(ip, 1), 4);
builder.ptr_const(get_imethod_data(frame, getArgU16(ip, 2)));
// LOCAL_VAR (ip [1], MonoObject*) = o;
builder.callImport("newobj_i");
Expand All @@ -832,11 +835,11 @@ export function generate_wasm_body (
case MintOpcode.MINT_NEWOBJ_VT_INLINED: {
const ret_size = getArgU16(ip, 3);
// memset (this_vt, 0, ret_size);
append_ldloca(builder, getArgU16(ip, 2), ret_size, true);
append_ldloca(builder, getArgU16(ip, 2), ret_size);
append_memset_dest(builder, 0, ret_size);
// LOCAL_VAR (ip [1], gpointer) = this_vt;
builder.local("pLocals");
append_ldloca(builder, getArgU16(ip, 2), ret_size, true);
append_ldloca(builder, getArgU16(ip, 2), ret_size);
append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store);
break;
}
Expand Down Expand Up @@ -942,7 +945,7 @@ export function generate_wasm_body (
case MintOpcode.MINT_CONV_OVF_U4_I4:
builder.block();
// dest, src
append_ldloca(builder, getArgU16(ip, 1), 8, true);
append_ldloca(builder, getArgU16(ip, 1), 8);
append_ldloca(builder, getArgU16(ip, 2), 0);
builder.i32_const(opcode);
builder.callImport("conv");
Expand Down Expand Up @@ -1071,7 +1074,7 @@ export function generate_wasm_body (
append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i32_load); // dest
append_ldloca(builder, getArgU16(ip, 3), 0); // newVal
append_ldloca(builder, getArgU16(ip, 4), 0); // expected
append_ldloca(builder, getArgU16(ip, 1), 8, true); // oldVal
append_ldloca(builder, getArgU16(ip, 1), 8); // oldVal
builder.callImport("cmpxchg_i64");
break;

Expand Down Expand Up @@ -1308,7 +1311,6 @@ export function generate_wasm_body (
return result;
}

const addressTakenLocals : Set<number> = new Set();
const notNullSince : Map<number, number> = new Map();
let cknullOffset = -1;

Expand Down Expand Up @@ -1359,14 +1361,12 @@ function append_stloc_tail (builder: WasmBuilder, offset: number, opcode: WasmOp
// used for writes
// Pass transient=true if the address will not persist after use (so it can't be used to later
// modify the contents of this local)
function append_ldloca (builder: WasmBuilder, localOffset: number, bytesInvalidated?: number, transient?: boolean) {
function append_ldloca (builder: WasmBuilder, localOffset: number, bytesInvalidated?: number) {
if (typeof (bytesInvalidated) !== "number")
bytesInvalidated = 512;
// FIXME: We need to know how big this variable is so we can invalidate the whole space it occupies
if (bytesInvalidated > 0)
invalidate_local_range(localOffset, bytesInvalidated);
if ((bytesInvalidated > 0) && (transient !== true))
addressTakenLocals.add(localOffset);
builder.lea("pLocals", localOffset);
}

Expand All @@ -1378,7 +1378,7 @@ function append_memset_local (builder: WasmBuilder, localOffset: number, value:
return;

// spec: pop n, pop val, pop d, fill from d[0] to d[n] with value val
append_ldloca(builder, localOffset, count, true);
append_ldloca(builder, localOffset, count);
append_memset_dest(builder, value, count);
}

Expand All @@ -1389,16 +1389,20 @@ function append_memmove_local_local (builder: WasmBuilder, destLocalOffset: numb
return true;

// spec: pop n, pop s, pop d, copy n bytes from s to d
append_ldloca(builder, destLocalOffset, count, true);
append_ldloca(builder, destLocalOffset, count);
append_ldloca(builder, sourceLocalOffset, 0);
append_memmove_dest_src(builder, count);
}

function isAddressTaken (builder: WasmBuilder, localOffset: number) {
return cwraps.mono_jiterp_is_imethod_var_global(<any>get_imethod(builder.frame), localOffset) !== 0;
}

// Loads the specified i32 value and then bails out if it is null, leaving it in the cknull_ptr local.
function append_ldloc_cknull (builder: WasmBuilder, localOffset: number, ip: MintOpcodePtr, leaveOnStack: boolean) {
const optimize = builder.allowNullCheckOptimization &&
!addressTakenLocals.has(localOffset) &&
notNullSince.has(localOffset);
notNullSince.has(localOffset) &&
!isAddressTaken(builder, localOffset);

if (optimize) {
counters.nullChecksEliminated++;
Expand Down Expand Up @@ -1437,8 +1441,8 @@ function append_ldloc_cknull (builder: WasmBuilder, localOffset: number, ip: Min
builder.local("cknull_ptr");

if (
!addressTakenLocals.has(localOffset) &&
builder.allowNullCheckOptimization
builder.allowNullCheckOptimization &&
!isAddressTaken(builder, localOffset)
) {
notNullSince.set(localOffset, <any>ip);
if (traceNullCheckOptimizations)
Expand Down Expand Up @@ -1620,8 +1624,8 @@ function emit_fieldop (

// Check this before potentially emitting a cknull
const notNull = builder.allowNullCheckOptimization &&
!addressTakenLocals.has(objectOffset) &&
notNullSince.has(objectOffset);
notNullSince.has(objectOffset) &&
!isAddressTaken(builder, objectOffset);

if (
(opcode !== MintOpcode.MINT_LDFLDA_UNSAFE) &&
Expand Down Expand Up @@ -1723,7 +1727,7 @@ function emit_fieldop (
case MintOpcode.MINT_LDFLD_VT: {
const sizeBytes = getArgU16(ip, 4);
// dest
append_ldloca(builder, localOffset, sizeBytes, true);
append_ldloca(builder, localOffset, sizeBytes);
// src
builder.local("cknull_ptr");
builder.i32_const(fieldOffset);
Expand Down Expand Up @@ -1860,7 +1864,7 @@ function emit_sfieldop (
case MintOpcode.MINT_LDSFLD_VT: {
const sizeBytes = getArgU16(ip, 4);
// dest
append_ldloca(builder, localOffset, sizeBytes, true);
append_ldloca(builder, localOffset, sizeBytes);
// src
builder.ptr_const(pStaticData);
append_memmove_dest_src(builder, sizeBytes);
Expand Down
Loading

0 comments on commit 808ac5b

Please sign in to comment.