From c251d3d298bc116e05a989ab6fa328bd4dcd0ec6 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Fri, 20 Aug 2021 15:34:08 -0400 Subject: [PATCH] set ssaflags on `CodeInfo` construction --- base/compiler/optimize.jl | 72 +++++++++------------------------ base/compiler/ssair/slot2ssa.jl | 6 +-- src/julia.h | 3 +- src/method.c | 62 +++++++++++++++++++++++++++- 4 files changed, 84 insertions(+), 59 deletions(-) diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index 3b7b437ce6c6d..a40b5b6187d0c 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -134,12 +134,14 @@ const SLOT_ASSIGNEDONCE = 16 # slot is assigned to only once const SLOT_USEDUNDEF = 32 # slot has uses that might raise UndefVarError # const SLOT_CALLED = 64 -# This statement was marked as @inbounds by the user. If replaced by inlining, -# any contained boundschecks may be removed -const IR_FLAG_INBOUNDS = 0x01 -# This statement was marked as @inline by the user +# NOTE make sure to sync the flag definitions below with julia.h and `jl_code_info_set_ir` in method.c + +# This statement is marked as @inbounds by the user. +# If replaced by inlining, any contained boundschecks may be removed. +const IR_FLAG_INBOUNDS = 0x01 << 0 +# This statement is marked as @inline by the user const IR_FLAG_INLINE = 0x01 << 1 -# This statement was marked as @noinline by the user +# This statement is marked as @noinline by the user const IR_FLAG_NOINLINE = 0x01 << 2 # This statement may be removed if its result is unused. In particular it must # thus be both pure and effect free. 
@@ -374,75 +376,37 @@ function convert_to_ircode(ci::CodeInfo, code::Vector{Any}, coverage::Bool, sv:: end renumber_ir_elements!(code, changemap, labelmap) - inbounds_depth = 0 # Number of stacked inbounds - inline_flags = BitVector() meta = Any[] - flags = fill(0x00, length(code)) for i = 1:length(code) - stmt = code[i] - if isexpr(stmt, :inbounds) - arg1 = stmt.args[1] - if arg1 === true # push - inbounds_depth += 1 - elseif arg1 === false # clear - inbounds_depth = 0 - elseif inbounds_depth > 0 # pop - inbounds_depth -= 1 - end - stmt = nothing - elseif isexpr(stmt, :inline) - if stmt.args[1]::Bool - push!(inline_flags, true) - else - pop!(inline_flags) - end - stmt = nothing - elseif isexpr(stmt, :noinline) - if stmt.args[1]::Bool - push!(inline_flags, false) - else - pop!(inline_flags) - end - stmt = nothing - else - stmt = normalize(stmt, meta) - end - code[i] = stmt - if stmt !== nothing - if inbounds_depth > 0 - flags[i] |= IR_FLAG_INBOUNDS - end - if !isempty(inline_flags) - if last(inline_flags) - flags[i] |= IR_FLAG_INLINE - else - flags[i] |= IR_FLAG_NOINLINE - end - end - end + code[i] = remove_meta!(code[i], meta) end - @assert isempty(inline_flags) "malformed meta flags" - strip_trailing_junk!(ci, code, stmtinfo, flags) + strip_trailing_junk!(ci, code, stmtinfo) cfg = compute_basic_blocks(code) types = Any[] - stmts = InstructionStream(code, types, stmtinfo, ci.codelocs, flags) + stmts = InstructionStream(code, types, stmtinfo, ci.codelocs, ci.ssaflags) ir = IRCode(stmts, cfg, collect(LineInfoNode, ci.linetable::Union{Vector{LineInfoNode},Vector{Any}}), sv.slottypes, meta, sv.sptypes) return ir end -function normalize(@nospecialize(stmt), meta::Vector{Any}) +function remove_meta!(@nospecialize(stmt), meta::Vector{Any}) if isa(stmt, Expr) - if stmt.head === :meta + head = stmt.head + if head === :meta args = stmt.args if length(args) > 0 push!(meta, stmt) end return nothing + elseif is_ssaflag_head(head) + # we processed these flags in 
`jl_code_info_set_ir` + return nothing end end return stmt end +is_ssaflag_head(head::Symbol) = head === :inbounds || head === :inline || head === :noinline + function slot2reg(ir::IRCode, ci::CodeInfo, sv::OptimizationState) # need `ci` for the slot metadata, IR for the code svdef = sv.linfo.def diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl index 6ca17be2f0a83..d02323c8a90e0 100644 --- a/base/compiler/ssair/slot2ssa.jl +++ b/base/compiler/ssair/slot2ssa.jl @@ -183,7 +183,7 @@ function rename_uses!(ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt), r return fixemup!(stmt->true, stmt->renames[slot_id(stmt)], ir, ci, idx, stmt) end -function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{Any}, flags::Vector{UInt8}) +function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{Any}) # Remove `nothing`s at the end, we don't handle them well # (we expect the last instruction to be a terminator) ssavaluetypes = ci.ssavaluetypes::Vector{Any} @@ -193,7 +193,7 @@ function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{Any} resize!(ssavaluetypes, i) resize!(ci.codelocs, i) resize!(info, i) - resize!(flags, i) + resize!(ci.ssaflags, i) break end end @@ -205,7 +205,7 @@ function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{Any} push!(ssavaluetypes, Union{}) push!(ci.codelocs, 0) push!(info, nothing) - push!(flags, 0x00) + push!(ci.ssaflags, 0x00) end nothing end diff --git a/src/julia.h b/src/julia.h index e6edc71b42dfa..e53b33bef674d 100644 --- a/src/julia.h +++ b/src/julia.h @@ -255,7 +255,8 @@ typedef struct _jl_code_info_t { jl_value_t *ssavaluetypes; // types of ssa values (or count of them) jl_array_t *ssaflags; // flags associated with each statement: // 0 = inbounds - // 1,2 = inlinehint,always-inline,noinline + // 1 = inline + // 2 = noinline // 3 = strict-ieee (strictfp) // 4 = effect-free (may be deleted if unused) // 5-6 = <unused> diff --git a/src/method.c 
b/src/method.c index 852a7ff88208a..01695b8b5b1e2 100644 --- a/src/method.c +++ b/src/method.c @@ -254,6 +254,11 @@ void jl_resolve_globals_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *spar } } +jl_value_t* expr_arg1(jl_value_t* expr) { + jl_array_t *args = ((jl_expr_t*)expr)->args; + return jl_array_ptr_ref(args, 0); +} + // copy a :lambda Expr into its CodeInfo representation, // including popping of known meta nodes static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) @@ -275,8 +280,17 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) jl_gc_wb(li, li->code); size_t n = jl_array_len(body); jl_value_t **bd = (jl_value_t**)jl_array_ptr_data((jl_array_t*)li->code); + li->ssaflags = jl_alloc_array_1d(jl_array_uint8_type, n); + jl_gc_wb(li, li->ssaflags); + int inbounds_depth = 0; // number of stacked inbounds + // isempty(inline_flags): no user annotation + // last(inline_flags) == 1: inline region + // last(inline_flags) == 0: noinline region + arraylist_t *inline_flags = arraylist_new((arraylist_t*)malloc_s(sizeof(arraylist_t)), 0); for (j = 0; j < n; j++) { jl_value_t *st = bd[j]; + int is_flag_stmt = 0; + // check :meta expression if (jl_is_expr(st) && ((jl_expr_t*)st)->head == meta_sym) { size_t k, ins = 0, na = jl_expr_nargs(st); jl_array_t *meta = ((jl_expr_t*)st)->args; @@ -298,10 +312,57 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) else jl_array_del_end(meta, na - ins); } + // check other flag expressions + else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == inbounds_sym) { + is_flag_stmt = 1; + jl_value_t *arg1 = expr_arg1(st); + if (arg1 == (jl_value_t*)jl_true) // push + inbounds_depth += 1; + else if (arg1 == (jl_value_t*)jl_false) // clear + inbounds_depth = 0; + else if (inbounds_depth > 0) // pop + inbounds_depth -= 1; + } + else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == inline_sym) { + is_flag_stmt = 1; + jl_value_t *arg1 = expr_arg1(st); + if (arg1 == (jl_value_t*)jl_true) // 
enter inline region + arraylist_push(inline_flags, (void*)1); + else { // exit inline region + assert(arg1 == (jl_value_t*)jl_false); + arraylist_pop(inline_flags); + } + } + else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == noinline_sym) { + is_flag_stmt = 1; + jl_value_t *arg1 = expr_arg1(st); + if (arg1 == (jl_value_t*)jl_true) // enter noinline region + arraylist_push(inline_flags, (void*)0); + else { // exit noinline region + assert(arg1 == (jl_value_t*)jl_false); + arraylist_pop(inline_flags); + } + } else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == return_sym) { jl_array_ptr_set(body, j, jl_new_struct(jl_returnnode_type, jl_exprarg(st, 0))); } + + if (is_flag_stmt) + jl_array_uint8_set(li->ssaflags, j, 0); + else { + uint8_t flag = 0; + if (inbounds_depth > 0) + flag |= 1 << 0; + if (inline_flags->len > 0) { + void* inline_flag = inline_flags->items[inline_flags->len - 1]; + flag |= 1 << (inline_flag ? 1 : 2); + } + jl_array_uint8_set(li->ssaflags, j, flag); + } } + assert(inline_flags->len == 0); // malformed otherwise + arraylist_free(inline_flags); + free(inline_flags); jl_array_t *vinfo = (jl_array_t*)jl_exprarg(ir, 1); jl_array_t *vis = (jl_array_t*)jl_array_ptr_ref(vinfo, 0); size_t nslots = jl_array_len(vis); @@ -314,7 +375,6 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) jl_gc_wb(li, li->slotflags); li->ssavaluetypes = jl_box_long(nssavalue); jl_gc_wb(li, li->ssavaluetypes); - li->ssaflags = jl_alloc_array_1d(jl_array_uint8_type, 0); // Flags that need to be copied to slotflags const uint8_t vinfo_mask = 8 | 16 | 32 | 64;