From 9c35a04f0d34ecbc9ded7ef85de3f093dd806ac9 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Wed, 23 Jun 2021 20:22:15 +0900 Subject: [PATCH 01/11] optimizer: supports callsite annotations of inlining, fixes #18773 Enable `@inline`/`@noinline` annotations on function callsites. From #40754. Now `@inline` and `@noinline` can be applied to a code block and then the compiler will try to (not) inline calls within the block: ```julia @inline f(...) # The compiler will try to inline `f` @inline f(...) + g(...) # The compiler will try to inline `f`, `g` and `+` @inline f(args...) = ... # Of course, annotations on a definition are still allowed ``` Here are a couple of notes on how these callsite annotations work: - a callsite annotation always takes precedence over the annotation applied to the definition of the called function, regardless of whether it is `@inline` or `@noinline`: ```julia @inline function explicit_inline(args...) # body end let @noinline explicit_inline(args...) # this call will not be inlined end ``` - when callsite annotations are nested, the innermost annotation takes precedence: ```julia @noinline let a0, b0 = ... a = @inline f(a0) # the compiler will try to inline this call b = notinlined(b0) # the compiler will NOT try to inline this call return a, b end ``` Both behaviors are tested and documented. 
--- base/compiler/abstractinterpretation.jl | 11 +- base/compiler/optimize.jl | 59 +++++++-- base/compiler/ssair/inlining.jl | 60 +++++----- base/compiler/typeinfer.jl | 2 +- base/compiler/types.jl | 1 - base/compiler/utilities.jl | 5 +- base/compiler/validation.jl | 4 +- base/expr.jl | 130 ++++++++++++++++++-- base/meta.jl | 3 +- src/ast.scm | 2 +- src/codegen.cpp | 4 +- src/interpreter.c | 2 +- src/julia-syntax.scm | 6 +- src/macroexpand.scm | 2 +- src/method.c | 3 +- test/compiler/inline.jl | 151 +++++++++++++++++++++--- 16 files changed, 358 insertions(+), 87 deletions(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 4c3f7d46fd98b..b79b36978f312 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -592,7 +592,7 @@ function maybe_get_const_prop_profitable(interp::AbstractInterpreter, result::Me return nothing end mi = mi::MethodInstance - if !force && !const_prop_methodinstance_heuristic(interp, method, mi) + if !force && !const_prop_methodinstance_heuristic(interp, match, mi) add_remark!(interp, sv, "[constprop] Disabled by method instance heuristic") return nothing end @@ -696,7 +696,8 @@ end # This is a heuristic to avoid trying to const prop through complicated functions # where we would spend a lot of time, but are probably unlikely to get an improved # result anyway. -function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, method::Method, mi::MethodInstance) +function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, match::MethodMatch, mi::MethodInstance) + method = match.method if method.is_for_opaque_closure # Not inlining an opaque closure can be very expensive, so be generous # with the const-prop-ability. 
It is quite possible that we can't infer @@ -714,7 +715,8 @@ function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, method if isdefined(code, :inferred) && !cache_inlineable cache_inf = code.inferred if !(cache_inf === nothing) - cache_inlineable = inlining_policy(interp)(cache_inf) !== nothing + # TODO maybe we want to respect callsite `@inline`/`@noinline` annotations here ? + cache_inlineable = inlining_policy(interp, cache_inf, 0x00, match) !== nothing end end if !cache_inlineable @@ -1908,7 +1910,8 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState) if isa(fname, SlotNumber) changes = StateUpdate(fname, VarState(Any, false), changes, false) end - elseif hd === :inbounds || hd === :meta || hd === :loopinfo || hd === :code_coverage_effect + elseif hd === :code_coverage_effect || + (hd !== :boundscheck && hd !== nothing && is_meta_expr_head(hd)) # :boundscheck can be narrowed to Bool # these do not generate code else t = abstract_eval_statement(interp, stmt, changes, frame) diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index 1898aa8b75778..3b7b437ce6c6d 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -21,21 +21,30 @@ function push!(et::EdgeTracker, ci::CodeInstance) push!(et, ci.def) end -struct InliningState{S <: Union{EdgeTracker, Nothing}, T, P} +struct InliningState{S <: Union{EdgeTracker, Nothing}, T, I<:AbstractInterpreter} params::OptimizationParams et::S mi_cache::T - policy::P + interp::I end -function default_inlining_policy(@nospecialize(src)) +function inlining_policy(interp::AbstractInterpreter, @nospecialize(src), stmt_flag::UInt8, match::Union{MethodMatch,InferenceResult}) if isa(src, CodeInfo) || isa(src, Vector{UInt8}) src_inferred = ccall(:jl_ir_flag_inferred, Bool, (Any,), src) - src_inlineable = ccall(:jl_ir_flag_inlineable, Bool, (Any,), src) + src_inlineable = is_stmt_inline(stmt_flag) || ccall(:jl_ir_flag_inlineable, Bool, (Any,), src) return 
src_inferred && src_inlineable ? src : nothing - end - if isa(src, OptimizationState) && isdefined(src, :ir) - return src.src.inlineable ? src.ir : nothing + elseif isa(src, OptimizationState) && isdefined(src, :ir) + return (is_stmt_inline(stmt_flag) || src.src.inlineable) ? src.ir : nothing + elseif src === nothing && is_stmt_inline(stmt_flag) && isa(match, MethodMatch) + # when the source isn't available at this moment, try to re-infer and inline it + # NOTE we can make inference try to keep the source if the call is going to be inlined, + # but then inlining will depend on local state of inference and so the first entry + # and the succeeding ones may generate different code; rather we always re-infer + # the source to avoid the problem while it's obviously not most efficient + # HACK disable inlining for the re-inference to avoid cycles by making sure the following inference never comes here again + interp = NativeInterpreter(get_world_counter(interp); opt_params = OptimizationParams(; inlining = false)) + src, rt = typeinf_code(interp, match.method, match.spec_types, match.sparams, true) + return src end return nothing end @@ -57,7 +66,7 @@ mutable struct OptimizationState inlining = InliningState(params, EdgeTracker(s_edges, frame.valid_worlds), WorldView(code_cache(interp), frame.world), - inlining_policy(interp)) + interp) return new(frame.linfo, frame.src, nothing, frame.stmt_info, frame.mod, frame.sptypes, frame.slottypes, false, @@ -86,7 +95,7 @@ mutable struct OptimizationState inlining = InliningState(params, nothing, WorldView(code_cache(interp), get_world_counter()), - inlining_policy(interp)) + interp) return new(linfo, src, nothing, stmt_info, mod, sptypes_from_meth_instance(linfo), slottypes, false, @@ -128,6 +137,10 @@ const SLOT_USEDUNDEF = 32 # slot has uses that might raise UndefVarError # This statement was marked as @inbounds by the user. 
If replaced by inlining, # any contained boundschecks may be removed const IR_FLAG_INBOUNDS = 0x01 +# This statement was marked as @inline by the user +const IR_FLAG_INLINE = 0x01 << 1 +# This statement was marked as @noinline by the user +const IR_FLAG_NOINLINE = 0x01 << 2 # This statement may be removed if its result is unused. In particular it must # thus be both pure and effect free. const IR_FLAG_EFFECT_FREE = 0x01 << 4 @@ -173,6 +186,9 @@ function isinlineable(m::Method, me::OptimizationState, params::OptimizationPara return inlineable end +is_stmt_inline(stmt_flag::UInt8) = stmt_flag & IR_FLAG_INLINE != 0 +is_stmt_noinline(stmt_flag::UInt8) = stmt_flag & IR_FLAG_NOINLINE != 0 + # These affect control flow within the function (so may not be removed # if there is no usage within the function), but don't affect the purity # of the function as a whole. @@ -359,6 +375,7 @@ function convert_to_ircode(ci::CodeInfo, code::Vector{Any}, coverage::Bool, sv:: renumber_ir_elements!(code, changemap, labelmap) inbounds_depth = 0 # Number of stacked inbounds + inline_flags = BitVector() meta = Any[] flags = fill(0x00, length(code)) for i = 1:length(code) @@ -373,16 +390,38 @@ function convert_to_ircode(ci::CodeInfo, code::Vector{Any}, coverage::Bool, sv:: inbounds_depth -= 1 end stmt = nothing + elseif isexpr(stmt, :inline) + if stmt.args[1]::Bool + push!(inline_flags, true) + else + pop!(inline_flags) + end + stmt = nothing + elseif isexpr(stmt, :noinline) + if stmt.args[1]::Bool + push!(inline_flags, false) + else + pop!(inline_flags) + end + stmt = nothing else stmt = normalize(stmt, meta) end code[i] = stmt - if !(stmt === nothing) + if stmt !== nothing if inbounds_depth > 0 flags[i] |= IR_FLAG_INBOUNDS end + if !isempty(inline_flags) + if last(inline_flags) + flags[i] |= IR_FLAG_INLINE + else + flags[i] |= IR_FLAG_NOINLINE + end + end end end + @assert isempty(inline_flags) "malformed meta flags" strip_trailing_junk!(ci, code, stmtinfo, flags) cfg = 
compute_basic_blocks(code) types = Any[] diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index 7e7baff136741..420ec1a8998e3 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -604,6 +604,7 @@ function rewrite_apply_exprargs!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx:: argexprs::Vector{Any}, atypes::Vector{Any}, arginfos::Vector{Any}, arg_start::Int, istate::InliningState) + flag = ir.stmts[idx][:flag] new_argexprs = Any[argexprs[arg_start]] new_atypes = Any[atypes[arg_start]] # loop over original arguments and flatten any known iterators @@ -659,8 +660,9 @@ function rewrite_apply_exprargs!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx:: info = call.info handled = false if isa(info, ConstCallInfo) - if maybe_handle_const_call!(ir, state1.id, new_stmt, info, new_sig, - call.rt, istate, false, todo) + if !is_stmt_noinline(flag) && maybe_handle_const_call!( + ir, state1.id, new_stmt, info, new_sig,call.rt, istate, flag, false, todo) + handled = true else info = info.call @@ -671,7 +673,7 @@ function rewrite_apply_exprargs!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx:: MethodMatchInfo[info] : info.matches # See if we can inline this call to `iterate` analyze_single_call!(ir, todo, state1.id, new_stmt, - new_sig, call.rt, info, istate) + new_sig, call.rt, info, istate, flag) end if i != length(thisarginfo.each) valT = getfield_tfunc(call.rt, Const(1)) @@ -719,7 +721,7 @@ function compileable_specialization(et::Union{EdgeTracker, Nothing}, (; linfo):: return mi end -function resolve_todo(todo::InliningTodo, state::InliningState) +function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8) (; match) = todo.spec::DelayedInliningSpec #XXX: update_valid_age!(min_valid[1], max_valid[1], sv) @@ -755,9 +757,7 @@ function resolve_todo(todo::InliningTodo, state::InliningState) return ConstantCase(src) end - if src !== nothing - src = state.policy(src) - end + src = 
inlining_policy(state.interp, src, flag, match) if src === nothing return compileable_specialization(et, match) @@ -771,9 +771,9 @@ function resolve_todo(todo::InliningTodo, state::InliningState) return InliningTodo(todo.mi, src) end -function resolve_todo(todo::UnionSplit, state::InliningState) +function resolve_todo(todo::UnionSplit, state::InliningState, flag::UInt8) UnionSplit(todo.fully_covered, todo.atype, - Pair{Any,Any}[sig=>resolve_todo(item, state) for (sig, item) in todo.cases]) + Pair{Any,Any}[sig=>resolve_todo(item, state, flag) for (sig, item) in todo.cases]) end function validate_sparams(sparams::SimpleVector) @@ -784,7 +784,7 @@ function validate_sparams(sparams::SimpleVector) end function analyze_method!(match::MethodMatch, atypes::Vector{Any}, - state::InliningState, @nospecialize(stmttyp)) + state::InliningState, @nospecialize(stmttyp), flag::UInt8) method = match.method methsig = method.sig @@ -804,7 +804,7 @@ function analyze_method!(match::MethodMatch, atypes::Vector{Any}, et = state.et - if !state.params.inlining + if !state.params.inlining || is_stmt_noinline(flag) return compileable_specialization(et, match) end @@ -818,7 +818,7 @@ function analyze_method!(match::MethodMatch, atypes::Vector{Any}, # If we don't have caches here, delay resolving this MethodInstance # until the batch inlining step (or an external post-processing pass) state.mi_cache === nothing && return todo - return resolve_todo(todo, state) + return resolve_todo(todo, state, flag) end function InliningTodo(mi::MethodInstance, ir::IRCode) @@ -1043,7 +1043,7 @@ is_builtin(s::Signature) = s.ft ⊑ Builtin function inline_invoke!(ir::IRCode, idx::Int, sig::Signature, (; match, result)::InvokeCallInfo, - state::InliningState, todo::Vector{Pair{Int, Any}}) + state::InliningState, todo::Vector{Pair{Int, Any}}, flag::UInt8) stmt = ir.stmts[idx][:inst] calltype = ir.stmts[idx][:type] @@ -1057,17 +1057,17 @@ function inline_invoke!(ir::IRCode, idx::Int, sig::Signature, (; match, 
result): atypes = atypes[4:end] pushfirst!(atypes, atype0) - if isa(result, InferenceResult) + if isa(result, InferenceResult) && !is_stmt_noinline(flag) (; mi) = item = InliningTodo(result, atypes, calltype) validate_sparams(mi.sparam_vals) || return nothing if argtypes_to_type(atypes) <: mi.def.sig - state.mi_cache !== nothing && (item = resolve_todo(item, state)) + state.mi_cache !== nothing && (item = resolve_todo(item, state, flag)) handle_single_case!(ir, stmt, idx, item, true, todo) return nothing end end - result = analyze_method!(match, atypes, state, calltype) + result = analyze_method!(match, atypes, state, calltype, flag) handle_single_case!(ir, stmt, idx, result, true, todo) return nothing end @@ -1162,7 +1162,7 @@ end function analyze_single_call!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int, @nospecialize(stmt), sig::Signature, @nospecialize(calltype), infos::Vector{MethodMatchInfo}, - state::InliningState) + state::InliningState, flag::UInt8) cases = Pair{Any, Any}[] signature_union = Union{} only_method = nothing # keep track of whether there is one matching method @@ -1196,7 +1196,7 @@ function analyze_single_call!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int fully_covered = false continue end - case = analyze_method!(match, sig.atypes, state, calltype) + case = analyze_method!(match, sig.atypes, state, calltype, flag) if case === nothing fully_covered = false continue @@ -1223,7 +1223,7 @@ function analyze_single_call!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int match = meth[1] end fully_covered = true - case = analyze_method!(match, sig.atypes, state, calltype) + case = analyze_method!(match, sig.atypes, state, calltype, flag) case === nothing && return push!(cases, Pair{Any,Any}(match.spec_types, case)) end @@ -1245,7 +1245,7 @@ end function maybe_handle_const_call!(ir::IRCode, idx::Int, stmt::Expr, info::ConstCallInfo, sig::Signature, @nospecialize(calltype), - state::InliningState, + state::InliningState, flag::UInt8, 
isinvoke::Bool, todo::Vector{Pair{Int, Any}}) # when multiple matches are found, bail out and later inliner will union-split this signature # TODO effectively use multiple constant analysis results here @@ -1257,7 +1257,7 @@ function maybe_handle_const_call!(ir::IRCode, idx::Int, stmt::Expr, validate_sparams(mi.sparam_vals) || return true mthd_sig = mi.def.sig mistypes = mi.specTypes - state.mi_cache !== nothing && (item = resolve_todo(item, state)) + state.mi_cache !== nothing && (item = resolve_todo(item, state, flag)) if sig.atype <: mthd_sig handle_single_case!(ir, stmt, idx, item, isinvoke, todo) return true @@ -1295,6 +1295,8 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState) info = info.info end + flag = ir.stmts[idx][:flag] + # Inference determined this couldn't be analyzed. Don't question it. if info === false continue @@ -1304,7 +1306,8 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState) # it'll have performed a specialized analysis for just this case. Use its # result. 
if isa(info, ConstCallInfo) - if maybe_handle_const_call!(ir, idx, stmt, info, sig, calltype, state, sig.f === Core.invoke, todo) + if !is_stmt_noinline(flag) && maybe_handle_const_call!( + ir, idx, stmt, info, sig, calltype, state, flag, sig.f === Core.invoke, todo) continue else info = info.call @@ -1312,7 +1315,7 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState) end if isa(info, OpaqueClosureCallInfo) - result = analyze_method!(info.match, sig.atypes, state, calltype) + result = analyze_method!(info.match, sig.atypes, state, calltype, flag) handle_single_case!(ir, stmt, idx, result, false, todo) continue end @@ -1320,7 +1323,7 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState) # Handle invoke if sig.f === Core.invoke if isa(info, InvokeCallInfo) - inline_invoke!(ir, idx, sig, info, state, todo) + inline_invoke!(ir, idx, sig, info, state, todo, flag) end continue end @@ -1334,7 +1337,7 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState) continue end - analyze_single_call!(ir, todo, idx, stmt, sig, calltype, infos, state) + analyze_single_call!(ir, todo, idx, stmt, sig, calltype, infos, state, flag) end todo end @@ -1399,7 +1402,8 @@ end function late_inline_special_case!(ir::IRCode, sig::Signature, idx::Int, stmt::Expr, params::OptimizationParams) f, ft, atypes = sig.f, sig.ft, sig.atypes typ = ir.stmts[idx][:type] - if params.inlining && length(atypes) == 3 && istopfunction(f, :!==) + isinlining = params.inlining + if isinlining && length(atypes) == 3 && istopfunction(f, :!==) # special-case inliner for !== that precedes _methods_by_ftype union splitting # and that works, even though inference generally avoids inferring the `!==` Method if isa(typ, Const) @@ -1411,7 +1415,7 @@ function late_inline_special_case!(ir::IRCode, sig::Signature, idx::Int, stmt::E not_call = Expr(:call, GlobalRef(Core.Intrinsics, :not_int), cmp_call_ssa) ir[SSAValue(idx)] = not_call return true - elseif params.inlining && length(atypes) 
== 3 && istopfunction(f, :(>:)) + elseif isinlining && length(atypes) == 3 && istopfunction(f, :(>:)) # special-case inliner for issupertype # that works, even though inference generally avoids inferring the `>:` Method if isa(typ, Const) && _builtin_nothrow(<:, Any[atypes[3], atypes[2]], typ) @@ -1421,7 +1425,7 @@ function late_inline_special_case!(ir::IRCode, sig::Signature, idx::Int, stmt::E subtype_call = Expr(:call, GlobalRef(Core, :(<:)), stmt.args[3], stmt.args[2]) ir[SSAValue(idx)] = subtype_call return true - elseif params.inlining && f === TypeVar && 2 <= length(atypes) <= 4 && (atypes[2] ⊑ Symbol) + elseif isinlining && f === TypeVar && 2 <= length(atypes) <= 4 && (atypes[2] ⊑ Symbol) ir[SSAValue(idx)] = Expr(:call, GlobalRef(Core, :_typevar), stmt.args[2], length(stmt.args) < 4 ? Bottom : stmt.args[3], length(stmt.args) == 2 ? Any : stmt.args[end]) diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl index a9ab2d231c13c..5d3325a2a5e3c 100644 --- a/base/compiler/typeinfer.jl +++ b/base/compiler/typeinfer.jl @@ -343,7 +343,7 @@ function maybe_compress_codeinfo(interp::AbstractInterpreter, linfo::MethodInsta nslots = length(ci.slotflags) resize!(ci.slottypes::Vector{Any}, nslots) resize!(ci.slotnames, nslots) - return ccall(:jl_compress_ir, Any, (Any, Any), def, ci) + return ccall(:jl_compress_ir, Vector{UInt8}, (Any, Any), def, ci) else return ci end diff --git a/base/compiler/types.jl b/base/compiler/types.jl index 677450f288c70..5f8f656312458 100644 --- a/base/compiler/types.jl +++ b/base/compiler/types.jl @@ -217,7 +217,6 @@ may_discard_trees(::AbstractInterpreter) = true verbose_stmt_info(::AbstractInterpreter) = false method_table(interp::AbstractInterpreter) = InternalMethodTable(get_world_counter(interp)) -inlining_policy(::AbstractInterpreter) = default_inlining_policy """ By default `AbstractInterpreter` implements the following inference bail out logic: diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl index 
ed09d5316473a..26e25126c393c 100644 --- a/base/compiler/utilities.jl +++ b/base/compiler/utilities.jl @@ -59,7 +59,8 @@ end # Meta expression head, these generally can't be deleted even when they are # in a dead branch but can be ignored when analyzing uses/liveness. -is_meta_expr_head(head::Symbol) = (head === :inbounds || head === :boundscheck || head === :meta || head === :loopinfo) +is_meta_expr_head(head::Symbol) = (head === :inbounds || head === :boundscheck || head === :meta || + head === :loopinfo || head === :inline || head === :noinline) sym_isless(a::Symbol, b::Symbol) = ccall(:strcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}), a, b) < 0 @@ -196,7 +197,7 @@ function specialize_method(method::Method, @nospecialize(atypes), sparams::Simpl if preexisting # check cached specializations # for an existing result stored there - return ccall(:jl_specializations_lookup, Any, (Any, Any), method, atypes) + return ccall(:jl_specializations_lookup, Any, (Any, Any), method, atypes)::Union{Nothing,MethodInstance} end return ccall(:jl_specializations_get_linfo, Ref{MethodInstance}, (Any, Any, Any), method, atypes, sparams) end diff --git a/base/compiler/validation.jl b/base/compiler/validation.jl index c152dfb9fa6a5..6e05c96cd7936 100644 --- a/base/compiler/validation.jl +++ b/base/compiler/validation.jl @@ -16,6 +16,8 @@ const VALID_EXPR_HEADS = IdDict{Symbol,UnitRange{Int}}( :leave => 1:1, :pop_exception => 1:1, :inbounds => 1:1, + :inline => 1:1, + :noinline => 1:1, :boundscheck => 0:0, :copyast => 1:1, :meta => 0:typemax(Int), @@ -141,7 +143,7 @@ function validate_code!(errors::Vector{>:InvalidCodeError}, c::CodeInfo, is_top_ head === :const || head === :enter || head === :leave || head === :pop_exception || head === :method || head === :global || head === :static_parameter || head === :new || head === :splatnew || head === :thunk || head === :loopinfo || - head === :throw_undef_if_not || head === :code_coverage_effect + head === :throw_undef_if_not || head === 
:code_coverage_effect || head === :inline || head === :noinline validate_val!(x) else # TODO: nothing is actually in statement position anymore diff --git a/base/expr.jl b/base/expr.jl index f583a58f78dc5..226dfe5f900a9 100644 --- a/base/expr.jl +++ b/base/expr.jl @@ -209,9 +209,51 @@ end !!! compat "Julia 1.8" The usage within a function body requires at least Julia 1.8. + +--- + @inline block + +Give a hint to the compiler that calls within `block` are worth inlining. + +```julia +# The compiler will try to inline `f` +@inline f(...) + +# The compiler will try to inline `f`, `g` and `+` +@inline f(...) + g(...) +``` + +!!! note + A callsite annotation always has the precedence over the annotation applied to the + definition of the called function: + ```julia + @noinline function explicit_noinline(args...) + # body + end + + let + @inline explicit_noinline(args...) # will be inlined + end + ``` + +!!! note + When there are nested callsite annotations, the innermost annotation has the precedence: + ```julia + @noinline let a0, b0 = ... + a = @inline f(a0) # the compiler will try to inline this call + b = f(b0) # the compiler will NOT try to inline this call + return a, b + end + ``` + +!!! compat "Julia 1.8" + The callsite annotation requires at least Julia 1.8. """ -macro inline(ex) - esc(isa(ex, Expr) ? pushmeta!(ex, :inline) : ex) +macro inline(x) + return annotate_meta_def_or_block(x, :inline) +end +macro inline() + return Expr(:meta, :inline) end """ @@ -244,11 +286,54 @@ end !!! compat "Julia 1.8" The usage within a function body requires at least Julia 1.8. +--- + @noinline block + +Give a hint to the compiler that it should not inline the calls within `block`. + +```julia +# The compiler will try to not inline `f` +@noinline f(...) + +# The compiler will try to not inline `f`, `g` and `+` +@noinline f(...) + g(...) +``` + +!!! 
note + A callsite annotation always has the precedence over the annotation applied to the + definition of the called function: + ```julia + @inline function explicit_inline(args...) + # body + end + + let + @noinline explicit_inline(args...) # will not be inlined + end + ``` + +!!! note + When there are nested callsite annotations, the innermost annotation has the precedence: + ```julia + @inline let a0, b0 = ... + a = @noinline f(a0) # the compiler will NOT try to inline this call + b = f(b0) # the compiler will try to inline this call + return a, b + end + ``` + +!!! compat "Julia 1.8" + The callsite annotation requires at least Julia 1.8. + +--- !!! note If the function is trivial (for example returning a constant) it might get inlined anyway. """ -macro noinline(ex) - esc(isa(ex, Expr) ? pushmeta!(ex, :noinline) : ex) +macro noinline(x) + return annotate_meta_def_or_block(x, :noinline) +end +macro noinline() + return Expr(:meta, :noinline) end """ @@ -301,6 +386,15 @@ end ## some macro utilities ## +unwrap_macrocalls(@nospecialize(x)) = x +function unwrap_macrocalls(ex::Expr) + inner = ex + while inner.head === :macrocall + inner = inner.args[end]::Expr + end + return inner +end + function pushmeta!(ex::Expr, sym::Symbol, args::Any...) if isempty(args) tag = sym @@ -308,10 +402,7 @@ function pushmeta!(ex::Expr, sym::Symbol, args::Any...) 
tag = Expr(sym, args...)::Expr end - inner = ex - while inner.head === :macrocall - inner = inner.args[end]::Expr - end + inner = unwrap_macrocalls(ex) idx, exargs = findmeta(inner) if idx != 0 @@ -361,8 +452,23 @@ function findmetaarg(metaargs, sym) return 0 end -function is_short_function_def(ex) - ex.head === :(=) || return false +function annotate_meta_def_or_block(@nospecialize(ex), meta::Symbol) + inner = unwrap_macrocalls(ex) + if is_function_def(inner) + # annotation on a definition + return esc(pushmeta!(ex, meta)) + else + # annotation on a block + return Expr(:block, + Expr(meta, true), + Expr(:local, Expr(:(=), :val, esc(ex))), + Expr(meta, false), + :val) + end +end + +function is_short_function_def(@nospecialize(ex)) + isexpr(ex, :(=)) || return false while length(ex.args) >= 1 && isa(ex.args[1], Expr) (ex.args[1].head === :call) && return true (ex.args[1].head === :where || ex.args[1].head === :(::)) || return false @@ -370,9 +476,11 @@ function is_short_function_def(ex) end return false end +is_function_def(@nospecialize(ex)) = + return isexpr(ex, :function) || is_short_function_def(ex) || isexpr(ex, :->) function findmeta(ex::Expr) - if ex.head === :function || is_short_function_def(ex) || ex.head === :-> + if is_function_def(ex) body = ex.args[2]::Expr body.head === :block || error(body, " is not a block expression") return findmeta_block(ex.args) diff --git a/base/meta.jl b/base/meta.jl index b483630a92f8f..3fe815cd0cbc0 100644 --- a/base/meta.jl +++ b/base/meta.jl @@ -450,6 +450,7 @@ end _instantiate_type_in_env(x, spsig, spvals) = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), x, spsig, spvals) -is_meta_expr_head(head::Symbol) = (head === :inbounds || head === :boundscheck || head === :meta || head === :loopinfo) +is_meta_expr_head(head::Symbol) = (head === :inbounds || head === :boundscheck || head === :meta + || head === :loopinfo || head === :inline || head === :noinline) end # module diff --git a/src/ast.scm b/src/ast.scm 
index bc8d847279fc9..e5148a507a4fd 100644 --- a/src/ast.scm +++ b/src/ast.scm @@ -289,7 +289,7 @@ ;; predicates and accessors (define (quoted? e) - (memq (car e) '(quote top core globalref outerref line break inert meta inbounds loopinfo))) + (memq (car e) '(quote top core globalref outerref line break inert meta inbounds inline noinline loopinfo))) (define (quotify e) `',e) (define (unquote e) (if (and (pair? e) (memq (car e) '(quote inert))) diff --git a/src/codegen.cpp b/src/codegen.cpp index 65702955691e2..e0b86c51bf7d1 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -4434,7 +4434,7 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result) jl_value_t **args = (jl_value_t**)jl_array_data(ex->args); jl_sym_t *head = ex->head; if (head == meta_sym || head == inbounds_sym || head == coverageeffect_sym - || head == aliasscope_sym || head == popaliasscope_sym) { + || head == aliasscope_sym || head == popaliasscope_sym || head == inline_sym || head == noinline_sym) { // some expression types are metadata and can be ignored // in statement position return; @@ -4869,7 +4869,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval) } else if (head == leave_sym || head == coverageeffect_sym || head == pop_exception_sym || head == enter_sym || head == inbounds_sym - || head == aliasscope_sym || head == popaliasscope_sym) { + || head == aliasscope_sym || head == popaliasscope_sym || head == inline_sym || head == noinline_sym) { jl_errorf("Expr(:%s) in value position", jl_symbol_name(head)); } else if (head == boundscheck_sym) { diff --git a/src/interpreter.c b/src/interpreter.c index 4686efe054edf..f999542d68c4f 100644 --- a/src/interpreter.c +++ b/src/interpreter.c @@ -312,7 +312,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s) return jl_true; } else if (head == meta_sym || head == coverageeffect_sym || head == inbounds_sym || head == loopinfo_sym || - head == aliasscope_sym || head == 
popaliasscope_sym) { + head == aliasscope_sym || head == popaliasscope_sym || head == inline_sym || head == noinline_sym) { return jl_nothing; } else if (head == gc_preserve_begin_sym || head == gc_preserve_end_sym) { diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm index f00ea0c9ba6d9..428b0513b7e52 100644 --- a/src/julia-syntax.scm +++ b/src/julia-syntax.scm @@ -3498,7 +3498,7 @@ f(x) = yt(x) thunk with-static-parameters toplevel-only global globalref outerref const-if-global thismodule const atomic null true false ssavalue isdefined toplevel module lambda - error gc_preserve_begin gc_preserve_end import using export))) + error gc_preserve_begin gc_preserve_end import using export inline noinline))) (define (local-in? s lam) (or (assq s (car (lam:vinfo lam))) @@ -4592,7 +4592,7 @@ f(x) = yt(x) (cons (car e) args))) ;; metadata expressions - ((line meta inbounds loopinfo gc_preserve_end aliasscope popaliasscope) + ((line meta inbounds loopinfo gc_preserve_end aliasscope popaliasscope inline noinline) (let ((have-ret? (and (pair? code) (pair? (car code)) (eq? (caar code) 'return)))) (cond ((eq? (car e) 'line) (set! current-loc e) @@ -4737,7 +4737,7 @@ f(x) = yt(x) (begin (set! linetable (cons (make-lineinfo name file line) linetable)) (set! current-loc 1))) (if (or reachable - (and (pair? e) (memq (car e) '(meta inbounds gc_preserve_begin gc_preserve_end aliasscope popaliasscope)))) + (and (pair? e) (memq (car e) '(meta inbounds gc_preserve_begin gc_preserve_end aliasscope popaliasscope inline noinline)))) (begin (set! code (cons e code)) (set! i (+ i 1)) (set! 
locs (cons current-loc locs))))) diff --git a/src/macroexpand.scm b/src/macroexpand.scm index 5e55c7bbb29c1..f17f4d3510dc6 100644 --- a/src/macroexpand.scm +++ b/src/macroexpand.scm @@ -352,7 +352,7 @@ ,(resolve-expansion-vars-with-new-env (caddr arg) env m parent-scope inarg)))) (else `(global ,(resolve-expansion-vars-with-new-env arg env m parent-scope inarg)))))) - ((using import export meta line inbounds boundscheck loopinfo) (map unescape e)) + ((using import export meta line inbounds boundscheck loopinfo inline noinline) (map unescape e)) ((macrocall) e) ; invalid syntax anyways, so just act like it's quoted. ((symboliclabel) e) ((symbolicgoto) e) diff --git a/src/method.c b/src/method.c index 9c255f786b74d..45df3d6012d9e 100644 --- a/src/method.c +++ b/src/method.c @@ -84,7 +84,8 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve e->head == quote_sym || e->head == inert_sym || e->head == meta_sym || e->head == inbounds_sym || e->head == boundscheck_sym || e->head == loopinfo_sym || - e->head == aliasscope_sym || e->head == popaliasscope_sym) { + e->head == aliasscope_sym || e->head == popaliasscope_sym || + e->head == inline_sym || e->head == noinline_sym) { // ignore these } else { diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index 9358dce3325f7..e788b7e56100e 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -389,7 +389,7 @@ function isinvoke(@nospecialize(x), pred) end return false end -code_typed1(args...; kwargs...) = (first∘first)(code_typed(args...; kwargs...))::Core.CodeInfo +code_typed1(args...; kwargs...) = (first(only(code_typed(args...; kwargs...)))::Core.CodeInfo).code @testset "@inline/@noinline annotation before definition" begin m = Module() @@ -414,24 +414,24 @@ code_typed1(args...; kwargs...) = (first∘first)(code_typed(args...; kwargs...) 
def_noinline_noconflict(x) = _def_noinline_noconflict(x) end - let ci = code_typed1(m.def_inline, (Int,)) - @test all(ci.code) do x + let code = code_typed1(m.def_inline, (Int,)) + @test all(code) do x !isinvoke(x, :_def_inline) end end - let ci = code_typed1(m.def_noinline, (Int,)) - @test any(ci.code) do x + let code = code_typed1(m.def_noinline, (Int,)) + @test any(code) do x isinvoke(x, :_def_noinline) end end # test that they don't conflict with other "before-definition" macros - let ci = code_typed1(m.def_inline_noconflict, (Int,)) - @test all(ci.code) do x + let code = code_typed1(m.def_inline_noconflict, (Int,)) + @test all(code) do x !isinvoke(x, :_def_inline_noconflict) end end - let ci = code_typed1(m.def_noinline_noconflict, (Int,)) - @test any(ci.code) do x + let code = code_typed1(m.def_noinline_noconflict, (Int,)) + @test any(code) do x isinvoke(x, :_def_noinline_noconflict) end end @@ -471,36 +471,149 @@ end end end - let ci = code_typed1(m.body_inline, (Int,)) - @test all(ci.code) do x + let code = code_typed1(m.body_inline, (Int,)) + @test all(code) do x !isinvoke(x, :_body_inline) end end - let ci = code_typed1(m.body_noinline, (Int,)) - @test any(ci.code) do x + let code = code_typed1(m.body_noinline, (Int,)) + @test any(code) do x isinvoke(x, :_body_noinline) end end # test annotations for `do` blocks - let ci = code_typed1(m.do_inline, (Int,)) + let code = code_typed1(m.do_inline, (Int,)) # what we test here is that both `simple_caller` and the anonymous function that the # `do` block creates should inlined away, and as a result there is only the unresolved call - @test all(ci.code) do x + @test all(code) do x !isinvoke(x, :simple_caller) && !isinvoke(x, mi->startswith(string(mi.def.name), '#')) end end - let ci = code_typed1(m.do_noinline, (Int,)) + let code = code_typed1(m.do_noinline, (Int,)) # the anonymous function that the `do` block created shouldn't be inlined here - @test any(ci.code) do x + @test any(code) do x isinvoke(x, 
mi->startswith(string(mi.def.name), '#')) end end end +@testset "callsite @inline/@noinline annotations" begin + m = Module() + @eval m begin + # this global variable prevents inference to fold everything as constant, and/or the optimizer to inline the call accessing to this + g = 0 + + @noinline noinlined_explicit(x) = x + force_inline_explicit(x) = @inline noinlined_explicit(x) + force_inline_block_explicit(x) = @inline noinlined_explicit(x) + noinlined_explicit(x) + noinlined_implicit(x) = g + force_inline_implicit(x) = @inline noinlined_implicit(x) + force_inline_block_implicit(x) = @inline noinlined_implicit(x) + noinlined_implicit(x) + + @inline inlined_explicit(x) = x + force_noinline_explicit(x) = @noinline inlined_explicit(x) + force_noinline_block_explicit(x) = @noinline inlined_explicit(x) + inlined_explicit(x) + inlined_implicit(x) = x + force_noinline_implicit(x) = @noinline inlined_implicit(x) + force_noinline_block_implicit(x) = @noinline inlined_implicit(x) + inlined_implicit(x) + + # test callsite annotations for constant-prop'ed calls + + @noinline Base.@aggressive_constprop noinlined_constprop_explicit(a) = a+g + force_inline_constprop_explicit() = @inline noinlined_constprop_explicit(0) + Base.@aggressive_constprop noinlined_constprop_implicit(a) = a+g + force_inline_constprop_implicit() = @inline noinlined_constprop_implicit(0) + + @inline Base.@aggressive_constprop inlined_constprop_explicit(a) = a+g + force_noinline_constprop_explicit() = @noinline inlined_constprop_explicit(0) + @inline Base.@aggressive_constprop inlined_constprop_implicit(a) = a+g + force_noinline_constprop_implicit() = @noinline inlined_constprop_implicit(0) + + @noinline notinlined(a) = a + function nested(a0, b0) + @noinline begin + a = @inline notinlined(a0) # this call should be inlined + b = notinlined(b0) # this call should NOT be inlined + return a, b + end + end + + # test inlining of un-cached callsites + + import Core.Compiler: isType + + limited(a) = 
@noinline(isType(a)) ? @inline(limited(a.parameters[1])) : rand(a) + + function multilimited(a) + if @noinline(isType(a)) + return @inline(multilimited(a.parameters[1])) + else + return rand(Bool) ? rand(a) : @inline(multilimited(a)) + end + end + end + + let code = code_typed1(m.force_inline_explicit, (Int,)) + @test all(x->!isinvoke(x, :noinlined_explicit), code) + end + let code = code_typed1(m.force_inline_block_explicit, (Int,)) + @test all(code) do x + !isinvoke(x, :noinlined_explicit) && + !isinvoke(x, :(+)) + end + end + let code = code_typed1(m.force_inline_implicit, (Int,)) + @test all(x->!isinvoke(x, :noinlined_implicit), code) + end + let code = code_typed1(m.force_inline_block_implicit, (Int,)) + @test all(x->!isinvoke(x, :noinlined_explicit), code) + end + + let code = code_typed1(m.force_noinline_explicit, (Int,)) + @test any(x->isinvoke(x, :inlined_explicit), code) + end + let code = code_typed1(m.force_noinline_block_explicit, (Int,)) + @test count(x->isinvoke(x, :inlined_explicit), code) == 2 + end + let code = code_typed1(m.force_noinline_implicit, (Int,)) + @test any(x->isinvoke(x, :inlined_implicit), code) + end + let code = code_typed1(m.force_noinline_block_implicit, (Int,)) + @test count(x->isinvoke(x, :inlined_implicit), code) == 2 + end + + let code = code_typed1(m.force_inline_constprop_explicit) + @test all(x->!isinvoke(x, :noinlined_constprop_explicit), code) + end + let code = code_typed1(m.force_inline_constprop_implicit) + @test all(x->!isinvoke(x, :noinlined_constprop_implicit), code) + end + + let code = code_typed1(m.force_noinline_constprop_explicit) + @test any(x->isinvoke(x, :inlined_constprop_explicit), code) + end + let code = code_typed1(m.force_noinline_constprop_implicit) + @test any(x->isinvoke(x, :inlined_constprop_implicit), code) + end + + let code = code_typed1(m.nested, (Int,Int)) + @test count(x->isinvoke(x, :notinlined), code) == 1 + end + + let code = code_typed1(m.limited, (Any,)) + @test count(x->isinvoke(x, 
:isType), code) == 2 + end + # check that inlining for recursive callsites doesn't depend on inference local cache + let code1 = code_typed1(m.multilimited, (Any,)) + code2 = code_typed1(m.multilimited, (Any,)) + @test code1 == code2 + end +end + # force constant-prop' for `setproperty!` -let m = Module() - ci = @eval m begin +# https://github.com/JuliaLang/julia/pull/41882 +let ci = @eval Module() begin # if we don't force constant-prop', `T = fieldtype(Foo, ::Symbol)` will be union-split to # `Union{Type{Any},Type{Int}` and it will make `convert(T, nothing)` too costly # and it leads to inlining failure From 1c7482f215c2c0e7d5210e04fe5e35ed70a2f3e7 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Fri, 20 Aug 2021 15:34:08 -0400 Subject: [PATCH 02/11] set ssaflags on `CodeInfo` construction --- base/compiler/optimize.jl | 72 +++++++++------------------------ base/compiler/ssair/slot2ssa.jl | 6 +-- src/julia.h | 3 +- src/method.c | 62 +++++++++++++++++++++++++++- 4 files changed, 84 insertions(+), 59 deletions(-) diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index 3b7b437ce6c6d..a40b5b6187d0c 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -134,12 +134,14 @@ const SLOT_ASSIGNEDONCE = 16 # slot is assigned to only once const SLOT_USEDUNDEF = 32 # slot has uses that might raise UndefVarError # const SLOT_CALLED = 64 -# This statement was marked as @inbounds by the user. If replaced by inlining, -# any contained boundschecks may be removed -const IR_FLAG_INBOUNDS = 0x01 -# This statement was marked as @inline by the user +# NOTE make sure to sync the flag definitions below with julia.h and `jl_code_info_set_ir` in method.c + +# This statement is marked as @inbounds by user. +# If replaced by inlining, any contained boundschecks may be removed. 
+const IR_FLAG_INBOUNDS = 0x01 << 0 +# This statement is marked as @inline by user const IR_FLAG_INLINE = 0x01 << 1 -# This statement was marked as @noinline by the user +# This statement is marked as @noinline by user const IR_FLAG_NOINLINE = 0x01 << 2 # This statement may be removed if its result is unused. In particular it must # thus be both pure and effect free. @@ -374,75 +376,37 @@ function convert_to_ircode(ci::CodeInfo, code::Vector{Any}, coverage::Bool, sv:: end renumber_ir_elements!(code, changemap, labelmap) - inbounds_depth = 0 # Number of stacked inbounds - inline_flags = BitVector() meta = Any[] - flags = fill(0x00, length(code)) for i = 1:length(code) - stmt = code[i] - if isexpr(stmt, :inbounds) - arg1 = stmt.args[1] - if arg1 === true # push - inbounds_depth += 1 - elseif arg1 === false # clear - inbounds_depth = 0 - elseif inbounds_depth > 0 # pop - inbounds_depth -= 1 - end - stmt = nothing - elseif isexpr(stmt, :inline) - if stmt.args[1]::Bool - push!(inline_flags, true) - else - pop!(inline_flags) - end - stmt = nothing - elseif isexpr(stmt, :noinline) - if stmt.args[1]::Bool - push!(inline_flags, false) - else - pop!(inline_flags) - end - stmt = nothing - else - stmt = normalize(stmt, meta) - end - code[i] = stmt - if stmt !== nothing - if inbounds_depth > 0 - flags[i] |= IR_FLAG_INBOUNDS - end - if !isempty(inline_flags) - if last(inline_flags) - flags[i] |= IR_FLAG_INLINE - else - flags[i] |= IR_FLAG_NOINLINE - end - end - end + code[i] = remove_meta!(code[i], meta) end - @assert isempty(inline_flags) "malformed meta flags" - strip_trailing_junk!(ci, code, stmtinfo, flags) + strip_trailing_junk!(ci, code, stmtinfo) cfg = compute_basic_blocks(code) types = Any[] - stmts = InstructionStream(code, types, stmtinfo, ci.codelocs, flags) + stmts = InstructionStream(code, types, stmtinfo, ci.codelocs, ci.ssaflags) ir = IRCode(stmts, cfg, collect(LineInfoNode, ci.linetable::Union{Vector{LineInfoNode},Vector{Any}}), sv.slottypes, meta, sv.sptypes) 
return ir end -function normalize(@nospecialize(stmt), meta::Vector{Any}) +function remove_meta!(@nospecialize(stmt), meta::Vector{Any}) if isa(stmt, Expr) - if stmt.head === :meta + head = stmt.head + if head === :meta args = stmt.args if length(args) > 0 push!(meta, stmt) end return nothing + elseif is_ssaflag_head(head) + # we processed these flags in `jl_code_info_set_ir` + return nothing end end return stmt end +is_ssaflag_head(head::Symbol) = head === :inbounds || head === :inline || head === :noinline + function slot2reg(ir::IRCode, ci::CodeInfo, sv::OptimizationState) # need `ci` for the slot metadata, IR for the code svdef = sv.linfo.def diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl index 777e7e2bb0af7..2e3d1da1c168f 100644 --- a/base/compiler/ssair/slot2ssa.jl +++ b/base/compiler/ssair/slot2ssa.jl @@ -183,7 +183,7 @@ function rename_uses!(ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt), r return fixemup!(stmt->true, stmt->renames[slot_id(stmt)], ir, ci, idx, stmt) end -function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{Any}, flags::Vector{UInt8}) +function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{Any}) # Remove `nothing`s at the end, we don't handle them well # (we expect the last instruction to be a terminator) ssavaluetypes = ci.ssavaluetypes::Vector{Any} @@ -193,7 +193,7 @@ function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{Any} resize!(ssavaluetypes, i) resize!(ci.codelocs, i) resize!(info, i) - resize!(flags, i) + resize!(ci.ssaflags, i) break end end @@ -205,7 +205,7 @@ function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{Any} push!(ssavaluetypes, Union{}) push!(ci.codelocs, 0) push!(info, nothing) - push!(flags, 0x00) + push!(ci.ssaflags, 0x00) end nothing end diff --git a/src/julia.h b/src/julia.h index e6edc71b42dfa..e53b33bef674d 100644 --- a/src/julia.h +++ b/src/julia.h @@ -255,7 +255,8 @@ typedef struct 
_jl_code_info_t { jl_value_t *ssavaluetypes; // types of ssa values (or count of them) jl_array_t *ssaflags; // flags associated with each statement: // 0 = inbounds - // 1,2 = inlinehint,always-inline,noinline + // 1 = inline + // 2 = noinline // 3 = strict-ieee (strictfp) // 4 = effect-free (may be deleted if unused) // 5-6 = diff --git a/src/method.c b/src/method.c index 45df3d6012d9e..7def65ae90c16 100644 --- a/src/method.c +++ b/src/method.c @@ -254,6 +254,11 @@ JL_DLLEXPORT void jl_resolve_globals_in_ir(jl_array_t *stmts, jl_module_t *m, jl } } +jl_value_t* expr_arg1(jl_value_t* expr) { + jl_array_t *args = ((jl_expr_t*)expr)->args; + return jl_array_ptr_ref(args, 0); +} + // copy a :lambda Expr into its CodeInfo representation, // including popping of known meta nodes static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) @@ -275,8 +280,17 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) jl_gc_wb(li, li->code); size_t n = jl_array_len(body); jl_value_t **bd = (jl_value_t**)jl_array_ptr_data((jl_array_t*)li->code); + li->ssaflags = jl_alloc_array_1d(jl_array_uint8_type, n); + jl_gc_wb(li, li->ssaflags); + int inbounds_depth = 0; // number of stacked inbounds + // isempty(inline_flags): no user annotation + // last(inline_flags) == 1: inline region + // last(inline_flags) == 0: noinline region + arraylist_t *inline_flags = arraylist_new((arraylist_t*)malloc_s(sizeof(arraylist_t)), 0); for (j = 0; j < n; j++) { jl_value_t *st = bd[j]; + int is_flag_stmt = 0; + // check :meta expression if (jl_is_expr(st) && ((jl_expr_t*)st)->head == meta_sym) { size_t k, ins = 0, na = jl_expr_nargs(st); jl_array_t *meta = ((jl_expr_t*)st)->args; @@ -298,10 +312,57 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) else jl_array_del_end(meta, na - ins); } + // check other flag expressions + else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == inbounds_sym) { + is_flag_stmt = 1; + jl_value_t *arg1 = expr_arg1(st); + if (arg1 == 
(jl_value_t*)jl_true) // push + inbounds_depth += 1; + else if (arg1 == (jl_value_t*)jl_false) // clear + inbounds_depth = 0; + else if (inbounds_depth > 0) // pop + inbounds_depth -= 1; + } + else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == inline_sym) { + is_flag_stmt = 1; + jl_value_t *arg1 = expr_arg1(st); + if (arg1 == (jl_value_t*)jl_true) // enter inline region + arraylist_push(inline_flags, (void*)1); + else { // exit inline region + assert(arg1 == (jl_value_t*)jl_false); + arraylist_pop(inline_flags); + } + } + else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == noinline_sym) { + is_flag_stmt = 1; + jl_value_t *arg1 = expr_arg1(st); + if (arg1 == (jl_value_t*)jl_true) // enter noinline region + arraylist_push(inline_flags, (void*)0); + else { // exit noinline region + assert(arg1 == (jl_value_t*)jl_false); + arraylist_pop(inline_flags); + } + } else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == return_sym) { jl_array_ptr_set(body, j, jl_new_struct(jl_returnnode_type, jl_exprarg(st, 0))); } + + if (is_flag_stmt) + jl_array_uint8_set(li->ssaflags, j, 0); + else { + uint8_t flag = 0; + if (inbounds_depth > 0) + flag |= 1 << 0; + if (inline_flags->len > 0) { + void* inline_flag = inline_flags->items[inline_flags->len - 1]; + flag |= 1 << (inline_flag ? 
1 : 2); + } + jl_array_uint8_set(li->ssaflags, j, flag); + } } + assert(inline_flags->len == 0); // malformed otherwise + arraylist_free(inline_flags); + free(inline_flags); jl_array_t *vinfo = (jl_array_t*)jl_exprarg(ir, 1); jl_array_t *vis = (jl_array_t*)jl_array_ptr_ref(vinfo, 0); size_t nslots = jl_array_len(vis); @@ -314,7 +375,6 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) jl_gc_wb(li, li->slotflags); li->ssavaluetypes = jl_box_long(nssavalue); jl_gc_wb(li, li->ssavaluetypes); - li->ssaflags = jl_alloc_array_1d(jl_array_uint8_type, 0); // Flags that need to be copied to slotflags const uint8_t vinfo_mask = 8 | 16 | 32 | 64; From addd655c4f95f4599f977c2f1f3edbc9fc606bce Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Sun, 22 Aug 2021 01:46:54 +0900 Subject: [PATCH 03/11] try to keep source if it will be force-inlined --- base/compiler/abstractinterpretation.jl | 16 +++--- base/compiler/inferenceresult.jl | 6 +-- base/compiler/inferencestate.jl | 4 +- base/compiler/optimize.jl | 66 ++++++++++++++++++------- base/compiler/ssair/inlining.jl | 13 ++--- base/compiler/typeinfer.jl | 26 +++++----- base/compiler/types.jl | 4 +- test/compiler/inline.jl | 17 +++++-- 8 files changed, 99 insertions(+), 53 deletions(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index b79b36978f312..17906e6f816bc 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -529,8 +529,8 @@ function abstract_call_method_with_const_args(interp::AbstractInterpreter, resul mi === nothing && return nothing # try constant prop' inf_cache = get_inference_cache(interp) - inf_result = cache_lookup(mi, argtypes, inf_cache) - if inf_result === nothing + cache = cache_lookup(mi, argtypes, inf_cache) + if cache === nothing # if there might be a cycle, check to make sure we don't end up # calling ourselves here. 
let result = result # prevent capturing @@ -549,8 +549,10 @@ function abstract_call_method_with_const_args(interp::AbstractInterpreter, resul frame = InferenceState(inf_result, #=cache=#false, interp) frame === nothing && return nothing # this is probably a bad generated function (unsound), but just ignore it frame.parent = sv - push!(inf_cache, inf_result) + push!(inf_cache, (inf_result, frame.stmt_info)) typeinf(interp, frame) || return nothing + else + inf_result, _ = cache end result = inf_result.result # if constant inference hits a cycle, just bail out @@ -592,7 +594,7 @@ function maybe_get_const_prop_profitable(interp::AbstractInterpreter, result::Me return nothing end mi = mi::MethodInstance - if !force && !const_prop_methodinstance_heuristic(interp, match, mi) + if !force && !const_prop_methodinstance_heuristic(interp, match, mi, sv) add_remark!(interp, sv, "[constprop] Disabled by method instance heuristic") return nothing end @@ -696,7 +698,7 @@ end # This is a heuristic to avoid trying to const prop through complicated functions # where we would spend a lot of time, but are probably unlikely to get an improved # result anyway. -function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, match::MethodMatch, mi::MethodInstance) +function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, match::MethodMatch, mi::MethodInstance, sv::InferenceState) method = match.method if method.is_for_opaque_closure # Not inlining an opaque closure can be very expensive, so be generous @@ -715,8 +717,8 @@ function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, match: if isdefined(code, :inferred) && !cache_inlineable cache_inf = code.inferred if !(cache_inf === nothing) - # TODO maybe we want to respect callsite `@inline`/`@noinline` annotations here ? 
- cache_inlineable = inlining_policy(interp, cache_inf, 0x00, match) !== nothing + src = inlining_policy(interp, cache_inf, get_curr_ssaflag(sv), nothing) + cache_inlineable = src !== nothing end end if !cache_inlineable diff --git a/base/compiler/inferenceresult.jl b/base/compiler/inferenceresult.jl index 026b5286979cb..24e0fe42b4c88 100644 --- a/base/compiler/inferenceresult.jl +++ b/base/compiler/inferenceresult.jl @@ -141,12 +141,12 @@ function matching_cache_argtypes(linfo::MethodInstance, ::Nothing, va_override:: return cache_argtypes, falses(length(cache_argtypes)) end -function cache_lookup(linfo::MethodInstance, given_argtypes::Vector{Any}, cache::Vector{InferenceResult}) +function cache_lookup(linfo::MethodInstance, given_argtypes::Vector{Any}, cache::Vector{Tuple{InferenceResult,Vector{Any}}}) method = linfo.def::Method nargs::Int = method.nargs method.isva && (nargs -= 1) length(given_argtypes) >= nargs || return nothing - for cached_result in cache + for (cached_result, stmt_info) in cache cached_result.linfo === linfo || continue cache_match = true cache_argtypes = cached_result.argtypes @@ -165,7 +165,7 @@ function cache_lookup(linfo::MethodInstance, given_argtypes::Vector{Any}, cache: cache_overridden_by_const[end]) end cache_match || continue - return cached_result + return cached_result, stmt_info end return nothing end diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl index d76a4ebd18f6e..e89ff574bda2c 100644 --- a/base/compiler/inferencestate.jl +++ b/base/compiler/inferencestate.jl @@ -113,7 +113,7 @@ mutable struct InferenceState CachedMethodTable(method_table(interp)), interp) result.result = frame - cached && push!(get_inference_cache(interp), result) + cached && push!(get_inference_cache(interp), (result, stmt_info)) return frame end end @@ -296,3 +296,5 @@ function print_callstack(sv::InferenceState) sv = sv.parent end end + +get_curr_ssaflag(sv::InferenceState) = sv.src.ssaflags[sv.currpc] diff --git 
a/base/compiler/optimize.jl b/base/compiler/optimize.jl index a40b5b6187d0c..2d86483953d6c 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -28,29 +28,57 @@ struct InliningState{S <: Union{EdgeTracker, Nothing}, T, I<:AbstractInterpreter interp::I end -function inlining_policy(interp::AbstractInterpreter, @nospecialize(src), stmt_flag::UInt8, match::Union{MethodMatch,InferenceResult}) +include("compiler/ssair/driver.jl") + +function inlining_policy(interp::AbstractInterpreter, @nospecialize(src), + stmt_flag::UInt8, todo::Union{Nothing,InliningTodo}) if isa(src, CodeInfo) || isa(src, Vector{UInt8}) src_inferred = ccall(:jl_ir_flag_inferred, Bool, (Any,), src) src_inlineable = is_stmt_inline(stmt_flag) || ccall(:jl_ir_flag_inlineable, Bool, (Any,), src) return src_inferred && src_inlineable ? src : nothing elseif isa(src, OptimizationState) && isdefined(src, :ir) return (is_stmt_inline(stmt_flag) || src.src.inlineable) ? src.ir : nothing - elseif src === nothing && is_stmt_inline(stmt_flag) && isa(match, MethodMatch) - # when the source isn't available at this moment, try to re-infer and inline it - # NOTE we can make inference try to keep the source if the call is going to be inlined, - # but then inlining will depend on local state of inference and so the first entry - # and the succeeding ones may generate different code; rather we always re-infer - # the source to avoid the problem while it's obviously not most efficient - # HACK disable inlining for the re-inference to avoid cycles by making sure the following inference never comes here again - interp = NativeInterpreter(get_world_counter(interp); opt_params = OptimizationParams(; inlining = false)) - src, rt = typeinf_code(interp, match.method, match.spec_types, match.sparams, true) - return src + elseif src === nothing && todo !== nothing && is_stmt_inline(stmt_flag) + # if this statement is forced to be inlined, try additional effort to find the source + # in the local cache, and if 
found optimize and inline it + mi = todo.mi + (; match, atypes, stmttype) = todo.spec::DelayedInliningSpec + if isa(match, MethodMatch) + cache = cache_lookup(mi, atypes, get_inference_cache(interp)) + cache === nothing && return nothing + inf_result, stmt_info = cache + else + local cache = nothing + for (inf_result, stmt_info) in get_inference_cache(interp) + if inf_result === match + cache = inf_result, stmt_info + break + end + end + cache === nothing && return nothing + inf_result, stmt_info = cache + end + src = inf_result.src + if isa(src, CodeInfo) + elseif isa(src, OptimizationState) + src = src.src + else + return nothing + end + # HACK disable inlining for this optimization, otherwise we're likely to come back to here again + params = OptimizationParams(interp) + newparams = OptimizationParams(; inlining = false, + max_methods = params.MAX_METHODS, + tuple_splat = params.MAX_TUPLE_SPLAT, + union_splitting = params.MAX_UNION_SPLITTING, + unoptimize_throw_blocks = params.unoptimize_throw_blocks) + opt = OptimizationState(mi, copy(src), newparams, interp; stmt_info) + optimize(interp, opt, newparams, stmttype) + return opt.ir end return nothing end -include("compiler/ssair/driver.jl") - mutable struct OptimizationState linfo::MethodInstance src::CodeInfo @@ -72,7 +100,8 @@ mutable struct OptimizationState frame.sptypes, frame.slottypes, false, inlining) end - function OptimizationState(linfo::MethodInstance, src::CodeInfo, params::OptimizationParams, interp::AbstractInterpreter) + function OptimizationState(linfo::MethodInstance, src::CodeInfo, params::OptimizationParams, interp::AbstractInterpreter; + stmt_info::Union{Nothing,Vector{Any}} = nothing) # prepare src for running optimization passes # if it isn't already nssavalues = src.ssavaluetypes @@ -86,7 +115,9 @@ mutable struct OptimizationState if slottypes === nothing slottypes = Any[ Any for i = 1:nslots ] end - stmt_info = Any[nothing for i = 1:nssavalues] + if stmt_info === nothing + stmt_info = 
Any[nothing for i = 1:nssavalues] + end # cache some useful state computations def = linfo.def mod = isa(def, Method) ? def.module : def @@ -103,10 +134,11 @@ mutable struct OptimizationState end end -function OptimizationState(linfo::MethodInstance, params::OptimizationParams, interp::AbstractInterpreter) +function OptimizationState(linfo::MethodInstance, params::OptimizationParams, interp::AbstractInterpreter; + stmt_info::Union{Nothing,Vector{Any}} = nothing) src = retrieve_code_info(linfo) src === nothing && return nothing - return OptimizationState(linfo, src, params, interp) + return OptimizationState(linfo, src, params, interp; stmt_info) end function ir_to_codeinf!(opt::OptimizationState) diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index 420ec1a8998e3..441df2fdfab72 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -722,7 +722,8 @@ function compileable_specialization(et::Union{EdgeTracker, Nothing}, (; linfo):: end function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8) - (; match) = todo.spec::DelayedInliningSpec + mi = todo.mi + (; match, atypes) = todo.spec::DelayedInliningSpec #XXX: update_valid_age!(min_valid[1], max_valid[1], sv) isconst, src = false, nothing @@ -737,7 +738,7 @@ function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8) isconst, src = false, inferred_src end else - linfo = get(state.mi_cache, todo.mi, nothing) + linfo = get(state.mi_cache, mi, nothing) if linfo isa CodeInstance if invoke_api(linfo) == 2 # in this case function can be inlined to a constant @@ -753,11 +754,11 @@ function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8) et = state.et if isconst && et !== nothing - push!(et, todo.mi) + push!(et, mi) return ConstantCase(src) end - src = inlining_policy(state.interp, src, flag, match) + src = inlining_policy(state.interp, src, flag, todo) if src === nothing return compileable_specialization(et, 
match) @@ -767,8 +768,8 @@ function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8) src = copy(src) end - et !== nothing && push!(et, todo.mi) - return InliningTodo(todo.mi, src) + et !== nothing && push!(et, mi) + return InliningTodo(mi, src) end function resolve_todo(todo::UnionSplit, state::InliningState, flag::UInt8) diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl index 5d3325a2a5e3c..670e99c295759 100644 --- a/base/compiler/typeinfer.jl +++ b/base/compiler/typeinfer.jl @@ -438,13 +438,14 @@ function finish(me::InferenceState, interp::AbstractInterpreter) # inspect whether our inference had a limited result accuracy, # else it may be suitable to cache me.bestguess = cycle_fix_limited(me.bestguess, me) + parent = me.parent limited_ret = me.bestguess isa LimitedAccuracy limited_src = false if !limited_ret gt = me.src.ssavaluetypes::Vector{Any} for j = 1:length(gt) gt[j] = gtj = cycle_fix_limited(gt[j], me) - if gtj isa LimitedAccuracy && me.parent !== nothing + if gtj isa LimitedAccuracy && parent !== nothing limited_src = true break end @@ -452,20 +453,24 @@ function finish(me::InferenceState, interp::AbstractInterpreter) end if limited_ret # a parent may be cached still, but not this intermediate work: - # we can throw everything else away now - me.result.src = nothing + # we can throw everything else away now, unless inlinear will still want to have the inferred source + if !(parent !== nothing && is_stmt_inline(get_curr_ssaflag(parent))) + me.result.src = nothing + end me.cached = false me.src.inlineable = false unlock_mi_inference(interp, me.linfo) elseif limited_src # a type result will be cached still, but not this intermediate work: - # we can throw everything else away now - me.result.src = nothing + # we can throw everything else away now, unless inlinear will still want to have the inferred source + if !(parent !== nothing && is_stmt_inline(get_curr_ssaflag(parent))) + me.result.src = nothing + end 
me.src.inlineable = false else # annotate fulltree with type information, # either because we are the outermost code, or we might use this later - doopt = (me.cached || me.parent !== nothing) + doopt = (me.cached || parent !== nothing) type_annotate!(me, doopt) if doopt && may_optimize(interp) me.result.src = OptimizationState(me, OptimizationParams(interp), interp) @@ -834,14 +839,9 @@ function typeinf_code(interp::AbstractInterpreter, method::Method, @nospecialize mi = specialize_method(method, atypes, sparams)::MethodInstance ccall(:jl_typeinf_begin, Cvoid, ()) result = InferenceResult(mi) - frame = InferenceState(result, false, interp) + frame = InferenceState(result, run_optimizer, interp) frame === nothing && return (nothing, Any) - if typeinf(interp, frame) && run_optimizer - opt_params = OptimizationParams(interp) - result.src = src = OptimizationState(frame, opt_params, interp) - optimize(interp, src, opt_params, ignorelimited(result.result)) - frame.src = finish!(interp, result) - end + typeinf(interp, frame) ccall(:jl_typeinf_end, Cvoid, ()) frame.inferred || return (nothing, Any) return (frame.src, widenconst(ignorelimited(result.result))) diff --git a/base/compiler/types.jl b/base/compiler/types.jl index 5f8f656312458..9a354fdd101ca 100644 --- a/base/compiler/types.jl +++ b/base/compiler/types.jl @@ -142,7 +142,7 @@ It contains many parameters used by the compilation pipeline. 
""" struct NativeInterpreter <: AbstractInterpreter # Cache of inference results for this particular interpreter - cache::Vector{InferenceResult} + cache::Vector{Tuple{InferenceResult,Vector{Any}}} # The world age we're working inside of world::UInt @@ -167,7 +167,7 @@ struct NativeInterpreter <: AbstractInterpreter return new( # Initially empty cache - Vector{InferenceResult}(), + Tuple{InferenceResult,Vector{Any}}[], # world age counter world, diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index e788b7e56100e..53fb188263f9f 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -173,7 +173,7 @@ function f_ifelse(x) return b ? x + 1 : x end # 2 for now because the compiler leaves a GotoNode around -@test_broken length(code_typed(f_ifelse, (String,))[1][1].code) <= 2 +@test length(code_typed(f_ifelse, (String,))[1][1].code) <= 2 # Test that inlining of _apply_iterate properly hits the inference cache @noinline cprop_inline_foo1() = (1, 1) @@ -543,9 +543,17 @@ end import Core.Compiler: isType - limited(a) = @noinline(isType(a)) ? 
@inline(limited(a.parameters[1])) : rand(a) + function limited(a) + @nospecialize a + if @noinline(isType(a)) + return @inline(limited(a.parameters[1])) + else + return rand(a) + end + end function multilimited(a) + @nospecialize a if @noinline(isType(a)) return @inline(multilimited(a.parameters[1])) else @@ -602,12 +610,13 @@ end end let code = code_typed1(m.limited, (Any,)) - @test count(x->isinvoke(x, :isType), code) == 2 + @test count(x->isinvoke(x, :isType), code) == 2 # caller + inlined callee end - # check that inlining for recursive callsites doesn't depend on inference local cache let code1 = code_typed1(m.multilimited, (Any,)) code2 = code_typed1(m.multilimited, (Any,)) + # check that inlining for recursive callsites doesn't depend on inference local cache @test code1 == code2 + @test count(x->isinvoke(x, :isType), code1) == 3 # caller + inlined callee + inlined callee end end From 1e1378fb32145a37d3b60cc933bacd80ec0ccb7f Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Tue, 24 Aug 2021 15:05:44 +0900 Subject: [PATCH 04/11] give up inlining when source isn't available --- base/compiler/abstractinterpretation.jl | 10 ++-- base/compiler/inferenceresult.jl | 6 +-- base/compiler/inferencestate.jl | 2 +- base/compiler/optimize.jl | 64 ++++++------------------- base/compiler/ssair/inlining.jl | 2 +- base/compiler/typeinfer.jl | 26 +++++----- base/compiler/types.jl | 4 +- test/compiler/inline.jl | 34 +------------ 8 files changed, 39 insertions(+), 109 deletions(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 17906e6f816bc..ed7e6a94d4dae 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -529,8 +529,8 @@ function abstract_call_method_with_const_args(interp::AbstractInterpreter, resul mi === nothing && return nothing # try constant prop' inf_cache = get_inference_cache(interp) - cache = cache_lookup(mi, argtypes, inf_cache) - if cache === nothing +
inf_result = cache_lookup(mi, argtypes, inf_cache) + if inf_result === nothing # if there might be a cycle, check to make sure we don't end up # calling ourselves here. let result = result # prevent capturing @@ -549,10 +549,8 @@ function abstract_call_method_with_const_args(interp::AbstractInterpreter, resul frame = InferenceState(inf_result, #=cache=#false, interp) frame === nothing && return nothing # this is probably a bad generated function (unsound), but just ignore it frame.parent = sv - push!(inf_cache, (inf_result, frame.stmt_info)) + push!(inf_cache, inf_result) typeinf(interp, frame) || return nothing - else - inf_result, _ = cache end result = inf_result.result # if constant inference hits a cycle, just bail out @@ -717,7 +715,7 @@ function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, match: if isdefined(code, :inferred) && !cache_inlineable cache_inf = code.inferred if !(cache_inf === nothing) - src = inlining_policy(interp, cache_inf, get_curr_ssaflag(sv), nothing) + src = inlining_policy(interp, cache_inf, get_curr_ssaflag(sv)) cache_inlineable = src !== nothing end end diff --git a/base/compiler/inferenceresult.jl b/base/compiler/inferenceresult.jl index 24e0fe42b4c88..026b5286979cb 100644 --- a/base/compiler/inferenceresult.jl +++ b/base/compiler/inferenceresult.jl @@ -141,12 +141,12 @@ function matching_cache_argtypes(linfo::MethodInstance, ::Nothing, va_override:: return cache_argtypes, falses(length(cache_argtypes)) end -function cache_lookup(linfo::MethodInstance, given_argtypes::Vector{Any}, cache::Vector{Tuple{InferenceResult,Vector{Any}}}) +function cache_lookup(linfo::MethodInstance, given_argtypes::Vector{Any}, cache::Vector{InferenceResult}) method = linfo.def::Method nargs::Int = method.nargs method.isva && (nargs -= 1) length(given_argtypes) >= nargs || return nothing - for (cached_result, stmt_info) in cache + for cached_result in cache cached_result.linfo === linfo || continue cache_match = true cache_argtypes = 
cached_result.argtypes @@ -165,7 +165,7 @@ function cache_lookup(linfo::MethodInstance, given_argtypes::Vector{Any}, cache: cache_overridden_by_const[end]) end cache_match || continue - return cached_result, stmt_info + return cached_result end return nothing end diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl index e89ff574bda2c..216c397af31e4 100644 --- a/base/compiler/inferencestate.jl +++ b/base/compiler/inferencestate.jl @@ -113,7 +113,7 @@ mutable struct InferenceState CachedMethodTable(method_table(interp)), interp) result.result = frame - cached && push!(get_inference_cache(interp), (result, stmt_info)) + cached && push!(get_inference_cache(interp), result) return frame end end diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index 2d86483953d6c..d173db34e4fea 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -28,57 +28,25 @@ struct InliningState{S <: Union{EdgeTracker, Nothing}, T, I<:AbstractInterpreter interp::I end -include("compiler/ssair/driver.jl") - -function inlining_policy(interp::AbstractInterpreter, @nospecialize(src), - stmt_flag::UInt8, todo::Union{Nothing,InliningTodo}) +function inlining_policy(interp::AbstractInterpreter, @nospecialize(src), stmt_flag::UInt8) if isa(src, CodeInfo) || isa(src, Vector{UInt8}) src_inferred = ccall(:jl_ir_flag_inferred, Bool, (Any,), src) src_inlineable = is_stmt_inline(stmt_flag) || ccall(:jl_ir_flag_inlineable, Bool, (Any,), src) return src_inferred && src_inlineable ? src : nothing elseif isa(src, OptimizationState) && isdefined(src, :ir) return (is_stmt_inline(stmt_flag) || src.src.inlineable) ? 
src.ir : nothing - elseif src === nothing && todo !== nothing && is_stmt_inline(stmt_flag) - # if this statement is forced to be inlined, try additional effort to find the source - # in the local cache, and if found optimize and inline it - mi = todo.mi - (; match, atypes, stmttype) = todo.spec::DelayedInliningSpec - if isa(match, MethodMatch) - cache = cache_lookup(mi, atypes, get_inference_cache(interp)) - cache === nothing && return nothing - inf_result, stmt_info = cache - else - local cache = nothing - for (inf_result, stmt_info) in get_inference_cache(interp) - if inf_result === match - cache = inf_result, stmt_info - break - end - end - cache === nothing && return nothing - inf_result, stmt_info = cache - end - src = inf_result.src - if isa(src, CodeInfo) - elseif isa(src, OptimizationState) - src = src.src - else - return nothing - end - # HACK disable inlining for this optimization, otherwise we're likely to come back to here again - params = OptimizationParams(interp) - newparams = OptimizationParams(; inlining = false, - max_methods = params.MAX_METHODS, - tuple_splat = params.MAX_TUPLE_SPLAT, - union_splitting = params.MAX_UNION_SPLITTING, - unoptimize_throw_blocks = params.unoptimize_throw_blocks) - opt = OptimizationState(mi, copy(src), newparams, interp; stmt_info) - optimize(interp, opt, newparams, stmttype) - return opt.ir + else + # maybe we want to make inference keep the source in a local cache if a statement is going to inlined + # and re-optimize it here with disabling further inlining to avoid infinite optimization loop + # (we can even natively try to re-infer it entirely) + # but it seems like that "single-level-inlining" is more trouble and complex than it's worth + # see https://github.com/JuliaLang/julia/pull/41328/commits/5557c2fe70d9672089a17c0f6a9f30fafcf0cb7c + return nothing end - return nothing end +include("compiler/ssair/driver.jl") + mutable struct OptimizationState linfo::MethodInstance src::CodeInfo @@ -100,8 +68,7 @@ mutable 
struct OptimizationState frame.sptypes, frame.slottypes, false, inlining) end - function OptimizationState(linfo::MethodInstance, src::CodeInfo, params::OptimizationParams, interp::AbstractInterpreter; - stmt_info::Union{Nothing,Vector{Any}} = nothing) + function OptimizationState(linfo::MethodInstance, src::CodeInfo, params::OptimizationParams, interp::AbstractInterpreter) # prepare src for running optimization passes # if it isn't already nssavalues = src.ssavaluetypes @@ -115,9 +82,7 @@ mutable struct OptimizationState if slottypes === nothing slottypes = Any[ Any for i = 1:nslots ] end - if stmt_info === nothing - stmt_info = Any[nothing for i = 1:nssavalues] - end + stmt_info = Any[nothing for i = 1:nssavalues] # cache some useful state computations def = linfo.def mod = isa(def, Method) ? def.module : def @@ -134,11 +99,10 @@ mutable struct OptimizationState end end -function OptimizationState(linfo::MethodInstance, params::OptimizationParams, interp::AbstractInterpreter; - stmt_info::Union{Nothing,Vector{Any}} = nothing) +function OptimizationState(linfo::MethodInstance, params::OptimizationParams, interp::AbstractInterpreter) src = retrieve_code_info(linfo) src === nothing && return nothing - return OptimizationState(linfo, src, params, interp; stmt_info) + return OptimizationState(linfo, src, params, interp) end function ir_to_codeinf!(opt::OptimizationState) diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index 441df2fdfab72..dcc424214266d 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -758,7 +758,7 @@ function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8) return ConstantCase(src) end - src = inlining_policy(state.interp, src, flag, todo) + src = inlining_policy(state.interp, src, flag) if src === nothing return compileable_specialization(et, match) diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl index 670e99c295759..5d3325a2a5e3c 100644 --- 
a/base/compiler/typeinfer.jl +++ b/base/compiler/typeinfer.jl @@ -438,14 +438,13 @@ function finish(me::InferenceState, interp::AbstractInterpreter) # inspect whether our inference had a limited result accuracy, # else it may be suitable to cache me.bestguess = cycle_fix_limited(me.bestguess, me) - parent = me.parent limited_ret = me.bestguess isa LimitedAccuracy limited_src = false if !limited_ret gt = me.src.ssavaluetypes::Vector{Any} for j = 1:length(gt) gt[j] = gtj = cycle_fix_limited(gt[j], me) - if gtj isa LimitedAccuracy && parent !== nothing + if gtj isa LimitedAccuracy && me.parent !== nothing limited_src = true break end @@ -453,24 +452,20 @@ function finish(me::InferenceState, interp::AbstractInterpreter) end if limited_ret # a parent may be cached still, but not this intermediate work: - # we can throw everything else away now, unless inlinear will still want to have the inferred source - if !(parent !== nothing && is_stmt_inline(get_curr_ssaflag(parent))) - me.result.src = nothing - end + # we can throw everything else away now + me.result.src = nothing me.cached = false me.src.inlineable = false unlock_mi_inference(interp, me.linfo) elseif limited_src # a type result will be cached still, but not this intermediate work: - # we can throw everything else away now, unless inlinear will still want to have the inferred source - if !(parent !== nothing && is_stmt_inline(get_curr_ssaflag(parent))) - me.result.src = nothing - end + # we can throw everything else away now + me.result.src = nothing me.src.inlineable = false else # annotate fulltree with type information, # either because we are the outermost code, or we might use this later - doopt = (me.cached || parent !== nothing) + doopt = (me.cached || me.parent !== nothing) type_annotate!(me, doopt) if doopt && may_optimize(interp) me.result.src = OptimizationState(me, OptimizationParams(interp), interp) @@ -839,9 +834,14 @@ function typeinf_code(interp::AbstractInterpreter, method::Method, @nospecialize 
mi = specialize_method(method, atypes, sparams)::MethodInstance ccall(:jl_typeinf_begin, Cvoid, ()) result = InferenceResult(mi) - frame = InferenceState(result, run_optimizer, interp) + frame = InferenceState(result, false, interp) frame === nothing && return (nothing, Any) - typeinf(interp, frame) + if typeinf(interp, frame) && run_optimizer + opt_params = OptimizationParams(interp) + result.src = src = OptimizationState(frame, opt_params, interp) + optimize(interp, src, opt_params, ignorelimited(result.result)) + frame.src = finish!(interp, result) + end ccall(:jl_typeinf_end, Cvoid, ()) frame.inferred || return (nothing, Any) return (frame.src, widenconst(ignorelimited(result.result))) diff --git a/base/compiler/types.jl b/base/compiler/types.jl index 9a354fdd101ca..5f8f656312458 100644 --- a/base/compiler/types.jl +++ b/base/compiler/types.jl @@ -142,7 +142,7 @@ It contains many parameters used by the compilation pipeline. """ struct NativeInterpreter <: AbstractInterpreter # Cache of inference results for this particular interpreter - cache::Vector{Tuple{InferenceResult,Vector{Any}}} + cache::Vector{InferenceResult} # The world age we're working inside of world::UInt @@ -167,7 +167,7 @@ struct NativeInterpreter <: AbstractInterpreter return new( # Initially empty cache - Tuple{InferenceResult,Vector{Any}}[], + Vector{InferenceResult}(), # world age counter world, diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index 53fb188263f9f..9f5c48ffb3209 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -173,7 +173,7 @@ function f_ifelse(x) return b ? 
x + 1 : x end # 2 for now because the compiler leaves a GotoNode around -@test length(code_typed(f_ifelse, (String,))[1][1].code) <= 2 +@test_broken length(code_typed(f_ifelse, (String,))[1][1].code) <= 2 # Test that inlining of _apply_iterate properly hits the inference cache @noinline cprop_inline_foo1() = (1, 1) @@ -538,28 +538,6 @@ end return a, b end end - - # test inlining of un-cached callsites - - import Core.Compiler: isType - - function limited(a) - @nospecialize a - if @noinline(isType(a)) - return @inline(limited(a.parameters[1])) - else - return rand(a) - end - end - - function multilimited(a) - @nospecialize a - if @noinline(isType(a)) - return @inline(multilimited(a.parameters[1])) - else - return rand(Bool) ? rand(a) : @inline(multilimited(a)) - end - end end let code = code_typed1(m.force_inline_explicit, (Int,)) @@ -608,16 +586,6 @@ end let code = code_typed1(m.nested, (Int,Int)) @test count(x->isinvoke(x, :notinlined), code) == 1 end - - let code = code_typed1(m.limited, (Any,)) - @test count(x->isinvoke(x, :isType), code) == 2 # caller + inlined callee - end - let code1 = code_typed1(m.multilimited, (Any,)) - code2 = code_typed1(m.multilimited, (Any,)) - # check that inlining for recursive callsites doesn't depend on inference local cache - @test code1 == code2 - @test count(x->isinvoke(x, :isType), code1) == 3 # caller + inlined callee + inlined callee - end end # force constant-prop' for `setproperty!` From 99cbff66a2f593bf640649c4c0ed5ab61d32d56d Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Tue, 24 Aug 2021 15:06:07 +0900 Subject: [PATCH 05/11] style nits --- base/compiler/abstractinterpretation.jl | 3 +- base/compiler/optimize.jl | 7 +-- base/compiler/ssair/inlining.jl | 1 - base/compiler/utilities.jl | 5 ++- base/meta.jl | 5 ++- test/compiler/inline.jl | 58 ++++++++++++------------- 6 files changed, 41 insertions(+), 38 deletions(-) diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 
ed7e6a94d4dae..ede05572edc3a 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -1911,7 +1911,8 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState) changes = StateUpdate(fname, VarState(Any, false), changes, false) end elseif hd === :code_coverage_effect || - (hd !== :boundscheck && hd !== nothing && is_meta_expr_head(hd)) # :boundscheck can be narrowed to Bool + (hd !== :boundscheck && # :boundscheck can be narrowed to Bool + hd !== nothing && is_meta_expr_head(hd)) # these do not generate code else t = abstract_eval_statement(interp, stmt, changes, frame) diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index d173db34e4fea..0f4d75b2c5f6c 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -393,15 +393,16 @@ function remove_meta!(@nospecialize(stmt), meta::Vector{Any}) push!(meta, stmt) end return nothing - elseif is_ssaflag_head(head) - # we processed these flags in `jl_code_info_set_ir` + elseif is_preprocessed_flag(head) return nothing end end return stmt end -is_ssaflag_head(head::Symbol) = head === :inbounds || head === :inline || head === :noinline +# check if this expression is preprocessed in `jl_code_info_set_ir` +is_preprocessed_flag(head::Symbol) = + head === :inbounds || head === :inline || head === :noinline function slot2reg(ir::IRCode, ci::CodeInfo, sv::OptimizationState) # need `ci` for the slot metadata, IR for the code diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index dcc424214266d..4ad8857ebfd93 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -662,7 +662,6 @@ function rewrite_apply_exprargs!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx:: if isa(info, ConstCallInfo) if !is_stmt_noinline(flag) && maybe_handle_const_call!( ir, state1.id, new_stmt, info, new_sig,call.rt, istate, flag, false, todo) - handled = true else info = info.call diff --git 
a/base/compiler/utilities.jl b/base/compiler/utilities.jl index 26e25126c393c..6bce587c2013c 100644 --- a/base/compiler/utilities.jl +++ b/base/compiler/utilities.jl @@ -59,8 +59,9 @@ end # Meta expression head, these generally can't be deleted even when they are # in a dead branch but can be ignored when analyzing uses/liveness. -is_meta_expr_head(head::Symbol) = (head === :inbounds || head === :boundscheck || head === :meta || - head === :loopinfo || head === :inline || head === :noinline) +is_meta_expr_head(head::Symbol) = + head === :inbounds || head === :boundscheck || head === :meta || head === :loopinfo || + head === :inline || head === :noinline sym_isless(a::Symbol, b::Symbol) = ccall(:strcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}), a, b) < 0 diff --git a/base/meta.jl b/base/meta.jl index 3fe815cd0cbc0..57ba263a00af7 100644 --- a/base/meta.jl +++ b/base/meta.jl @@ -450,7 +450,8 @@ end _instantiate_type_in_env(x, spsig, spvals) = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), x, spsig, spvals) -is_meta_expr_head(head::Symbol) = (head === :inbounds || head === :boundscheck || head === :meta - || head === :loopinfo || head === :inline || head === :noinline) +is_meta_expr_head(head::Symbol) = + head === :inbounds || head === :boundscheck || head === :meta || head === :loopinfo || + head === :inline || head === :noinline end # module diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index 9f5c48ffb3209..a04e29e608e6e 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -392,8 +392,8 @@ end code_typed1(args...; kwargs...) 
= (first(only(code_typed(args...; kwargs...)))::Core.CodeInfo).code @testset "@inline/@noinline annotation before definition" begin - m = Module() - @eval m begin + M = Module() + @eval M begin @inline function _def_inline(x) # this call won't be resolved and thus will prevent inlining to happen if we don't # annotate `@inline` at the top of this function body @@ -414,23 +414,23 @@ code_typed1(args...; kwargs...) = (first(only(code_typed(args...; kwargs...))):: def_noinline_noconflict(x) = _def_noinline_noconflict(x) end - let code = code_typed1(m.def_inline, (Int,)) + let code = code_typed1(M.def_inline, (Int,)) @test all(code) do x !isinvoke(x, :_def_inline) end end - let code = code_typed1(m.def_noinline, (Int,)) + let code = code_typed1(M.def_noinline, (Int,)) @test any(code) do x isinvoke(x, :_def_noinline) end end # test that they don't conflict with other "before-definition" macros - let code = code_typed1(m.def_inline_noconflict, (Int,)) + let code = code_typed1(M.def_inline_noconflict, (Int,)) @test all(code) do x !isinvoke(x, :_def_inline_noconflict) end end - let code = code_typed1(m.def_noinline_noconflict, (Int,)) + let code = code_typed1(M.def_noinline_noconflict, (Int,)) @test any(code) do x isinvoke(x, :_def_noinline_noconflict) end @@ -438,8 +438,8 @@ code_typed1(args...; kwargs...) 
= (first(only(code_typed(args...; kwargs...))):: end @testset "@inline/@noinline annotation within a function body" begin - m = Module() - @eval m begin + M = Module() + @eval M begin function _body_inline(x) @inline # this call won't be resolved and thus will prevent inlining to happen if we don't @@ -471,18 +471,18 @@ end end end - let code = code_typed1(m.body_inline, (Int,)) + let code = code_typed1(M.body_inline, (Int,)) @test all(code) do x !isinvoke(x, :_body_inline) end end - let code = code_typed1(m.body_noinline, (Int,)) + let code = code_typed1(M.body_noinline, (Int,)) @test any(code) do x isinvoke(x, :_body_noinline) end end # test annotations for `do` blocks - let code = code_typed1(m.do_inline, (Int,)) + let code = code_typed1(M.do_inline, (Int,)) # what we test here is that both `simple_caller` and the anonymous function that the # `do` block creates should inlined away, and as a result there is only the unresolved call @test all(code) do x @@ -490,7 +490,7 @@ end !isinvoke(x, mi->startswith(string(mi.def.name), '#')) end end - let code = code_typed1(m.do_noinline, (Int,)) + let code = code_typed1(M.do_noinline, (Int,)) # the anonymous function that the `do` block created shouldn't be inlined here @test any(code) do x isinvoke(x, mi->startswith(string(mi.def.name), '#')) @@ -499,8 +499,8 @@ end end @testset "callsite @inline/@noinline annotations" begin - m = Module() - @eval m begin + M = Module() + @eval M begin # this global variable prevents inference to fold everything as constant, and/or the optimizer to inline the call accessing to this g = 0 @@ -540,57 +540,57 @@ end end end - let code = code_typed1(m.force_inline_explicit, (Int,)) + let code = code_typed1(M.force_inline_explicit, (Int,)) @test all(x->!isinvoke(x, :noinlined_explicit), code) end - let code = code_typed1(m.force_inline_block_explicit, (Int,)) + let code = code_typed1(M.force_inline_block_explicit, (Int,)) @test all(code) do x !isinvoke(x, :noinlined_explicit) && !isinvoke(x, 
:(+)) end end - let code = code_typed1(m.force_inline_implicit, (Int,)) + let code = code_typed1(M.force_inline_implicit, (Int,)) @test all(x->!isinvoke(x, :noinlined_implicit), code) end - let code = code_typed1(m.force_inline_block_implicit, (Int,)) + let code = code_typed1(M.force_inline_block_implicit, (Int,)) @test all(x->!isinvoke(x, :noinlined_explicit), code) end - let code = code_typed1(m.force_noinline_explicit, (Int,)) + let code = code_typed1(M.force_noinline_explicit, (Int,)) @test any(x->isinvoke(x, :inlined_explicit), code) end - let code = code_typed1(m.force_noinline_block_explicit, (Int,)) + let code = code_typed1(M.force_noinline_block_explicit, (Int,)) @test count(x->isinvoke(x, :inlined_explicit), code) == 2 end - let code = code_typed1(m.force_noinline_implicit, (Int,)) + let code = code_typed1(M.force_noinline_implicit, (Int,)) @test any(x->isinvoke(x, :inlined_implicit), code) end - let code = code_typed1(m.force_noinline_block_implicit, (Int,)) + let code = code_typed1(M.force_noinline_block_implicit, (Int,)) @test count(x->isinvoke(x, :inlined_implicit), code) == 2 end - let code = code_typed1(m.force_inline_constprop_explicit) + let code = code_typed1(M.force_inline_constprop_explicit) @test all(x->!isinvoke(x, :noinlined_constprop_explicit), code) end - let code = code_typed1(m.force_inline_constprop_implicit) + let code = code_typed1(M.force_inline_constprop_implicit) @test all(x->!isinvoke(x, :noinlined_constprop_implicit), code) end - let code = code_typed1(m.force_noinline_constprop_explicit) + let code = code_typed1(M.force_noinline_constprop_explicit) @test any(x->isinvoke(x, :inlined_constprop_explicit), code) end - let code = code_typed1(m.force_noinline_constprop_implicit) + let code = code_typed1(M.force_noinline_constprop_implicit) @test any(x->isinvoke(x, :inlined_constprop_implicit), code) end - let code = code_typed1(m.nested, (Int,Int)) + let code = code_typed1(M.nested, (Int,Int)) @test count(x->isinvoke(x, :notinlined), 
code) == 1 end end # force constant-prop' for `setproperty!` # https://github.com/JuliaLang/julia/pull/41882 -let ci = @eval Module() begin +let code = @eval Module() begin # if we don't force constant-prop', `T = fieldtype(Foo, ::Symbol)` will be union-split to # `Union{Type{Any},Type{Int}` and it will make `convert(T, nothing)` too costly # and it leads to inlining failure @@ -608,7 +608,7 @@ let ci = @eval Module() begin $code_typed1(setter, (Vector{Foo},)) end - @test !any(x->isinvoke(x, :setproperty!), ci.code) + @test !any(x->isinvoke(x, :setproperty!), code) end # Issue #41299 - inlining deletes error check in :> From 382a80cc8c484bc77f098e5095f9f6c5e270c990 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Thu, 26 Aug 2021 15:08:57 +0900 Subject: [PATCH 06/11] Update base/compiler/ssair/inlining.jl Co-authored-by: Jameson Nash --- base/compiler/ssair/inlining.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index 4ad8857ebfd93..c76e74e50c6d0 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -722,7 +722,7 @@ end function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8) mi = todo.mi - (; match, atypes) = todo.spec::DelayedInliningSpec + (; match) = todo.spec::DelayedInliningSpec #XXX: update_valid_age!(min_valid[1], max_valid[1], sv) isconst, src = false, nothing From 4ecf149c339db21a36ed6301e48f1567c68a6ca8 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki <40514306+aviatesk@users.noreply.github.com> Date: Thu, 26 Aug 2021 15:10:52 +0900 Subject: [PATCH 07/11] Update src/method.c Co-authored-by: Jameson Nash --- src/method.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/method.c b/src/method.c index 7def65ae90c16..d390d85226e32 100644 --- a/src/method.c +++ b/src/method.c @@ -254,7 +254,7 @@ JL_DLLEXPORT void jl_resolve_globals_in_ir(jl_array_t *stmts, 
jl_module_t *m, jl } } -jl_value_t* expr_arg1(jl_value_t* expr) { +jl_value_t *expr_arg1(jl_value_t *expr) { jl_array_t *args = ((jl_expr_t*)expr)->args; return jl_array_ptr_ref(args, 0); } From 490dda0961eb1f864b7f39aed1c93839b80c4a12 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Thu, 26 Aug 2021 15:57:12 +0900 Subject: [PATCH 08/11] fixup - remove preprocessed flags from `jl_code_info_set_ir` - fix duplicated definition warning - add and fix comments --- base/compiler/optimize.jl | 10 ++-------- base/compiler/utilities.jl | 6 ++++-- base/expr.jl | 6 ------ base/meta.jl | 6 +----- src/method.c | 3 +++ 5 files changed, 10 insertions(+), 21 deletions(-) diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index 0f4d75b2c5f6c..09c778d57d695 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -38,9 +38,9 @@ function inlining_policy(interp::AbstractInterpreter, @nospecialize(src), stmt_f else # maybe we want to make inference keep the source in a local cache if a statement is going to inlined # and re-optimize it here with disabling further inlining to avoid infinite optimization loop - # (we can even natively try to re-infer it entirely) + # (we can even naively try to re-infer it entirely) # but it seems like that "single-level-inlining" is more trouble and complex than it's worth - # see https://github.com/JuliaLang/julia/pull/41328/commits/5557c2fe70d9672089a17c0f6a9f30fafcf0cb7c + # see https://github.com/JuliaLang/julia/pull/41328/commits/0fc0f71a42b8c9d04b0dafabf3f1f17703abf2e7 return nothing end end @@ -393,17 +393,11 @@ function remove_meta!(@nospecialize(stmt), meta::Vector{Any}) push!(meta, stmt) end return nothing - elseif is_preprocessed_flag(head) - return nothing end end return stmt end -# check if this expression is preprocessed in `jl_code_info_set_ir` -is_preprocessed_flag(head::Symbol) = - head === :inbounds || head === :inline || head === :noinline - function slot2reg(ir::IRCode, ci::CodeInfo, 
sv::OptimizationState) # need `ci` for the slot metadata, IR for the code svdef = sv.linfo.def diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl index 6bce587c2013c..cb7c84934d7d3 100644 --- a/base/compiler/utilities.jl +++ b/base/compiler/utilities.jl @@ -60,8 +60,10 @@ end # Meta expression head, these generally can't be deleted even when they are # in a dead branch but can be ignored when analyzing uses/liveness. is_meta_expr_head(head::Symbol) = - head === :inbounds || head === :boundscheck || head === :meta || head === :loopinfo || - head === :inline || head === :noinline + head === :boundscheck || head === :meta || head === :loopinfo || + # usually we process and remove these expressions in `jl_code_info_set_ir`, + # but we may still see them when inferring toplevel thunks, so let's keep these checks here as well + head === :inbounds || head === :inline || head === :noinline sym_isless(a::Symbol, b::Symbol) = ccall(:strcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}), a, b) < 0 diff --git a/base/expr.jl b/base/expr.jl index 226dfe5f900a9..ebd8c52bcaa7f 100644 --- a/base/expr.jl +++ b/base/expr.jl @@ -252,9 +252,6 @@ Give a hint to the compiler that calls within `block` are worth inlining. macro inline(x) return annotate_meta_def_or_block(x, :inline) end -macro inline() - return Expr(:meta, :inline) -end """ @noinline @@ -332,9 +329,6 @@ Give a hint to the compiler that it should not inline the calls within `block`. 
macro noinline(x) return annotate_meta_def_or_block(x, :noinline) end -macro noinline() - return Expr(:meta, :noinline) -end """ @pure ex diff --git a/base/meta.jl b/base/meta.jl index 57ba263a00af7..649ffe9d1a19c 100644 --- a/base/meta.jl +++ b/base/meta.jl @@ -440,7 +440,7 @@ function _partially_inline!(@nospecialize(x), slot_replacements::Vector{Any}, @assert isa(arg, Union{GlobalRef, Symbol}) return x end - elseif !is_meta_expr_head(head) + elseif !Core.Compiler.is_meta_expr_head(head) partially_inline!(x.args, slot_replacements, type_signature, static_param_values, slot_offset, statement_offset, boundscheck) end @@ -450,8 +450,4 @@ end _instantiate_type_in_env(x, spsig, spvals) = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), x, spsig, spvals) -is_meta_expr_head(head::Symbol) = - head === :inbounds || head === :boundscheck || head === :meta || head === :loopinfo || - head === :inline || head === :noinline - end # module diff --git a/src/method.c b/src/method.c index d390d85226e32..c17566ac6f936 100644 --- a/src/method.c +++ b/src/method.c @@ -322,6 +322,7 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) inbounds_depth = 0; else if (inbounds_depth > 0) // pop inbounds_depth -= 1; + bd[j] = jl_nothing; } else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == inline_sym) { is_flag_stmt = 1; @@ -332,6 +333,7 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) assert(arg1 == (jl_value_t*)jl_false); arraylist_pop(inline_flags); } + bd[j] = jl_nothing; } else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == noinline_sym) { is_flag_stmt = 1; @@ -342,6 +344,7 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir) assert(arg1 == (jl_value_t*)jl_false); arraylist_pop(inline_flags); } + bd[j] = jl_nothing; } else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == return_sym) { jl_array_ptr_set(body, j, jl_new_struct(jl_returnnode_type, jl_exprarg(st, 0))); From 783d32d84035665b1cbcc61e1c1891d0de97cd08 Mon Sep 
17 00:00:00 2001 From: Shuhei Kadowaki Date: Thu, 26 Aug 2021 16:20:08 +0900 Subject: [PATCH 09/11] more clean up --- base/compiler/utilities.jl | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl index cb7c84934d7d3..8dfe1f65f0d53 100644 --- a/base/compiler/utilities.jl +++ b/base/compiler/utilities.jl @@ -59,11 +59,7 @@ end # Meta expression head, these generally can't be deleted even when they are # in a dead branch but can be ignored when analyzing uses/liveness. -is_meta_expr_head(head::Symbol) = - head === :boundscheck || head === :meta || head === :loopinfo || - # usually we process and remove these expressions in `jl_code_info_set_ir`, - # but we may still see them when inferring toplevel thunks, so let's keep these checks here as well - head === :inbounds || head === :inline || head === :noinline +is_meta_expr_head(head::Symbol) = head === :boundscheck || head === :meta || head === :loopinfo sym_isless(a::Symbol, b::Symbol) = ccall(:strcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}), a, b) < 0 From 322291b1fbffb0250bea0f5e05ddca7750a6d973 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Wed, 1 Sep 2021 03:04:38 +0900 Subject: [PATCH 10/11] add caveat about the recursive call limitation --- base/expr.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/base/expr.jl b/base/expr.jl index ebd8c52bcaa7f..1af1e9486068e 100644 --- a/base/expr.jl +++ b/base/expr.jl @@ -246,6 +246,11 @@ Give a hint to the compiler that calls within `block` are worth inlining. end ``` +!!! warning + Although a callsite annotation will try to force inlining regardless of the cost model, + there are still cases where it cannot succeed. In particular, recursive calls cannot be + inlined even if they are annotated as `@inline`d. + !!! compat "Julia 1.8" The callsite annotation requires at least Julia 1.8.
""" From 3805d8623b94a6de743e690f3bfc7b1d8e7970c0 Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Wed, 1 Sep 2021 03:04:55 +0900 Subject: [PATCH 11/11] update NEWS.md --- NEWS.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index f9fb0835afffc..f7f25a5b6a54b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,7 +6,10 @@ New language features --------------------- * `Module(:name, false, false)` can be used to create a `module` that does not import `Core`. ([#40110]) -* `@inline` and `@noinline` annotations may now be used in function bodies. ([#41312]) +* `@inline` and `@noinline` annotations can be used within a function body to give an extra + hint about the inlining cost to the compiler. ([#41312]) +* `@inline` and `@noinline` annotations can now be applied to a function callsite or block + to enforce the involved function calls to be (or not to be) inlined. ([#41328]) * The default behavior of observing `@inbounds` declarations is now an option via `auto` in `--check-bounds=yes|no|auto` ([#41551]) Language changes @@ -39,7 +42,7 @@ New library features * `@test_throws "some message" triggers_error()` can now be used to check whether the displayed error text contains "some message" regardless of the specific exception type. - Regular expressions, lists of strings, and matching functions are also supported. ([#41888) + Regular expressions, lists of strings, and matching functions are also supported. ([#41888]) Standard library changes ------------------------