try to keep source if it will be force-inlined

JuliaLang · Aug 21, 2021 · 5557c2f · 5557c2f
1 parent 34e8c07
commit 5557c2f
Show file tree

Hide file tree

Showing 8 changed files with 99 additions and 53 deletions.
diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
@@ -532,8 +532,8 @@ function abstract_call_method_with_const_args(interp::AbstractInterpreter, resul
     mi === nothing && return nothing
     # try constant prop'
     inf_cache = get_inference_cache(interp)
-    inf_result = cache_lookup(mi, argtypes, inf_cache)
-    if inf_result === nothing
+    cache = cache_lookup(mi, argtypes, inf_cache)
+    if cache === nothing
         # if there might be a cycle, check to make sure we don't end up
         # calling ourselves here.
         let result = result # prevent capturing
@@ -552,8 +552,10 @@ function abstract_call_method_with_const_args(interp::AbstractInterpreter, resul
         frame = InferenceState(inf_result, #=cache=#false, interp)
         frame === nothing && return nothing # this is probably a bad generated function (unsound), but just ignore it
         frame.parent = sv
-        push!(inf_cache, inf_result)
+        push!(inf_cache, (inf_result, frame.stmt_info))
         typeinf(interp, frame) || return nothing
+    else
+        inf_result, _ = cache
     end
     result = inf_result.result
     # if constant inference hits a cycle, just bail out
@@ -590,7 +592,7 @@ function maybe_get_const_prop_profitable(interp::AbstractInterpreter, result::Me
         return nothing
     end
     mi = mi::MethodInstance
-    if !force && !const_prop_methodinstance_heuristic(interp, match, mi)
+    if !force && !const_prop_methodinstance_heuristic(interp, match, mi, sv)
         add_remark!(interp, sv, "[constprop] Disabled by method instance heuristic")
         return nothing
     end
@@ -692,7 +694,7 @@ end
 # This is a heuristic to avoid trying to const prop through complicated functions
 # where we would spend a lot of time, but are probably unlikely to get an improved
 # result anyway.
-function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, match::MethodMatch, mi::MethodInstance)
+function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, match::MethodMatch, mi::MethodInstance, sv::InferenceState)
     method = match.method
     if method.is_for_opaque_closure
         # Not inlining an opaque closure can be very expensive, so be generous
@@ -711,8 +713,8 @@ function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, match:
     if isdefined(code, :inferred) && !cache_inlineable
         cache_inf = code.inferred
         if !(cache_inf === nothing)
-            # TODO maybe we want to respect callsite `@inline`/`@noinline` annotations here ?
-            cache_inlineable = inlining_policy(interp, cache_inf, 0x00, match) !== nothing
+            src = inlining_policy(interp, cache_inf, get_curr_ssaflag(sv), nothing)
+            cache_inlineable = src !== nothing
         end
     end
     if !cache_inlineable

diff --git a/base/compiler/inferenceresult.jl b/base/compiler/inferenceresult.jl
@@ -141,12 +141,12 @@ function matching_cache_argtypes(linfo::MethodInstance, ::Nothing, va_override::
     return cache_argtypes, falses(length(cache_argtypes))
 end
 
-function cache_lookup(linfo::MethodInstance, given_argtypes::Vector{Any}, cache::Vector{InferenceResult})
+function cache_lookup(linfo::MethodInstance, given_argtypes::Vector{Any}, cache::Vector{Tuple{InferenceResult,Vector{Any}}})
     method = linfo.def::Method
     nargs::Int = method.nargs
     method.isva && (nargs -= 1)
     length(given_argtypes) >= nargs || return nothing
-    for cached_result in cache
+    for (cached_result, stmt_info) in cache
         cached_result.linfo === linfo || continue
         cache_match = true
         cache_argtypes = cached_result.argtypes
@@ -165,7 +165,7 @@ function cache_lookup(linfo::MethodInstance, given_argtypes::Vector{Any}, cache:
                                            cache_overridden_by_const[end])
         end
         cache_match || continue
-        return cached_result
+        return cached_result, stmt_info
     end
     return nothing
 end
diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl
@@ -113,7 +113,7 @@ mutable struct InferenceState
             CachedMethodTable(method_table(interp)),
             interp)
         result.result = frame
-        cached && push!(get_inference_cache(interp), result)
+        cached && push!(get_inference_cache(interp), (result, stmt_info))
         return frame
     end
 end
@@ -296,3 +296,5 @@ function print_callstack(sv::InferenceState)
         sv = sv.parent
     end
 end
+
+get_curr_ssaflag(sv::InferenceState) = sv.src.ssaflags[sv.currpc]
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
@@ -28,29 +28,57 @@ struct InliningState{S <: Union{EdgeTracker, Nothing}, T, I<:AbstractInterpreter
     interp::I
 end
 
-function inlining_policy(interp::AbstractInterpreter, @nospecialize(src), stmt_flag::UInt8, match::Union{MethodMatch,InferenceResult})
+include("compiler/ssair/driver.jl")
+
+function inlining_policy(interp::AbstractInterpreter, @nospecialize(src),
+                         stmt_flag::UInt8, todo::Union{Nothing,InliningTodo})
     if isa(src, CodeInfo) || isa(src, Vector{UInt8})
         src_inferred = ccall(:jl_ir_flag_inferred, Bool, (Any,), src)
         src_inlineable = is_stmt_inline(stmt_flag) || ccall(:jl_ir_flag_inlineable, Bool, (Any,), src)
         return src_inferred && src_inlineable ? src : nothing
     elseif isa(src, OptimizationState) && isdefined(src, :ir)
         return (is_stmt_inline(stmt_flag) || src.src.inlineable) ? src.ir : nothing
-    elseif src === nothing && is_stmt_inline(stmt_flag) && isa(match, MethodMatch)
-        # when the source isn't available at this moment, try to re-infer and inline it
-        # NOTE we can make inference try to keep the source if the call is going to be inlined,
-        # but then inlining will depend on local state of inference and so the first entry
-        # and the succeeding ones may generate different code; rather we always re-infer
-        # the source to avoid the problem while it's obviously not most efficient
-        # HACK disable inlining for the re-inference to avoid cycles by making sure the following inference never comes here again
-        interp = NativeInterpreter(get_world_counter(interp); opt_params = OptimizationParams(; inlining = false))
-        src, rt = typeinf_code(interp, match.method, match.spec_types, match.sparams, true)
-        return src
+    elseif src === nothing && todo !== nothing && is_stmt_inline(stmt_flag)
+        # if this statement is forced to be inlined, try additional effort to find the source
+        # in the local cache, and if found optimize and inline it
+        mi = todo.mi
+        (; match, atypes, stmttype) = todo.spec::DelayedInliningSpec
+        if isa(match, MethodMatch)
+            cache = cache_lookup(mi, atypes, get_inference_cache(interp))
+            cache === nothing && return nothing
+            inf_result, stmt_info = cache
+        else
+            local cache = nothing
+            for (inf_result, stmt_info) in get_inference_cache(interp)
+                if inf_result === match
+                    cache = inf_result, stmt_info
+                    break
+                end
+            end
+            cache === nothing && return nothing
+            inf_result, stmt_info = cache
+        end
+        src = inf_result.src
+        if isa(src, CodeInfo)
+        elseif isa(src, OptimizationState)
+            src = src.src
+        else
+            return nothing
+        end
+        # HACK disable inlining for this optimization, otherwise we're likely to come back to here again
+        params = OptimizationParams(interp)
+        newparams = OptimizationParams(; inlining = false,
+                                         max_methods = params.MAX_METHODS,
+                                         tuple_splat = params.MAX_TUPLE_SPLAT,
+                                         union_splitting = params.MAX_UNION_SPLITTING,
+                                         unoptimize_throw_blocks = params.unoptimize_throw_blocks)
+        opt = OptimizationState(mi, copy(src), newparams, interp; stmt_info)
+        optimize(interp, opt, newparams, stmttype)
+        return opt.ir
     end
     return nothing
 end
 
-include("compiler/ssair/driver.jl")
-
 mutable struct OptimizationState
     linfo::MethodInstance
     src::CodeInfo
@@ -72,7 +100,8 @@ mutable struct OptimizationState
                    frame.sptypes, frame.slottypes, false,
                    inlining)
     end
-    function OptimizationState(linfo::MethodInstance, src::CodeInfo, params::OptimizationParams, interp::AbstractInterpreter)
+    function OptimizationState(linfo::MethodInstance, src::CodeInfo, params::OptimizationParams, interp::AbstractInterpreter;
+                               stmt_info::Union{Nothing,Vector{Any}} = nothing)
         # prepare src for running optimization passes
         # if it isn't already
         nssavalues = src.ssavaluetypes
@@ -86,7 +115,9 @@ mutable struct OptimizationState
         if slottypes === nothing
             slottypes = Any[ Any for i = 1:nslots ]
         end
-        stmt_info = Any[nothing for i = 1:nssavalues]
+        if stmt_info === nothing
+            stmt_info = Any[nothing for i = 1:nssavalues]
+        end
         # cache some useful state computations
         def = linfo.def
         mod = isa(def, Method) ? def.module : def
@@ -103,10 +134,11 @@ mutable struct OptimizationState
     end
 end
 
-function OptimizationState(linfo::MethodInstance, params::OptimizationParams, interp::AbstractInterpreter)
+function OptimizationState(linfo::MethodInstance, params::OptimizationParams, interp::AbstractInterpreter;
+                           stmt_info::Union{Nothing,Vector{Any}} = nothing)
     src = retrieve_code_info(linfo)
     src === nothing && return nothing
-    return OptimizationState(linfo, src, params, interp)
+    return OptimizationState(linfo, src, params, interp; stmt_info)
 end
 
 function ir_to_codeinf!(opt::OptimizationState)

diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl
@@ -722,7 +722,8 @@ function compileable_specialization(et::Union{EdgeTracker, Nothing}, (; linfo)::
 end
 
 function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8)
-    (; match) = todo.spec::DelayedInliningSpec
+    mi = todo.mi
+    (; match, atypes) = todo.spec::DelayedInliningSpec
 
     #XXX: update_valid_age!(min_valid[1], max_valid[1], sv)
     isconst, src = false, nothing
@@ -737,7 +738,7 @@ function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8)
             isconst, src = false, inferred_src
         end
     else
-        linfo = get(state.mi_cache, todo.mi, nothing)
+        linfo = get(state.mi_cache, mi, nothing)
         if linfo isa CodeInstance
             if invoke_api(linfo) == 2
                 # in this case function can be inlined to a constant
@@ -753,11 +754,11 @@ function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8)
     et = state.et
 
     if isconst && et !== nothing
-        push!(et, todo.mi)
+        push!(et, mi)
         return ConstantCase(src)
     end
 
-    src = inlining_policy(state.interp, src, flag, match)
+    src = inlining_policy(state.interp, src, flag, todo)
 
     if src === nothing
         return compileable_specialization(et, match)
@@ -767,8 +768,8 @@ function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8)
         src = copy(src)
     end
 
-    et !== nothing && push!(et, todo.mi)
-    return InliningTodo(todo.mi, src)
+    et !== nothing && push!(et, mi)
+    return InliningTodo(mi, src)
 end
 
 function resolve_todo(todo::UnionSplit, state::InliningState, flag::UInt8)

diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl
@@ -438,34 +438,39 @@ function finish(me::InferenceState, interp::AbstractInterpreter)
     # inspect whether our inference had a limited result accuracy,
     # else it may be suitable to cache
     me.bestguess = cycle_fix_limited(me.bestguess, me)
+    parent = me.parent
     limited_ret = me.bestguess isa LimitedAccuracy
     limited_src = false
     if !limited_ret
         gt = me.src.ssavaluetypes::Vector{Any}
         for j = 1:length(gt)
             gt[j] = gtj = cycle_fix_limited(gt[j], me)
-            if gtj isa LimitedAccuracy && me.parent !== nothing
+            if gtj isa LimitedAccuracy && parent !== nothing
                 limited_src = true
                 break
             end
         end
     end
     if limited_ret
         # a parent may be cached still, but not this intermediate work:
-        # we can throw everything else away now
-        me.result.src = nothing
+        # we can throw everything else away now, unless inlinear will still want to have the inferred source
+        if !(parent !== nothing && is_stmt_inline(get_curr_ssaflag(parent)))
+            me.result.src = nothing
+        end
         me.cached = false
         me.src.inlineable = false
         unlock_mi_inference(interp, me.linfo)
     elseif limited_src
         # a type result will be cached still, but not this intermediate work:
-        # we can throw everything else away now
-        me.result.src = nothing
+        # we can throw everything else away now, unless inlinear will still want to have the inferred source
+        if !(parent !== nothing && is_stmt_inline(get_curr_ssaflag(parent)))
+            me.result.src = nothing
+        end
         me.src.inlineable = false
     else
         # annotate fulltree with type information,
         # either because we are the outermost code, or we might use this later
-        doopt = (me.cached || me.parent !== nothing)
+        doopt = (me.cached || parent !== nothing)
         type_annotate!(me, doopt)
         if doopt && may_optimize(interp)
             me.result.src = OptimizationState(me, OptimizationParams(interp), interp)
@@ -834,14 +839,9 @@ function typeinf_code(interp::AbstractInterpreter, method::Method, @nospecialize
     mi = specialize_method(method, atypes, sparams)::MethodInstance
     ccall(:jl_typeinf_begin, Cvoid, ())
     result = InferenceResult(mi)
-    frame = InferenceState(result, false, interp)
+    frame = InferenceState(result, run_optimizer, interp)
     frame === nothing && return (nothing, Any)
-    if typeinf(interp, frame) && run_optimizer
-        opt_params = OptimizationParams(interp)
-        result.src = src = OptimizationState(frame, opt_params, interp)
-        optimize(interp, src, opt_params, ignorelimited(result.result))
-        frame.src = finish!(interp, result)
-    end
+    typeinf(interp, frame)
     ccall(:jl_typeinf_end, Cvoid, ())
     frame.inferred || return (nothing, Any)
     return (frame.src, widenconst(ignorelimited(result.result)))

diff --git a/base/compiler/types.jl b/base/compiler/types.jl
@@ -143,7 +143,7 @@ It contains many parameters used by the compilation pipeline.
 """
 struct NativeInterpreter <: AbstractInterpreter
     # Cache of inference results for this particular interpreter
-    cache::Vector{InferenceResult}
+    cache::Vector{Tuple{InferenceResult,Vector{Any}}}
     # The world age we're working inside of
     world::UInt
 
@@ -168,7 +168,7 @@ struct NativeInterpreter <: AbstractInterpreter
 
         return new(
             # Initially empty cache
-            Vector{InferenceResult}(),
+            Tuple{InferenceResult,Vector{Any}}[],
 
             # world age counter
             world,

diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl
@@ -173,7 +173,7 @@ function f_ifelse(x)
     return b ? x + 1 : x
 end
 # 2 for now because the compiler leaves a GotoNode around
-@test_broken length(code_typed(f_ifelse, (String,))[1][1].code) <= 2
+@test length(code_typed(f_ifelse, (String,))[1][1].code) <= 2
 
 # Test that inlining of _apply_iterate properly hits the inference cache
 @noinline cprop_inline_foo1() = (1, 1)
@@ -543,9 +543,17 @@ end
 
         import Core.Compiler: isType
 
-        limited(a) = @noinline(isType(a)) ? @inline(limited(a.parameters[1])) : rand(a)
+        function limited(a)
+            @nospecialize a
+            if @noinline(isType(a))
+                return @inline(limited(a.parameters[1]))
+            else
+                return rand(a)
+            end
+        end
 
         function multilimited(a)
+            @nospecialize a
             if @noinline(isType(a))
                 return @inline(multilimited(a.parameters[1]))
             else
@@ -602,12 +610,13 @@ end
     end
 
     let code = code_typed1(m.limited, (Any,))
-        @test count(x->isinvoke(x, :isType), code) == 2
+        @test count(x->isinvoke(x, :isType), code) == 2 # caller + inlined callee
     end
-    # check that inlining for recursive callsites doesn't depend on inference local cache
     let code1 = code_typed1(m.multilimited, (Any,))
         code2 = code_typed1(m.multilimited, (Any,))
+        # check that inlining for recursive callsites doesn't depend on inference local cache
         @test code1 == code2
+        @test count(x->isinvoke(x, :isType), code1) == 3 # caller + inlined callee + inlined callee
     end
 end