From a034aa130d859cbdd8d1d5c78963a42e0f0bfda4 Mon Sep 17 00:00:00 2001
From: Cody Tapscott <84105208+topolarity@users.noreply.github.com>
Date: Sun, 14 Jan 2024 03:42:02 -0500
Subject: [PATCH] domtree: Optimize `DFS!` allocations (#52880)

Since `DFS!` is a decent fraction of the work for updating the domtree,
this should be a noticeable improvement to semi-concrete eval for very
large functions.

Profiling downstream shows a lot of time spent `push!`ing into the
worklist and creating temporary arrays.
---
 base/compiler/ssair/domtree.jl | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl
index 934cd456dd945..dfe0550d7a06d 100644
--- a/base/compiler/ssair/domtree.jl
+++ b/base/compiler/ssair/domtree.jl
@@ -82,6 +82,8 @@ struct DFSTree
     # (preorder number -> preorder number)
     # Storing it this way saves a few lookups in the snca_compress! algorithm
     to_parent_pre::Vector{PreNumber}
+
+    _worklist::Vector{Tuple{BBNumber, PreNumber, Bool}}
 end
 
 function DFSTree(n_blocks::Int)
@@ -89,14 +91,16 @@ function DFSTree(n_blocks::Int)
             Vector{BBNumber}(undef, n_blocks),
             zeros(PostNumber, n_blocks),
             Vector{BBNumber}(undef, n_blocks),
-            zeros(PreNumber, n_blocks))
+            zeros(PreNumber, n_blocks),
+            Vector{Tuple{BBNumber, PreNumber, Bool}}())
 end
 
 copy(D::DFSTree) = DFSTree(copy(D.to_pre),
                            copy(D.from_pre),
                            copy(D.to_post),
                            copy(D.from_post),
-                           copy(D.to_parent_pre))
+                           copy(D.to_parent_pre),
+                           copy(D._worklist))
 
 function copy!(dst::DFSTree, src::DFSTree)
     copy!(dst.to_pre, src.to_pre)
@@ -106,17 +110,26 @@ function copy!(dst::DFSTree, src::DFSTree)
     copy!(dst.to_parent_pre, src.to_parent_pre)
     return dst
 end
+function resize!(D::DFSTree, n::Integer)
+    resize!(D.to_pre, n)
+    resize!(D.from_pre, n)
+    resize!(D.to_post, n)
+    resize!(D.from_post, n)
+    resize!(D.to_parent_pre, n)
+end
 
 length(D::DFSTree) = length(D.from_pre)
 
 function DFS!(D::DFSTree, blocks::Vector{BasicBlock}, is_post_dominator::Bool)
-    copy!(D, DFSTree(length(blocks)))
+    resize!(D, length(blocks))
+    fill!(D.to_pre, 0)
+    to_visit = D._worklist # always starts empty
     if is_post_dominator
         # TODO: We're using -1 as the virtual exit node here. Would it make
         #       sense to actually have a real BB for the exit always?
-        to_visit = Tuple{BBNumber, PreNumber, Bool}[(-1, 0, false)]
+        push!(to_visit, (-1, 0, false))
     else
-        to_visit = Tuple{BBNumber, PreNumber, Bool}[(1, 0, false)]
+        push!(to_visit, (1, 0, false))
     end
     pre_num = is_post_dominator ? 0 : 1
     post_num = 1