[Profile] add builtin Allocs.print and formatting equivalents to Prof…

…ile.print Defines a converter for Allocs to the types implemented by Profile for printing, allowing flamegraph report generation of allocations with just the stdlib tooling. Refs: #42768
JuliaLang · Nov 1, 2023 · 665817b · 665817b
1 parent 5db9dbd
commit 665817b
Show file tree

Hide file tree

Showing 5 changed files with 246 additions and 27 deletions.
diff --git a/stdlib/Profile/docs/src/index.md b/stdlib/Profile/docs/src/index.md
@@ -106,6 +106,7 @@ The methods in `Profile.Allocs` are not exported and need to be called e.g. as `
 
 ```@docs
 Profile.Allocs.clear
+Profile.Allocs.print
 Profile.Allocs.fetch
 Profile.Allocs.start
 Profile.Allocs.stop

diff --git a/stdlib/Profile/src/Allocs.jl b/stdlib/Profile/src/Allocs.jl
@@ -1,5 +1,8 @@
 module Allocs
 
+global print # Allocs.print is separate from both Base.print and Profile.print
+
+using ..Profile: Profile, ProfileFormat, StackFrameTree, print_flat, print_tree
 using Base.StackTraces: StackTrace, StackFrame, lookup
 using Base: InterpreterIP
 
@@ -138,7 +141,7 @@ end
 # Without this, the Alloc's stacktrace prints for lines and lines and lines...
 function Base.show(io::IO, a::Alloc)
     stacktrace_sample = length(a.stacktrace) >= 1 ? "$(a.stacktrace[1]), ..." : ""
-    print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))")
+    Base.print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))")
 end
 
 const BacktraceCache = Dict{BTElement,Vector{StackFrame}}
@@ -216,4 +219,208 @@ function stacktrace_memoized(
     return stack
 end
 
+function warning_empty()
+    @warn """
+    There were no samples collected.
+    Run your program longer (perhaps by running it multiple times),
+    or adjust the frequency of samples to record every event with
+    the `sample_rate=1.0` kwarg."""
+end
+
+
+print(; kwargs...) =
+    Profile.print(stdout, fetch(); kwargs...)
+print(io::IO; kwargs...) =
+    Profile.print(io, fetch(); kwargs...)
+print(io::IO, data::AllocResults; kwargs...) =
+    Profile.print(io, data; kwargs...)
+
+"""
+    Profile.Allocs.print([io::IO = stdout,] [data::AllocResults = fetch()]; kwargs...)
+
+Prints profiling results to `io` (by default, `stdout`). If you do not
+supply a `data` vector, the internal buffer of accumulated backtraces
+will be used.
+"""
+function Profile.print(io::IO,
+        data::AllocResults,
+        ;
+        format = :tree,
+        C = false,
+        #combine = true,
+        maxdepth::Int = typemax(Int),
+        mincount::Int = 0,
+        noisefloor = 0,
+        sortedby::Symbol = :filefuncline,
+        groupby::Union{Symbol,AbstractVector{Symbol}} = :none,
+        recur::Symbol = :off,
+        )
+
+    pf = ProfileFormat(;C, maxdepth, mincount, noisefloor, sortedby, recur)
+    Profile.print(io, data, pf, format)
+    return
+end
+
+"""
+    Profile.Allocs.print([io::IO = stdout,] data::AllocResults=fetch(); kwargs...)
+
+Prints profiling results to `io`.
+
+See `Profile.Allocs.print([io], data)` for an explanation of the valid keyword arguments.
+"""
+Profile.print(data::AllocResults; kwargs...) =
+    Profile.print(stdout, data; kwargs...)
+
+function Profile.print(io::IO, data::AllocResults, fmt::ProfileFormat, format::Symbol)
+    cols::Int = Base.displaysize(io)[2]
+    fmt.recur ∈ (:off, :flat, :flatc) || throw(ArgumentError("recur value not recognized"))
+    data = data.allocs
+    if format === :tree
+        tree(io, data, cols, fmt)
+    elseif format === :flat
+        fmt.recur === :off || throw(ArgumentError("format flat only implements recur=:off"))
+        flat(io, data, cols, fmt)
+    else
+        throw(ArgumentError("output format $(repr(format)) not recognized"))
+    end
+    nothing
+end
+
+
+function parse_flat(::Type{T}, data::Vector{Alloc}, C::Bool) where T
+    lilist = StackFrame[]
+    n = Int[]
+    m = Int[]
+    lilist_idx = Dict{T, Int}()
+    recursive = Set{T}()
+    totalbytes = 0
+    for r in data
+        first = true
+        empty!(recursive)
+        nb = r.size # or 1 for counting
+        totalbytes += nb
+        for frame in r.stacktrace
+            !C && frame.from_c && continue
+            key = (T === UInt64 ? ip : frame)
+            idx = get!(lilist_idx, key, length(lilist) + 1)
+            if idx > length(lilist)
+                push!(recursive, key)
+                push!(lilist, frame)
+                push!(n, nb)
+                push!(m, 0)
+            elseif !(key in recursive)
+                push!(recursive, key)
+                n[idx] += nb
+            end
+            if first
+                m[idx] += nb
+                first = false
+            end
+        end
+    end
+    @assert length(lilist) == length(n) == length(m) == length(lilist_idx)
+    return (lilist, n, m, totalbytes)
+end
+
+function flat(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat)
+    fmt.combine || error(ArgumentError("combine=false"))
+    lilist, n, m, totalbytes = parse_flat(fmt.combine ? StackFrame : UInt64, data, fmt.C)
+    filenamemap = Dict{Symbol,String}()
+    if isempty(lilist)
+        warning_empty()
+        return true
+    end
+    print_flat(io, lilist, n, m, cols, filenamemap, fmt)
+    Base.println(io, "Total snapshots: ", length(data))
+    Base.println(io, "Total bytes: ", totalbytes)
+    return false
+end
+
+function tree!(root::StackFrameTree{T}, all::Vector{Alloc}, C::Bool, recur::Symbol) where {T}
+    tops = Vector{StackFrameTree{T}}()
+    build = Dict{T, StackFrameTree{T}}()
+    for r in all
+        first = true
+        nb = r.size # or 1 for counting
+        root.recur = 0
+        root.count += nb
+        parent = root
+        for i in reverse(eachindex(r.stacktrace))
+            frame = r.stacktrace[i]
+            key = (T === UInt64 ? ip : frame)
+            if (recur === :flat && !frame.from_c) || recur === :flatc
+                # see if this frame already has a parent
+                this = get!(build, frame, parent)
+                if this !== parent
+                    # Rewind the `parent` tree back, if this exact ip (FIXME) was already present *higher* in the current tree
+                    push!(tops, parent)
+                    parent = this
+                end
+            end
+            !C && frame.from_c && continue
+            this = get!(StackFrameTree{T}, parent.down, key)
+            if recur === :off || this.recur == 0
+                this.frame = frame
+                this.up = parent
+                this.count += nb
+                this.recur = 1
+            else
+                this.count_recur += 1
+            end
+            parent = this
+        end
+        parent.overhead += nb
+        if recur !== :off
+            # We mark all visited nodes to so we'll only count those branches
+            # once for each backtrace. Reset that now for the next backtrace.
+            empty!(build)
+            push!(tops, parent)
+            for top in tops
+                while top.recur != 0
+                    top.max_recur < top.recur && (top.max_recur = top.recur)
+                    top.recur = 0
+                    top = top.up
+                end
+            end
+            empty!(tops)
+        end
+        let this = parent
+            while this !== root
+                this.flat_count += nb
+                this = this.up
+            end
+        end
+    end
+    function cleanup!(node::StackFrameTree)
+        stack = [node]
+        while !isempty(stack)
+            node = pop!(stack)
+            node.recur = 0
+            empty!(node.builder_key)
+            empty!(node.builder_value)
+            append!(stack, values(node.down))
+        end
+        nothing
+    end
+    cleanup!(root)
+    return root
+end
+
+function tree(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat)
+    fmt.combine || error(ArgumentError("combine=false"))
+    if fmt.combine
+        root = tree!(StackFrameTree{StackFrame}(), data, fmt.C, fmt.recur)
+    else
+        root = tree!(StackFrameTree{UInt64}(), data, fmt.C, fmt.recur)
+    end
+    print_tree(io, root, cols, fmt, false)
+    if isempty(root.down)
+        warning_empty()
+        return true
+    end
+    Base.println(io, "Total snapshots: ", length(data))
+    Base.println(io, "Total bytes: ", root.count)
+    return false
+end
+
 end
diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl
@@ -220,7 +220,7 @@ function print(io::IO,
 
     pf = ProfileFormat(;C, combine, maxdepth, mincount, noisefloor, sortedby, recur)
     if groupby === :none
-        print(io, data, lidict, pf, format, threads, tasks, false)
+        print_group(io, data, lidict, pf, format, threads, tasks, false)
     else
         if !in(groupby, [:thread, :task, [:task, :thread], [:thread, :task]])
             error(ArgumentError("Unrecognized groupby option: $groupby. Options are :none (default), :task, :thread, [:task, :thread], or [:thread, :task]"))
@@ -244,7 +244,7 @@ function print(io::IO,
                     printstyled(io, "Task $(Base.repr(taskid))$nl"; bold=true, color=Base.debug_color())
                     for threadid in threadids
                         printstyled(io, " Thread $threadid "; bold=true, color=Base.info_color())
-                        nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
+                        nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true)
                         nosamples && (any_nosamples = true)
                         println(io)
                     end
@@ -262,7 +262,7 @@ function print(io::IO,
                     printstyled(io, "Thread $threadid$nl"; bold=true, color=Base.info_color())
                     for taskid in taskids
                         printstyled(io, " Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
-                        nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
+                        nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true)
                         nosamples && (any_nosamples = true)
                         println(io)
                     end
@@ -274,7 +274,7 @@ function print(io::IO,
             isempty(taskids) && (any_nosamples = true)
             for taskid in taskids
                 printstyled(io, "Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
-                nosamples = print(io, data, lidict, pf, format, threads, taskid, true)
+                nosamples = print_group(io, data, lidict, pf, format, threads, taskid, true)
                 nosamples && (any_nosamples = true)
                 println(io)
             end
@@ -284,7 +284,7 @@ function print(io::IO,
             isempty(threadids) && (any_nosamples = true)
             for threadid in threadids
                 printstyled(io, "Thread $threadid "; bold=true, color=Base.info_color())
-                nosamples = print(io, data, lidict, pf, format, threadid, tasks, true)
+                nosamples = print_group(io, data, lidict, pf, format, threadid, tasks, true)
                 nosamples && (any_nosamples = true)
                 println(io)
             end
@@ -306,7 +306,7 @@ See `Profile.print([io], data)` for an explanation of the valid keyword argument
 print(data::Vector{<:Unsigned} = fetch(), lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data); kwargs...) =
     print(stdout, data, lidict; kwargs...)
 
-function print(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat,
+function print_group(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat,
                 format::Symbol, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}},
                 is_subsection::Bool = false)
     cols::Int = Base.displaysize(io)[2]

diff --git a/stdlib/Profile/test/allocs.jl b/stdlib/Profile/test/allocs.jl
@@ -1,6 +1,13 @@
 using Test
 using Profile: Allocs
 
+Allocs.clear()
+let iobuf = IOBuffer()
+    for format in (:tree, :flat)
+        Test.@test_logs (:warn, r"^There were no samples collected\.") Allocs.print(iobuf; format, C=true)
+    end
+end
+
 @testset "alloc profiler doesn't segfault" begin
     res = Allocs.@profile sample_rate=1.0 begin
         # test the allocations during compilation
@@ -13,6 +20,20 @@ using Profile: Allocs
     @test first_alloc.size > 0
     @test length(first_alloc.stacktrace) > 0
     @test length(string(first_alloc.type)) > 0
+
+    # test printing options
+    for options in ((format=:tree, C=true),
+                    (format=:tree, maxdepth=2),
+                    (format=:flat, C=true),
+                    (),
+                    (format=:flat, sortedby=:count),
+                    (format=:tree, recur=:flat),
+                   )
+        iobuf = IOBuffer()
+        Allocs.print(iobuf; options...)
+        str = String(take!(iobuf))
+        @test !isempty(str)
+    end
 end
 
 @testset "alloc profiler works when there are multiple tasks on multiple threads" begin

diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl
@@ -38,28 +38,18 @@ let r = Profile.retrieve()
     end
 end
 
-let iobuf = IOBuffer()
-    Profile.print(iobuf, format=:tree, C=true)
-    str = String(take!(iobuf))
-    @test !isempty(str)
-    truncate(iobuf, 0)
-    Profile.print(iobuf, format=:tree, maxdepth=2)
-    str = String(take!(iobuf))
-    @test !isempty(str)
-    truncate(iobuf, 0)
-    Profile.print(iobuf, format=:flat, C=true)
-    str = String(take!(iobuf))
-    @test !isempty(str)
-    truncate(iobuf, 0)
-    Profile.print(iobuf)
-    @test !isempty(String(take!(iobuf)))
-    truncate(iobuf, 0)
-    Profile.print(iobuf, format=:flat, sortedby=:count)
-    @test !isempty(String(take!(iobuf)))
-    Profile.print(iobuf, format=:tree, recur=:flat)
+# test printing options
+for options in ((format=:tree, C=true),
+                (format=:tree, maxdepth=2),
+                (format=:flat, C=true),
+                (),
+                (format=:flat, sortedby=:count),
+                (format=:tree, recur=:flat),
+               )
+    iobuf = IOBuffer()
+    Profile.print(iobuf; options...)
     str = String(take!(iobuf))
     @test !isempty(str)
-    truncate(iobuf, 0)
 end
 
 @testset "Profile.print() groupby options" begin