Skip to content

Commit

Permalink
[Profile] add builtin Allocs.print and formatting equivalents to Prof…
Browse files Browse the repository at this point in the history
…ile.print

Defines a converter for Allocs to the types implemented by Profile for
printing, allowing flamegraph report generation of allocations with just
the stdlib tooling.

Refs: #42768
  • Loading branch information
vtjnash committed Nov 1, 2023
1 parent 5db9dbd commit 665817b
Show file tree
Hide file tree
Showing 5 changed files with 246 additions and 27 deletions.
1 change: 1 addition & 0 deletions stdlib/Profile/docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ The methods in `Profile.Allocs` are not exported and need to be called e.g. as `

```@docs
Profile.Allocs.clear
Profile.Allocs.print
Profile.Allocs.fetch
Profile.Allocs.start
Profile.Allocs.stop
Expand Down
209 changes: 208 additions & 1 deletion stdlib/Profile/src/Allocs.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
module Allocs

global print # Allocs.print is separate from both Base.print and Profile.print

using ..Profile: Profile, ProfileFormat, StackFrameTree, print_flat, print_tree
using Base.StackTraces: StackTrace, StackFrame, lookup
using Base: InterpreterIP

Expand Down Expand Up @@ -138,7 +141,7 @@ end
# Without this, the Alloc's stacktrace prints for lines and lines and lines...
function Base.show(io::IO, a::Alloc)
stacktrace_sample = length(a.stacktrace) >= 1 ? "$(a.stacktrace[1]), ..." : ""
print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))")
Base.print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))")
end

const BacktraceCache = Dict{BTElement,Vector{StackFrame}}
Expand Down Expand Up @@ -216,4 +219,208 @@ function stacktrace_memoized(
return stack
end

function warning_empty()
@warn """
There were no samples collected.
Run your program longer (perhaps by running it multiple times),
or adjust the frequency of samples to record every event with
the `sample_rate=1.0` kwarg."""
end


print(; kwargs...) =
Profile.print(stdout, fetch(); kwargs...)
print(io::IO; kwargs...) =
Profile.print(io, fetch(); kwargs...)
print(io::IO, data::AllocResults; kwargs...) =
Profile.print(io, data; kwargs...)

"""
Profile.Allocs.print([io::IO = stdout,] [data::AllocResults = fetch()]; kwargs...)
Prints profiling results to `io` (by default, `stdout`). If you do not
supply a `data` vector, the internal buffer of accumulated backtraces
will be used.
"""
function Profile.print(io::IO,
data::AllocResults,
;
format = :tree,
C = false,
#combine = true,
maxdepth::Int = typemax(Int),
mincount::Int = 0,
noisefloor = 0,
sortedby::Symbol = :filefuncline,
groupby::Union{Symbol,AbstractVector{Symbol}} = :none,
recur::Symbol = :off,
)

pf = ProfileFormat(;C, maxdepth, mincount, noisefloor, sortedby, recur)
Profile.print(io, data, pf, format)
return
end

"""
Profile.Allocs.print([io::IO = stdout,] data::AllocResults=fetch(); kwargs...)
Prints profiling results to `io`.
See `Profile.Allocs.print([io], data)` for an explanation of the valid keyword arguments.
"""
Profile.print(data::AllocResults; kwargs...) =
Profile.print(stdout, data; kwargs...)

function Profile.print(io::IO, data::AllocResults, fmt::ProfileFormat, format::Symbol)
cols::Int = Base.displaysize(io)[2]
fmt.recur (:off, :flat, :flatc) || throw(ArgumentError("recur value not recognized"))
data = data.allocs
if format === :tree
tree(io, data, cols, fmt)
elseif format === :flat
fmt.recur === :off || throw(ArgumentError("format flat only implements recur=:off"))
flat(io, data, cols, fmt)
else
throw(ArgumentError("output format $(repr(format)) not recognized"))
end
nothing
end


function parse_flat(::Type{T}, data::Vector{Alloc}, C::Bool) where T
lilist = StackFrame[]
n = Int[]
m = Int[]
lilist_idx = Dict{T, Int}()
recursive = Set{T}()
totalbytes = 0
for r in data
first = true
empty!(recursive)
nb = r.size # or 1 for counting
totalbytes += nb
for frame in r.stacktrace
!C && frame.from_c && continue
key = (T === UInt64 ? ip : frame)
idx = get!(lilist_idx, key, length(lilist) + 1)
if idx > length(lilist)
push!(recursive, key)
push!(lilist, frame)
push!(n, nb)
push!(m, 0)
elseif !(key in recursive)
push!(recursive, key)
n[idx] += nb
end
if first
m[idx] += nb
first = false
end
end
end
@assert length(lilist) == length(n) == length(m) == length(lilist_idx)
return (lilist, n, m, totalbytes)
end

function flat(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat)
fmt.combine || error(ArgumentError("combine=false"))
lilist, n, m, totalbytes = parse_flat(fmt.combine ? StackFrame : UInt64, data, fmt.C)
filenamemap = Dict{Symbol,String}()
if isempty(lilist)
warning_empty()
return true
end
print_flat(io, lilist, n, m, cols, filenamemap, fmt)
Base.println(io, "Total snapshots: ", length(data))
Base.println(io, "Total bytes: ", totalbytes)
return false
end

function tree!(root::StackFrameTree{T}, all::Vector{Alloc}, C::Bool, recur::Symbol) where {T}
tops = Vector{StackFrameTree{T}}()
build = Dict{T, StackFrameTree{T}}()
for r in all
first = true
nb = r.size # or 1 for counting
root.recur = 0
root.count += nb
parent = root
for i in reverse(eachindex(r.stacktrace))
frame = r.stacktrace[i]
key = (T === UInt64 ? ip : frame)
if (recur === :flat && !frame.from_c) || recur === :flatc
# see if this frame already has a parent
this = get!(build, frame, parent)
if this !== parent
# Rewind the `parent` tree back, if this exact ip (FIXME) was already present *higher* in the current tree
push!(tops, parent)
parent = this
end
end
!C && frame.from_c && continue
this = get!(StackFrameTree{T}, parent.down, key)
if recur === :off || this.recur == 0
this.frame = frame
this.up = parent
this.count += nb
this.recur = 1
else
this.count_recur += 1
end
parent = this
end
parent.overhead += nb
if recur !== :off
# We mark all visited nodes to so we'll only count those branches
# once for each backtrace. Reset that now for the next backtrace.
empty!(build)
push!(tops, parent)
for top in tops
while top.recur != 0
top.max_recur < top.recur && (top.max_recur = top.recur)
top.recur = 0
top = top.up
end
end
empty!(tops)
end
let this = parent
while this !== root
this.flat_count += nb
this = this.up
end
end
end
function cleanup!(node::StackFrameTree)
stack = [node]
while !isempty(stack)
node = pop!(stack)
node.recur = 0
empty!(node.builder_key)
empty!(node.builder_value)
append!(stack, values(node.down))
end
nothing
end
cleanup!(root)
return root
end

function tree(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat)
fmt.combine || error(ArgumentError("combine=false"))
if fmt.combine
root = tree!(StackFrameTree{StackFrame}(), data, fmt.C, fmt.recur)
else
root = tree!(StackFrameTree{UInt64}(), data, fmt.C, fmt.recur)
end
print_tree(io, root, cols, fmt, false)
if isempty(root.down)
warning_empty()
return true
end
Base.println(io, "Total snapshots: ", length(data))
Base.println(io, "Total bytes: ", root.count)
return false
end

end
12 changes: 6 additions & 6 deletions stdlib/Profile/src/Profile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ function print(io::IO,

pf = ProfileFormat(;C, combine, maxdepth, mincount, noisefloor, sortedby, recur)
if groupby === :none
print(io, data, lidict, pf, format, threads, tasks, false)
print_group(io, data, lidict, pf, format, threads, tasks, false)
else
if !in(groupby, [:thread, :task, [:task, :thread], [:thread, :task]])
error(ArgumentError("Unrecognized groupby option: $groupby. Options are :none (default), :task, :thread, [:task, :thread], or [:thread, :task]"))
Expand All @@ -244,7 +244,7 @@ function print(io::IO,
printstyled(io, "Task $(Base.repr(taskid))$nl"; bold=true, color=Base.debug_color())
for threadid in threadids
printstyled(io, " Thread $threadid "; bold=true, color=Base.info_color())
nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true)
nosamples && (any_nosamples = true)
println(io)
end
Expand All @@ -262,7 +262,7 @@ function print(io::IO,
printstyled(io, "Thread $threadid$nl"; bold=true, color=Base.info_color())
for taskid in taskids
printstyled(io, " Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true)
nosamples && (any_nosamples = true)
println(io)
end
Expand All @@ -274,7 +274,7 @@ function print(io::IO,
isempty(taskids) && (any_nosamples = true)
for taskid in taskids
printstyled(io, "Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
nosamples = print(io, data, lidict, pf, format, threads, taskid, true)
nosamples = print_group(io, data, lidict, pf, format, threads, taskid, true)
nosamples && (any_nosamples = true)
println(io)
end
Expand All @@ -284,7 +284,7 @@ function print(io::IO,
isempty(threadids) && (any_nosamples = true)
for threadid in threadids
printstyled(io, "Thread $threadid "; bold=true, color=Base.info_color())
nosamples = print(io, data, lidict, pf, format, threadid, tasks, true)
nosamples = print_group(io, data, lidict, pf, format, threadid, tasks, true)
nosamples && (any_nosamples = true)
println(io)
end
Expand All @@ -306,7 +306,7 @@ See `Profile.print([io], data)` for an explanation of the valid keyword argument
print(data::Vector{<:Unsigned} = fetch(), lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data); kwargs...) =
print(stdout, data, lidict; kwargs...)

function print(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat,
function print_group(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat,
format::Symbol, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}},
is_subsection::Bool = false)
cols::Int = Base.displaysize(io)[2]
Expand Down
21 changes: 21 additions & 0 deletions stdlib/Profile/test/allocs.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
using Test
using Profile: Allocs

Allocs.clear()
let iobuf = IOBuffer()
for format in (:tree, :flat)
Test.@test_logs (:warn, r"^There were no samples collected\.") Allocs.print(iobuf; format, C=true)
end
end

@testset "alloc profiler doesn't segfault" begin
res = Allocs.@profile sample_rate=1.0 begin
# test the allocations during compilation
Expand All @@ -13,6 +20,20 @@ using Profile: Allocs
@test first_alloc.size > 0
@test length(first_alloc.stacktrace) > 0
@test length(string(first_alloc.type)) > 0

# test printing options
for options in ((format=:tree, C=true),
(format=:tree, maxdepth=2),
(format=:flat, C=true),
(),
(format=:flat, sortedby=:count),
(format=:tree, recur=:flat),
)
iobuf = IOBuffer()
Allocs.print(iobuf; options...)
str = String(take!(iobuf))
@test !isempty(str)
end
end

@testset "alloc profiler works when there are multiple tasks on multiple threads" begin
Expand Down
30 changes: 10 additions & 20 deletions stdlib/Profile/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -38,28 +38,18 @@ let r = Profile.retrieve()
end
end

let iobuf = IOBuffer()
Profile.print(iobuf, format=:tree, C=true)
str = String(take!(iobuf))
@test !isempty(str)
truncate(iobuf, 0)
Profile.print(iobuf, format=:tree, maxdepth=2)
str = String(take!(iobuf))
@test !isempty(str)
truncate(iobuf, 0)
Profile.print(iobuf, format=:flat, C=true)
str = String(take!(iobuf))
@test !isempty(str)
truncate(iobuf, 0)
Profile.print(iobuf)
@test !isempty(String(take!(iobuf)))
truncate(iobuf, 0)
Profile.print(iobuf, format=:flat, sortedby=:count)
@test !isempty(String(take!(iobuf)))
Profile.print(iobuf, format=:tree, recur=:flat)
# test printing options
for options in ((format=:tree, C=true),
(format=:tree, maxdepth=2),
(format=:flat, C=true),
(),
(format=:flat, sortedby=:count),
(format=:tree, recur=:flat),
)
iobuf = IOBuffer()
Profile.print(iobuf; options...)
str = String(take!(iobuf))
@test !isempty(str)
truncate(iobuf, 0)
end

@testset "Profile.print() groupby options" begin
Expand Down

0 comments on commit 665817b

Please sign in to comment.