add benchmarks for some collection operations

JuliaCI · Dec 13, 2017 · 5d09dad · 5d09dad
1 parent bac7236
commit 5d09dad
Show file tree

Hide file tree

Showing 2 changed files with 281 additions and 0 deletions.
diff --git a/src/BaseBenchmarks.jl b/src/BaseBenchmarks.jl
@@ -12,6 +12,7 @@ const PARAMS_PATH = joinpath(dirname(@__FILE__), "..", "etc", "params.json")
 const SUITE = BenchmarkGroup()
 const MODULES = Dict("array" => :ArrayBenchmarks,
                      "broadcast" => :BroadcastBenchmarks,
+                     "collection" => :CollectionBenchmarks,
                      "dates" => :DatesBenchmarks,
                      "find" => :FindBenchmarks,
                      "io" => :IOBenchmarks,

diff --git a/src/collection/CollectionBenchmarks.jl b/src/collection/CollectionBenchmarks.jl
@@ -0,0 +1,280 @@
+module CollectionBenchmarks
+
+using BenchmarkTools
+
+# TODO: Remove once Compat has BitSet
+if !isdefined(Base, :BitSet)
+    const BitSet = IntSet
+end
+
+const SUITE = BenchmarkGroup()
+
+const MT = MersenneTwister(0)
+
+const iterlen = 1000
+const ints = rand(MT, 1:iterlen, iterlen)
+const strings = [randstring(MT, rand(MT, 1:30)) for i in 1:iterlen]
+const anys = (shuffle!(MT, [ints; strings;])[1:iterlen])::Vector{Any}
+const sortedints = sort(ints)
+
+# random element, in the collections iff 2nd parameter is true
+let anyshuffled = shuffle(MT, anys),
+    filter = VERSION >= v"0.5.0" ? Iterators.filter : Base.filter
+    d = Dict((Int,    true ) => first(filter(i->isa(i, Int),    anyshuffled))::Int,
+             (String, true ) => first(filter(i->isa(i, String), anyshuffled))::String,
+             (Int,    false) => first(filter(i->!in(i, ints), rand(MT, 1:iterlen) for _ in 1:typemax(Int)))::Int,
+             (String, false) => randstring(MT, 40)::String)
+    global randelt
+    randelt(C, T, bool) = (T == Any && (T = Int); C === Dict ? (d[T, bool] => d[T, bool]) : d[T, bool])
+end
+
+const coll = Dict((Vector, Int)    => ints,
+                  (Vector, String) => strings,
+                  (Vector, Any)    => anys,
+                  (Pair, Int)      => map(Pair, ints, ints),
+                  (Pair, String)   => map(Pair, strings, strings),
+                  (Pair, Any)      => map(Pair, anys, anys),
+                  (BitSet, Int)    => BitSet(ints))
+
+# return a collection that can serve to initialize a container C of type T
+initcoll(C, T) = let initmap = Dict(Vector => Vector,
+                                    Dict   => Pair,
+                                    Set    => Vector,
+                                    BitSet => Vector)
+    coll[initmap[C], T]
+end
+
+for C in (Dict, Set),
+        T in (Int, String, Any)
+    coll[C, T] = C(initcoll(C, T))
+end
+
+@inline newcoll(::Type{C}, ::Type{T}) where {C,T} = C{T}()
+@inline newcoll(::Type{Dict}, ::Type{T}) where {T} = Dict{T,T}()
+@inline newcoll(::Type{BitSet}, ::Type{Int}) = BitSet()
+
+askey(::Type, elt) = elt
+askey(::Type{Dict}, elt) = first(elt)
+
+objstr(T) = T === Int ? "Int" : T === Vector ? "Vector" : string(T)
+
+function foreach_container(bench; T = (Int, String, Any), C = (Vector, Dict, Set, BitSet))
+    for _C in C
+        cstr = objstr(_C)
+        for _T in T
+            _C === BitSet && _T !== Int && continue
+            c = coll[_C, _T]
+            bench(_C, cstr, _T, objstr(_T), c)
+        end
+    end
+end
+
+function set_tolerance!(g, tol=0.25)
+    for b in values(g)
+        b.params.time_tolerance = tol
+    end
+end
+
+##################
+# Initialization #
+##################
+
+g = addgroup!(SUITE, "initialization", ["AbstractVector", "AbstractSet", "Associative"])
+
+function perf_push!(c, elts)
+    for e in elts
+        push!(c, e)
+    end
+end
+
+foreach_container(C = (Vector, Dict, Set)) do C, cstr, T, tstr, c
+    g[cstr, tstr, "iterator"] = @benchmarkable $C($(initcoll(C, T)))
+    g[cstr, tstr, "loop"]     = @benchmarkable perf_push!(newcoll($C, $T), $(initcoll(C, T)))
+    g[cstr, tstr, "loop", "sizehint!"] =
+        @benchmarkable perf_push!(sizehint!(newcoll($C, $T), iterlen), $(initcoll(C, T)))
+end
+
+g["BitSet", "Int", "unsorted", "iterator"]          = @benchmarkable BitSet($ints)
+g["BitSet", "Int", "sorted",   "iterator"]          = @benchmarkable BitSet($sortedints)
+g["BitSet", "Int", "unsorted", "loop"]              = @benchmarkable perf_push!(BitSet(), $ints)
+g["BitSet", "Int", "sorted",   "loop"]              = @benchmarkable perf_push!(BitSet(), $sortedints)
+g["BitSet", "Int", "unsorted", "loop", "sizehint!"] = @benchmarkable perf_push!(sizehint!(BitSet(), iterlen), $ints)
+g["BitSet", "Int", "sorted",   "loop", "sizehint!"] = @benchmarkable perf_push!(sizehint!(BitSet(), iterlen), $sortedints)
+
+set_tolerance!(g)
+
+#############
+# Iteration #
+#############
+
+g = addgroup!(SUITE, "iteration", ["AbstractVector", "AbstractSet", "Associative"])
+
+foreach_container() do C, cstr, T, tstr, c
+    g[cstr, tstr, "start"] = @benchmarkable start($c)
+    g[cstr, tstr, "next"]  = @benchmarkable next($c, $(start(c)))
+    g[cstr, tstr, "done"]  = @benchmarkable done($c, $(start(c)))
+end
+
+set_tolerance!(g)
+
+############
+# Deletion #
+############
+
+g = addgroup!(SUITE, "deletion", ["AbstractVector", "AbstractSet", "Associative"])
+
+function perf_pop!(c)
+    while !isempty(c)
+        pop!(c)
+    end
+end
+
+pred(::Type{C}, ::Type{Any})    where {C} = x -> isa(askey(C, x), Int)
+pred(::Type{C}, ::Type{String}) where {C} = x -> Int(askey(C, x)[1]) < 90
+pred(::Type{C}, ::Type{Int})    where {C} = x -> iseven(askey(C, x))
+
+foreach_container() do C, cstr, T, tstr, c
+    if VERSION >= v"0.7.0-" || C !== Dict
+        g[cstr, tstr, "pop!"] = @benchmarkable perf_pop!(d) setup=(d=copy($c)) evals=1
+    end
+    C === BitSet && return
+    if VERSION >= v"0.7.0-" || C !== Dict
+        g[cstr, tstr, "filter!"] = @benchmarkable filter!($(pred(C, T)), d) setup=(d=copy($c)) evals=1
+        g[cstr, tstr, "filter"] =  @benchmarkable filter( $(pred(C, T)), $c)
+    end
+end
+
+set_tolerance!(g)
+
+#####################
+# Queries & Updates #
+#####################
+
+g = addgroup!(SUITE, "queries & updates", ["AbstractVector", "AbstractSet", "Associative"])
+
+foreach_container() do C, cstr, T, tstr, c
+    if T === Int # seems unnecessary to run those with all types
+        g[cstr, tstr, "length"] = @benchmarkable length($c) # probably useful only for BitSet
+        g[cstr, tstr, "first"]  = @benchmarkable first($c)
+        if C in (Vector, BitSet)
+            g[cstr, tstr, "last"] = @benchmarkable last($c)
+        end
+    end
+    eltin, eltout = randelt(C, T, true), randelt(C, T, false)
+    g[cstr, tstr, "in", "true"]  = @benchmarkable in($eltin, $c)
+    g[cstr, tstr, "in", "false"] = @benchmarkable in($eltout, $c)
+    if C === Vector
+        g[cstr, tstr, "getindex"]  = @benchmarkable $c[$(iterlen÷2)]
+        g[cstr, tstr, "setindex!"] = @benchmarkable d[$(iterlen÷2)] = $eltout setup=(d=copy($c))
+    elseif C === Dict
+        keyin  = askey(C, eltin)
+        keyout = askey(C, eltout)
+        g[cstr, tstr, "getindex"]               = @benchmarkable $c[$keyin]
+        g[cstr, tstr, "setindex!", "overwrite"] = @benchmarkable d[$keyin]  = $keyout setup=(d=copy($c))
+        g[cstr, tstr, "setindex!", "new"]       = @benchmarkable d[$keyout] = $keyout setup=(d=copy($c)) evals=1
+    end
+    if C === Vector
+        g[cstr, tstr, "push!"] = @benchmarkable push!(d, $eltout) setup=(d=copy($c))
+    else
+        g[cstr, tstr, "push!", "overwrite"] = @benchmarkable push!(d, $eltin)             setup=(d=copy($c))
+        g[cstr, tstr, "push!", "new"]       = @benchmarkable push!(d, $eltout)            setup=(d=copy($c)) evals=1
+        # despite the `evals=1` parameter, this gets evaluated more than once, leading to a KeyError
+        # g[cstr, tstr, "pop!",  "specified"] = @benchmarkable  pop!(d, $(askey(C, eltin))) setup=(d=copy($c)) evals=1
+        # so we add a push!
+        g[cstr, tstr, "pop!",  "specified"] = @benchmarkable  pop!(push!(d, $eltin), $(askey(C, eltin))) setup=(d=copy($c))
+
+    end
+    if C !== Dict || VERSION >= v"0.7.0-"
+        # same remark as above, we need to push! a value to avoid ending up with an empty container
+        g[cstr, tstr, "pop!", "unspecified"] = @benchmarkable push!(d, pop!(d)) setup=(d=copy($c))
+    end
+end
+
+set_tolerance!(g)
+
+##################
+# Set operations #
+##################
+
+g = addgroup!(SUITE, "set operations", ["AbstractSet", "Array"])
+const newints = [rand(MT, ints, 10); rand(MT, 1:iterlen, 10); rand(MT, iterlen:2iterlen, 10);]
+
+foreach_container(C = (BitSet, Set, Vector), T = (Int,)) do C, cstr, T, tstr, c
+    g[cstr, tstr, "union"]     = @benchmarkable union($c)
+    g[cstr, tstr, "intersect"] = @benchmarkable intersect($c)
+    g[cstr, tstr, "symdiff"]   = @benchmarkable symdiff($c)
+    for C2 in (BitSet, Set, Vector)
+        c2 = C2(newints)
+        c2str = objstr(C2)
+        g[cstr, tstr, "union",     c2str] = @benchmarkable union($c, $c2)
+        g[cstr, tstr, "intersect", c2str] = @benchmarkable intersect($c, $c2)
+        g[cstr, tstr, "symdiff",   c2str] = @benchmarkable symdiff($c, $c2)
+        g[cstr, tstr, "setdiff",   c2str] = @benchmarkable setdiff($c, $c2)
+
+        g[cstr, tstr, "union", c2str, c2str]     = @benchmarkable union($c, $c2, $c2)
+        g[cstr, tstr, "intersect", c2str, c2str] = @benchmarkable intersect($c, $c2, $c2)
+        g[cstr, tstr, "symdiff", c2str, c2str]   = @benchmarkable symdiff($c, $c2, $c2)
+
+        if C === BitSet && C2 === BitSet
+            g[cstr, tstr, "intersect!", c2str] = @benchmarkable intersect!(d, $c2) setup=(d=copy($c)) evals=1
+        end
+        if C === BitSet
+            g[cstr, tstr, "symdiff!",   c2str] = @benchmarkable symdiff!(d, $c2)   setup=(d=copy($c)) evals=1
+        end
+        if C in (BitSet, Set)
+            g[cstr, tstr, "setdiff!",   c2str] = @benchmarkable setdiff!(d, $c2)   setup=(d=copy($c)) evals=1
+            g[cstr, tstr, "union!",     c2str] = @benchmarkable union!(d, $c2)     setup=(d=copy($c)) evals=1
+        end
+        g[cstr, tstr, "⊆", c2str] = @benchmarkable ⊆($c, $c2)
+    end
+    C === Vector && return
+    empty = newcoll(C, Int)
+    c2  = C(newints)
+    g[cstr,    tstr, "<",  cstr]   = @benchmarkable <($c, $c2)
+    g["empty", tstr, "<",  cstr]   = @benchmarkable <($empty, $c)
+    g["empty", tstr, "⊆",  cstr]   = @benchmarkable ⊆($empty, $c)
+    g[cstr,    tstr, "⊆",  "self"] = @benchmarkable ⊆($c, $c)
+    g[cstr,    tstr, "==", cstr]   = @benchmarkable ==($c, $c2)
+    g[cstr,    tstr, "==", "self"] = @benchmarkable ==($c, $c)
+end
+
+# test BitSet with very large values
+const small, big = BitSet(2), BitSet(2^18)
+g["BitSet", "Int", "union!",     "big"]   = @benchmarkable union!(b, $small)     setup=(b=BitSet(2^18)) evals=1
+g["BitSet", "Int", "union!",     "small"] = @benchmarkable union!(s, $big)       setup=(s=BitSet(2))    evals=1
+g["BitSet", "Int", "intersect!", "big"]   = @benchmarkable intersect!(b, $small) setup=(b=BitSet(2^18)) evals=1
+g["BitSet", "Int", "intersect!", "small"] = @benchmarkable intersect!(s, $big)   setup=(s=BitSet(2))    evals=1
+g["BitSet", "Int", "setdiff!",   "big"]   = @benchmarkable setdiff!(b, $small)   setup=(b=BitSet(2^18)) evals=1
+g["BitSet", "Int", "setdiff!",   "small"] = @benchmarkable setdiff!(s, $big)     setup=(s=BitSet(2))    evals=1
+g["BitSet", "Int", "symdiff!",   "big"]   = @benchmarkable symdiff!(b, $small)   setup=(b=BitSet(2^18)) evals=1
+g["BitSet", "Int", "symdiff!",   "small"] = @benchmarkable symdiff!(s, $big)     setup=(s=BitSet(2))    evals=1
+
+set_tolerance!(g)
+
+#############################################
+# Optimizations for types with "few" values #
+#############################################
+
+# cf. issue #20903 and PR #21964
+g = addgroup!(SUITE, "optimizations", ["Dict", "Set", "BitSet", "Vector"])
+
+for T in (Void, Bool, Int8, UInt16)
+    v::Vector{T} = T === Void ? Vector{Void}(100000) :
+                                rand(MT, one(T):typemax(T), 100000)
+    tstr = string(T)
+    g["Dict", "abstract", tstr] = @benchmarkable Dict($(map(Pair, v, v)))
+    g["Dict", "concrete", tstr] = @benchmarkable Dict{$T,$T}($(map(Pair, v, v)))
+    g["Set",  "abstract", tstr] = @benchmarkable Set($v)
+    g["Set",  "concrete", tstr] = @benchmarkable Set{$T}($v)
+    if T === Void
+        g["Vector", "abstract", tstr] = @benchmarkable Vector($v)
+        g["Vector", "concrete", tstr] = @benchmarkable Vector{$T}($v)
+    elseif T !== Bool && (!(T <: Unsigned) || VERSION >= v"0.7.0-")
+        # there is a bug on 0.6, will probably get fixed on v6.2, cf. #24365
+        g["BitSet", tstr] = @benchmarkable BitSet($v)
+    end
+end
+
+set_tolerance!(g)
+
+end # module