Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce sizehint!(s, n; shrink = true) to controll shrinkage #51929

Merged
merged 11 commits into from
Nov 8, 2023
3 changes: 1 addition & 2 deletions base/abstractdict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,7 @@ Dict{Int64, Int64} with 3 entries:
function merge!(d::AbstractDict, others::AbstractDict...)
for other in others
if haslength(d) && haslength(other)
# TODO - do not shrink
sizehint!(d, length(d) + length(other))
sizehint!(d, length(d) + length(other); shrink = false)
end
for (k,v) in other
d[k] = v
Expand Down
2 changes: 1 addition & 1 deletion base/abstractset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ max_values(::Type{Bool}) = 2
max_values(::Type{Nothing}) = 1

function union!(s::AbstractSet{T}, itr) where T
haslength(itr) && _sizehint!(s, length(s) + Int(length(itr))::Int; shrink = false)
haslength(itr) && sizehint!(s, length(s) + Int(length(itr))::Int; shrink = false)
for x in itr
push!(s, x)
length(s) == max_values(T) && break
Expand Down
10 changes: 6 additions & 4 deletions base/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1441,7 +1441,7 @@ function resize!(a::Vector, nl::Integer)
end

"""
sizehint!(s, n) -> s
sizehint!(s, n; shrink = true) -> s

Suggest that collection `s` reserve capacity for at least `n` elements. That is, if
you expect that you're going to have to push a lot of values onto `s`, you can avoid
Expand All @@ -1462,10 +1462,12 @@ For types that support `sizehint!`,
`Base`.

3. `empty!` is nearly costless (and O(1)) for types that support this kind of preallocation.

4. `shrink` controls if the collection can be shrunk.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reading this, I don't understand what the argument does. Would you mind adding a more detailed explanation? Also it would seem more appropriate to mention the argument above, outside of the "# Notes on the performance model", as I wouldn't expect to have to read a performance section to find out the meaning of an argument.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, this seems like a fair point. The new optional argument enables us to suggest not to shrink the already reserved capacity when suggesting new size by setting shrink = false. This is helpful for algorithms like merging two collection where you want to pre-allocate memory, but there is not reason to change (shrink) size if all the data fits. I'm not a native speaker, would you be able to propose a more fitting description?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've not a native speaker either, but see #52226. I hope I got it right.

"""
function sizehint! end

function sizehint!(a::Vector, sz::Integer)
function sizehint!(a::Vector, sz::Integer; shrink = true)
petvana marked this conversation as resolved.
Show resolved Hide resolved
len = length(a)
ref = a.ref
mem = ref.mem
Expand All @@ -1474,7 +1476,7 @@ function sizehint!(a::Vector, sz::Integer)
sz = max(Int(sz), offset + len - 1)
if sz <= memlen
# if we don't save at least 1/8th memlen then its not worth it to shrink
if memlen - sz <= div(memlen, 8)
if !shrink || memlen - sz <= div(memlen, 8)
return a
end
newmem = array_new_memory(mem, sz)
Expand All @@ -1494,7 +1496,7 @@ function sizehint!(a::Vector, sz::Integer)
end

# Fall-back implementation for non-shrinkable collections
_sizehint!(a, sz; shrink) = sizehint!(a, sz)
sizehint!(a, sz; shrink) = sizehint!(a, sz)

"""
pop!(collection) -> item
Expand Down
6 changes: 2 additions & 4 deletions base/dict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ end
return h
end

function _sizehint!(d::Dict{T}, newsz; shrink = true) where T
function sizehint!(d::Dict{T}, newsz; shrink = true) where T
oldsz = length(d.slots)
# limit new element count to max_values of the key type
newsz = min(max(newsz, length(d)), max_values(T)::Int)
Expand All @@ -237,8 +237,6 @@ function _sizehint!(d::Dict{T}, newsz; shrink = true) where T
return (shrink ? newsz == oldsz : newsz <= oldsz) ? d : rehash!(d, newsz)
end

sizehint!(d::Dict{T}, newsz) where T = _sizehint!(d, newsz)

"""
empty!(collection) -> collection

Expand Down Expand Up @@ -777,7 +775,7 @@ function map!(f, iter::ValueIterator{<:Dict})
end

function mergewith!(combine, d1::Dict{K, V}, d2::AbstractDict) where {K, V}
haslength(d2) && _sizehint!(d1, length(d1) + length(d2), shrink = false)
haslength(d2) && sizehint!(d1, length(d1) + length(d2), shrink = false)
for (k, v) in d2
i, sh = ht_keyindex2_shorthash!(d1, k)
if i > 0
Expand Down
2 changes: 1 addition & 1 deletion base/set.jl
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ copymutable(s::Set{T}) where {T} = Set{T}(s)
# Set is the default mutable fall-back
copymutable(s::AbstractSet{T}) where {T} = Set{T}(s)

sizehint!(s::Set, newsz) = (sizehint!(s.dict, newsz); s)
sizehint!(s::Set, newsz; shrink = true) = (sizehint!(s.dict, newsz, shrink = shrink); s)
empty!(s::Set) = (empty!(s.dict); s)
rehash!(s::Set) = (rehash!(s.dict); s)

Expand Down
2 changes: 1 addition & 1 deletion base/weakkeydict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ function _cleanup_locked(h::WeakKeyDict)
return h
end

sizehint!(d::WeakKeyDict, newsz) = sizehint!(d.ht, newsz)
sizehint!(d::WeakKeyDict, newsz; shrink = true) = @lock d sizehint!(d.ht, newsz; shrink = shrink)
empty(d::WeakKeyDict, ::Type{K}, ::Type{V}) where {K, V} = WeakKeyDict{K, V}()

IteratorSize(::Type{<:WeakKeyDict}) = SizeUnknown()
Expand Down
4 changes: 2 additions & 2 deletions test/dict.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1492,9 +1492,9 @@ end
sizehint!(d, 10)
@test length(d.slots) < 100
sizehint!(d, 1000)
Base._sizehint!(d, 1; shrink = false)
sizehint!(d, 1; shrink = false)
@test length(d.slots) >= 1000
Base._sizehint!(d, 1; shrink = true)
sizehint!(d, 1; shrink = true)
@test length(d.slots) < 1000
end

Expand Down
15 changes: 15 additions & 0 deletions test/sets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,19 @@ end
sizehint!(s2, 10)
@test s2 == GenericSet(s)
end

@testset "shrinking" begin # Similar test as for the underlying Dict
d = Set(i for i = 1:1000)
filter!(x -> x < 10, d)
sizehint!(d, 10)
@test length(d.dict.slots) < 100
sizehint!(d, 1000)
sizehint!(d, 1; shrink = false)
@test length(d.dict.slots) >= 1000
sizehint!(d, 1; shrink = true)
@test length(d.dict.slots) < 1000
end

@testset "rehash!" begin
# Use a pointer type to have defined behavior for uninitialized
# array element
Expand Down Expand Up @@ -966,4 +979,6 @@ end
end
set = TestSet{Any}()
@test sizehint!(set, 1) === set
@test sizehint!(set, 1; shrink = true) === set
@test sizehint!(set, 1; shrink = false) === set
end
61 changes: 18 additions & 43 deletions test/smallarrayshrink.jl
Original file line number Diff line number Diff line change
@@ -1,45 +1,20 @@
@testset "shrink small array" begin
x = [1, 2, 3, 4]
@test x[1] == 1
@test x[2] == 2
@test x[3] == 3
@test x[4] == 4
@test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4
@test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4
sizehint!(x, 10000)
@test x[1] == 1
@test x[2] == 2
@test x[3] == 3
@test x[4] == 4
@test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4
@test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 10000
sizehint!(x, 4)
@test x[1] == 1
@test x[2] == 2
@test x[3] == 3
@test x[4] == 4
@test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4
@test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4

x = [1, 2, 3, 4]
@test x[1] == 1
@test x[2] == 2
@test x[3] == 3
@test x[4] == 4
@test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4
@test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4
sizehint!(x, 1000000)
@test x[1] == 1
@test x[2] == 2
@test x[3] == 3
@test x[4] == 4
@test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4
@test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 1000000
sizehint!(x, 4)
@test x[1] == 1
@test x[2] == 2
@test x[3] == 3
@test x[4] == 4
@test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4
@test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4
function check_array(x, size, capacity)
@test x[1] == 1
@test x[2] == 2
@test x[3] == 3
@test x[4] == 4
@test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == size
@test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == capacity
end
for hint_size = [10000, 1000000]
x = [1, 2, 3, 4]
check_array(x, 4, 4)
sizehint!(x, hint_size)
check_array(x, 4, hint_size)
sizehint!(x, 4; shrink = false)
check_array(x, 4, hint_size)
sizehint!(x, 4)
check_array(x, 4, 4)
end
end