From 2634a30f25907b9f80316538ee3c632e9a37c614 Mon Sep 17 00:00:00 2001 From: Petr Vana Date: Sun, 29 Oct 2023 16:16:31 +0100 Subject: [PATCH 1/9] Introduce sizehint!(s, n; shrink = true) to controll shrinkage --- base/abstractdict.jl | 3 +-- base/abstractset.jl | 2 +- base/array.jl | 7 +++++-- base/dict.jl | 6 ++---- base/weakkeydict.jl | 2 +- test/dict.jl | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/base/abstractdict.jl b/base/abstractdict.jl index faeab78afaa53..6cdff09ecf7a3 100644 --- a/base/abstractdict.jl +++ b/base/abstractdict.jl @@ -218,8 +218,7 @@ Dict{Int64, Int64} with 3 entries: function merge!(d::AbstractDict, others::AbstractDict...) for other in others if haslength(d) && haslength(other) - # TODO - do not shrink - sizehint!(d, length(d) + length(other)) + sizehint!(d, length(d) + length(other); shrink = false) end for (k,v) in other d[k] = v diff --git a/base/abstractset.jl b/base/abstractset.jl index 3bfc3bffcd831..b38cb2799740b 100644 --- a/base/abstractset.jl +++ b/base/abstractset.jl @@ -101,7 +101,7 @@ max_values(::Type{Bool}) = 2 max_values(::Type{Nothing}) = 1 function union!(s::AbstractSet{T}, itr) where T - haslength(itr) && _sizehint!(s, length(s) + Int(length(itr))::Int; shrink = false) + haslength(itr) && sizehint!(s, length(s) + Int(length(itr))::Int; shrink = false) for x in itr push!(s, x) length(s) == max_values(T) && break diff --git a/base/array.jl b/base/array.jl index 920e51d37be22..b8582aa9048ba 100644 --- a/base/array.jl +++ b/base/array.jl @@ -1441,7 +1441,7 @@ function resize!(a::Vector, nl::Integer) end """ - sizehint!(s, n) -> s + sizehint!(s, n; shrink = true) -> s Suggest that collection `s` reserve capacity for at least `n` elements. That is, if you expect that you're going to have to push a lot of values onto `s`, you can avoid @@ -1462,10 +1462,13 @@ For types that support `sizehint!`, `Base`. 3. `empty!` is nearly costless (and O(1)) for types that support this kind of preallocation. + +4. `shrink` controls if the collection can be shrunk. """ function sizehint! end function sizehint!(a::Vector, sz::Integer) + # TODO - controll shrinkage len = length(a) ref = a.ref mem = ref.mem @@ -1494,7 +1497,7 @@ function sizehint!(a::Vector, sz::Integer) end # Fall-back implementation for non-shrinkable collections -_sizehint!(a, sz; shrink) = sizehint!(a, sz) +sizehint!(a, sz; shrink) = sizehint!(a, sz) """ pop!(collection) -> item diff --git a/base/dict.jl b/base/dict.jl index da165c145bbb1..580cf3293beda 100644 --- a/base/dict.jl +++ b/base/dict.jl @@ -228,7 +228,7 @@ end return h end -function _sizehint!(d::Dict{T}, newsz; shrink = true) where T +function sizehint!(d::Dict{T}, newsz; shrink = true) where T oldsz = length(d.slots) # limit new element count to max_values of the key type newsz = min(max(newsz, length(d)), max_values(T)::Int) @@ -237,8 +237,6 @@ function _sizehint!(d::Dict{T}, newsz; shrink = true) where T return (shrink ? newsz == oldsz : newsz <= oldsz) ? d : rehash!(d, newsz) end -sizehint!(d::Dict{T}, newsz) where T = _sizehint!(d, newsz) - """ empty!(collection) -> collection @@ -777,7 +775,7 @@ function map!(f, iter::ValueIterator{<:Dict}) end function mergewith!(combine, d1::Dict{K, V}, d2::AbstractDict) where {K, V} - haslength(d2) && _sizehint!(d1, length(d1) + length(d2), shrink = false) + haslength(d2) && sizehint!(d1, length(d1) + length(d2), shrink = false) for (k, v) in d2 i, sh = ht_keyindex2_shorthash!(d1, k) if i > 0 diff --git a/base/weakkeydict.jl b/base/weakkeydict.jl index 328f368c80b71..80901fb857b11 100644 --- a/base/weakkeydict.jl +++ b/base/weakkeydict.jl @@ -80,7 +80,7 @@ function _cleanup_locked(h::WeakKeyDict) return h end -sizehint!(d::WeakKeyDict, newsz) = sizehint!(d.ht, newsz) +sizehint!(d::WeakKeyDict, newsz; shrink = true) = sizehint!(d.ht, newsz; shrink = shrink) empty(d::WeakKeyDict, ::Type{K}, ::Type{V}) where {K, V} = WeakKeyDict{K, V}() IteratorSize(::Type{<:WeakKeyDict}) = SizeUnknown() diff --git a/test/dict.jl b/test/dict.jl index 0284ec9399ca4..cfb6522d519ab 100644 --- a/test/dict.jl +++ b/test/dict.jl @@ -1492,9 +1492,9 @@ end sizehint!(d, 10) @test length(d.slots) < 100 sizehint!(d, 1000) - Base._sizehint!(d, 1; shrink = false) + sizehint!(d, 1; shrink = false) @test length(d.slots) >= 1000 - Base._sizehint!(d, 1; shrink = true) + sizehint!(d, 1; shrink = true) @test length(d.slots) < 1000 end From d7eceb4082d59f15af87a6e6fd28ec3316a9af38 Mon Sep 17 00:00:00 2001 From: Petr Vana Date: Sun, 29 Oct 2023 16:42:51 +0100 Subject: [PATCH 2/9] Add shrinkage control for Vector --- base/array.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/base/array.jl b/base/array.jl index b8582aa9048ba..cadce6b1306a5 100644 --- a/base/array.jl +++ b/base/array.jl @@ -1467,8 +1467,7 @@ For types that support `sizehint!`, """ function sizehint! end -function sizehint!(a::Vector, sz::Integer) - # TODO - controll shrinkage +function sizehint!(a::Vector, sz::Integer; shrink = true) len = length(a) ref = a.ref mem = ref.mem @@ -1477,7 +1476,7 @@ function sizehint!(a::Vector, sz::Integer) sz = max(Int(sz), offset + len - 1) if sz <= memlen # if we don't save at least 1/8th memlen then its not worth it to shrink - if memlen - sz <= div(memlen, 8) + if !shrink || memlen - sz <= div(memlen, 8) return a end newmem = array_new_memory(mem, sz) From 0735eeae6e3ca040160c69a3ea48e50291d1c863 Mon Sep 17 00:00:00 2001 From: Petr Vana Date: Sun, 29 Oct 2023 16:49:40 +0100 Subject: [PATCH 3/9] Add shrinkage control for Set --- base/set.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/set.jl b/base/set.jl index a91bf328bd911..88a9d0d531ed7 100644 --- a/base/set.jl +++ b/base/set.jl @@ -117,7 +117,7 @@ copymutable(s::Set{T}) where {T} = Set{T}(s) # Set is the default mutable fall-back copymutable(s::AbstractSet{T}) where {T} = Set{T}(s) -sizehint!(s::Set, newsz) = (sizehint!(s.dict, newsz); s) +sizehint!(s::Set, newsz; shrink = true) = (sizehint!(s.dict, newsz, shrink = shrink); s) empty!(s::Set) = (empty!(s.dict); s) rehash!(s::Set) = (rehash!(s.dict); s) From 271f99929d6fb75f0abf6078277fa3ca210ca4c6 Mon Sep 17 00:00:00 2001 From: Petr Vana Date: Tue, 31 Oct 2023 18:32:39 +0100 Subject: [PATCH 4/9] Add tests for Set and Vector --- test/sets.jl | 15 ++++++++++ test/smallarrayshrink.jl | 61 ++++++++++++---------------------------- 2 files changed, 33 insertions(+), 43 deletions(-) diff --git a/test/sets.jl b/test/sets.jl index 694397a1787eb..e9e73fe74534c 100644 --- a/test/sets.jl +++ b/test/sets.jl @@ -164,6 +164,19 @@ end sizehint!(s2, 10) @test s2 == GenericSet(s) end + +@testset "shrinking" begin # Similar test as for the underlying Dict + d = Set(i for i = 1:1000) + filter!(x -> x < 10, d) + sizehint!(d, 10) + @test length(d.dict.slots) < 100 + sizehint!(d, 1000) + sizehint!(d, 1; shrink = false) + @test length(d.dict.slots) >= 1000 + sizehint!(d, 1; shrink = true) + @test length(d.dict.slots) < 1000 +end + @testset "rehash!" begin # Use a pointer type to have defined behavior for uninitialized # array element @@ -966,4 +979,6 @@ end end set = TestSet{Any}() @test sizehint!(set, 1) === set + @test sizehint!(set, 1; shrink = true) === set + @test sizehint!(set, 1; shrink = false) === set end diff --git a/test/smallarrayshrink.jl b/test/smallarrayshrink.jl index a1a7df5aee5a5..680a882e432d4 100644 --- a/test/smallarrayshrink.jl +++ b/test/smallarrayshrink.jl @@ -1,45 +1,20 @@ @testset "shrink small array" begin - x = [1, 2, 3, 4] - @test x[1] == 1 - @test x[2] == 2 - @test x[3] == 3 - @test x[4] == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4 - sizehint!(x, 10000) - @test x[1] == 1 - @test x[2] == 2 - @test x[3] == 3 - @test x[4] == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 10000 - sizehint!(x, 4) - @test x[1] == 1 - @test x[2] == 2 - @test x[3] == 3 - @test x[4] == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4 - - x = [1, 2, 3, 4] - @test x[1] == 1 - @test x[2] == 2 - @test x[3] == 3 - @test x[4] == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4 - sizehint!(x, 1000000) - @test x[1] == 1 - @test x[2] == 2 - @test x[3] == 3 - @test x[4] == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 1000000 - sizehint!(x, 4) - @test x[1] == 1 - @test x[2] == 2 - @test x[3] == 3 - @test x[4] == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4 - @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4 + function check_array(x, size, capacity) + @test x[1] == 1 + @test x[2] == 2 + @test x[3] == 3 + @test x[4] == 4 + @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == size + @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == capacity + end + for hint_size = [10000, 1000000] + x = [1, 2, 3, 4] + check_array(x, 4, 4) + sizehint!(x, hint_size) + check_array(x, 4, hint_size) + sizehint!(x, 4; shrink = false) + check_array(x, 4, hint_size) + sizehint!(x, 4) + check_array(x, 4, 4) + end end From 620cd2bc452887c8c6293ca155df663825acc95c Mon Sep 17 00:00:00 2001 From: Petr Vana Date: Wed, 1 Nov 2023 10:45:53 +0100 Subject: [PATCH 5/9] lock WeakKeyDict Co-authored-by: Jameson Nash --- base/weakkeydict.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/weakkeydict.jl b/base/weakkeydict.jl index 80901fb857b11..da3b15cf4eed5 100644 --- a/base/weakkeydict.jl +++ b/base/weakkeydict.jl @@ -80,7 +80,7 @@ function _cleanup_locked(h::WeakKeyDict) return h end -sizehint!(d::WeakKeyDict, newsz; shrink = true) = sizehint!(d.ht, newsz; shrink = shrink) +sizehint!(d::WeakKeyDict, newsz; shrink = true) = @lock d sizehint!(d.ht, newsz; shrink = shrink) empty(d::WeakKeyDict, ::Type{K}, ::Type{V}) where {K, V} = WeakKeyDict{K, V}() IteratorSize(::Type{<:WeakKeyDict}) = SizeUnknown() From c0c055bb524977e6b942115e5767f88d36ea22f1 Mon Sep 17 00:00:00 2001 From: Petr Vana Date: Fri, 3 Nov 2023 10:03:05 +0100 Subject: [PATCH 6/9] Make shrink Bool in base/array.jl Co-authored-by: Jameson Nash --- base/array.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/array.jl b/base/array.jl index cadce6b1306a5..b47ff141609f6 100644 --- a/base/array.jl +++ b/base/array.jl @@ -1467,7 +1467,7 @@ For types that support `sizehint!`, """ function sizehint! end -function sizehint!(a::Vector, sz::Integer; shrink = true) +function sizehint!(a::Vector, sz::Integer; shrink::Bool = true) len = length(a) ref = a.ref mem = ref.mem From 088b152f4999dbf6494234baf53aea1c71d3a14e Mon Sep 17 00:00:00 2001 From: Petr Vana Date: Fri, 3 Nov 2023 10:08:03 +0100 Subject: [PATCH 7/9] Force shrink to be Bool --- base/array.jl | 6 +++--- base/set.jl | 2 +- base/weakkeydict.jl | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/base/array.jl b/base/array.jl index cadce6b1306a5..1fa0b94ae04e0 100644 --- a/base/array.jl +++ b/base/array.jl @@ -1441,7 +1441,7 @@ function resize!(a::Vector, nl::Integer) end """ - sizehint!(s, n; shrink = true) -> s + sizehint!(s, n; shrink::Bool = true) -> s Suggest that collection `s` reserve capacity for at least `n` elements. That is, if you expect that you're going to have to push a lot of values onto `s`, you can avoid @@ -1467,7 +1467,7 @@ For types that support `sizehint!`, """ function sizehint! end -function sizehint!(a::Vector, sz::Integer; shrink = true) +function sizehint!(a::Vector, sz::Integer; shrink::Bool = true) len = length(a) ref = a.ref mem = ref.mem @@ -1496,7 +1496,7 @@ function sizehint!(a::Vector, sz::Integer; shrink = true) end # Fall-back implementation for non-shrinkable collections -sizehint!(a, sz; shrink) = sizehint!(a, sz) +sizehint!(a, sz; shrink::Bool) = sizehint!(a, sz) """ pop!(collection) -> item diff --git a/base/set.jl b/base/set.jl index 88a9d0d531ed7..460f6f176f889 100644 --- a/base/set.jl +++ b/base/set.jl @@ -117,7 +117,7 @@ copymutable(s::Set{T}) where {T} = Set{T}(s) # Set is the default mutable fall-back copymutable(s::AbstractSet{T}) where {T} = Set{T}(s) -sizehint!(s::Set, newsz; shrink = true) = (sizehint!(s.dict, newsz, shrink = shrink); s) +sizehint!(s::Set, newsz; shrink::Bool = true) = (sizehint!(s.dict, newsz, shrink = shrink); s) empty!(s::Set) = (empty!(s.dict); s) rehash!(s::Set) = (rehash!(s.dict); s) diff --git a/base/weakkeydict.jl b/base/weakkeydict.jl index 80901fb857b11..5073e55191847 100644 --- a/base/weakkeydict.jl +++ b/base/weakkeydict.jl @@ -80,7 +80,7 @@ function _cleanup_locked(h::WeakKeyDict) return h end -sizehint!(d::WeakKeyDict, newsz; shrink = true) = sizehint!(d.ht, newsz; shrink = shrink) +sizehint!(d::WeakKeyDict, newsz; shrink::Bool = true) = @lock d sizehint!(d.ht, newsz; shrink = shrink) empty(d::WeakKeyDict, ::Type{K}, ::Type{V}) where {K, V} = WeakKeyDict{K, V}() IteratorSize(::Type{<:WeakKeyDict}) = SizeUnknown() From 655a5bc17dea8c9e315b8cdced3044c81ce20525 Mon Sep 17 00:00:00 2001 From: Petr Vana Date: Fri, 3 Nov 2023 10:21:39 +0100 Subject: [PATCH 8/9] Compat + news --- NEWS.md | 1 + base/array.jl | 3 +++ 2 files changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index 75a8ac9ab71f8..43423ea52afa1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -55,6 +55,7 @@ New library features -------------------- * `replace(string, pattern...)` now supports an optional `IO` argument to write the output to a stream rather than returning a string ([#48625]). +* `sizehint!(s, n)` now support an optional `shrink` argument to disable shrinking ([#51929]). Standard library changes ------------------------ diff --git a/base/array.jl b/base/array.jl index 1fa0b94ae04e0..dd5e600297f6a 100644 --- a/base/array.jl +++ b/base/array.jl @@ -1464,6 +1464,9 @@ For types that support `sizehint!`, 3. `empty!` is nearly costless (and O(1)) for types that support this kind of preallocation. 4. `shrink` controls if the collection can be shrunk. + +!!! compat "Julia 1.11" + The `shrink` argument was added in Julia 1.11. """ function sizehint! end From 42ff9ecfde5b76ea17707ea040cf8a1ec836a76c Mon Sep 17 00:00:00 2001 From: Petr Vana Date: Fri, 3 Nov 2023 13:15:34 +0100 Subject: [PATCH 9/9] Typo --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 43423ea52afa1..c96d75cdd4803 100644 --- a/NEWS.md +++ b/NEWS.md @@ -55,7 +55,7 @@ New library features -------------------- * `replace(string, pattern...)` now supports an optional `IO` argument to write the output to a stream rather than returning a string ([#48625]). -* `sizehint!(s, n)` now support an optional `shrink` argument to disable shrinking ([#51929]). +* `sizehint!(s, n)` now supports an optional `shrink` argument to disable shrinking ([#51929]). Standard library changes ------------------------