From b650a45a35e9915b381a4d9137b4f113f4c3a7db Mon Sep 17 00:00:00 2001 From: Mauro Werder Date: Sun, 3 Dec 2017 23:28:28 +0100 Subject: [PATCH] Implemented EgalDict{K,V}, aka ObjectIdDict{K,V} --- base/dict.jl | 276 ++++++++++++++++++++++++++++++++---------------- base/exports.jl | 1 + test/dict.jl | 54 ++++++++++ 3 files changed, 240 insertions(+), 91 deletions(-) diff --git a/base/dict.jl b/base/dict.jl index bd3ac41287e550..af40399c02e883 100644 --- a/base/dict.jl +++ b/base/dict.jl @@ -59,12 +59,38 @@ abstract type AbstractSerializer end # Dict +""" +Abstract supertype of all types following Dict-like setup. + +Subtypes distinguish themselves by implementing different hashing and/or +equality. +""" +abstract type AbstractDict{K,V} <: Associative{K,V} end + +""" + isequalkey(::AbstractDict, k1, k2) + +Set the equality function to use for key-comparison. Needs to be consistent with +the `keyhash` function: isequalkey(h, k1, k2) must imply keyhash(h,k1)==keyhash(h,k2). +""" +function isequalkey end + +""" + keyhash(::AbstractDict, k) + +Set the hash function to use for key-hashing. Needs to be consistent with isequalkey: +isequalkey(h, k1, k2) must imply keyhash(h,k1)==keyhash(h,k2). +""" +function keyhash end + # These can be changed, to trade off better performance for space const global maxallowedprobe = 16 const global maxprobeshift = 6 _tablesz(x::Integer) = x < 16 ? 
16 : one(x)<<((sizeof(x)<<3)-leading_zeros(x-1)) + +# The standard dict: """ Dict([itr]) @@ -89,7 +115,7 @@ Dict{String,Int64} with 2 entries: "A" => 1 ``` """ -mutable struct Dict{K,V} <: Associative{K,V} +mutable struct Dict{K,V} <: AbstractDict{K,V} slots::Array{UInt8,1} keys::Array{K,1} vals::Array{V,1} @@ -111,45 +137,125 @@ mutable struct Dict{K,V} <: Associative{K,V} new(slots, keys, vals, ndel, count, age, idxfloor, maxprobe) end end -function Dict{K,V}(kv) where V where K - h = Dict{K,V}() - for (k,v) in kv - h[k] = v +isequalkey(::Dict, k1, k2) = isequal(k1, k2) +keyhash(::Dict, k) = hash(k) +const AnyDict = Dict{Any,Any} + +# The Egal Dict, aka ObjectIdDict with type-parameters +""" + EgalDict([itr]) + +`EgalDict{K,V}()` constructs a hash table with keys of type `K` and values of type `V`, where keys are compared with `===`. + +Given a single iterable argument, constructs an [`EgalDict`](@ref) whose key-value pairs +are taken from 2-tuples `(key,value)` generated by the argument. + +```jldoctest +julia> EgalDict([("A", 1), ("B", 2)]) +EgalDict{String,Int64} with 2 entries: + "B" => 2 + "A" => 1 +``` + +Alternatively, a sequence of pair arguments may be passed. + +```jldoctest +julia> EgalDict("A"=>1, "B"=>2) +EgalDict{String,Int64} with 2 entries: + "B" => 2 + "A" => 1 +``` +""" +mutable struct EgalDict{K,V} <: AbstractDict{K,V} + slots::Array{UInt8,1} + keys::Array{K,1} + vals::Array{V,1} + ndel::Int + count::Int + age::UInt + idxfloor::Int # an index <= the indexes of all used slots + maxprobe::Int + + function EgalDict{K,V}() where V where K + n = 16 + new(zeros(UInt8,n), Vector{K}(uninitialized, n), Vector{V}(uninitialized, n), 0, 0, 0, 1, 0) end - return h -end -Dict{K,V}(p::Pair) where {K,V} = setindex!(Dict{K,V}(), p.second, p.first) -function Dict{K,V}(ps::Pair...) 
where V where K - h = Dict{K,V}() - sizehint!(h, length(ps)) - for p in ps - h[p.first] = p.second + function EgalDict{K,V}(d::EgalDict{K,V}) where V where K + new(copy(d.slots), copy(d.keys), copy(d.vals), d.ndel, d.count, d.age, + d.idxfloor, d.maxprobe) + end + function EgalDict{K, V}(slots, keys, vals, ndel, count, age, idxfloor, maxprobe) where {K, V} + new(slots, keys, vals, ndel, count, age, idxfloor, maxprobe) end - return h end -# Note the constructors of WeakKeyDict mirror these here, keep in sync. -Dict() = Dict{Any,Any}() -Dict(kv::Tuple{}) = Dict() -copy(d::Dict) = Dict(d) +isequalkey(::EgalDict, k1, k2) = k1===k2 +keyhash(::EgalDict, k) = object_id(k) -const AnyDict = Dict{Any,Any} +# Constructors +for D in [:Dict, :EgalDict] + eval( + quote + function $D{K,V}(kv) where V where K + h = $D{K,V}() + for (k,v) in kv + h[k] = v + end + return h + end + $D{K,V}(p::Pair) where {K,V} = setindex!($D{K,V}(), p.second, p.first) + function $D{K,V}(ps::Pair...) where V where K + h = $D{K,V}() + sizehint!(h, length(ps)) + for p in ps + h[p.first] = p.second + end + return h + end + # Note the constructors of WeakKeyDict mirror these here, keep in sync. + $D() = $D{Any,Any}() + $D(kv::Tuple{}) = $D() + copy(d::$D) = $D(d) + + $D(ps::Pair{K,V}...) where {K,V} = $D{K,V}(ps) + $D(ps::Pair{K}...) where {K} = $D{K,Any}(ps) + $D(ps::(Pair{K,V} where K)...) where {V} = $D{Any,V}(ps) + $D(ps::Pair...) = $D{Any,Any}(ps) + + function $D(kv) + try + associative_with_eltype((K, V) -> $D{K, V}, kv, eltype(kv)) + catch e + if !applicable(start, kv) || !all(x->isa(x,Union{Tuple,Pair}),kv) + throw(ArgumentError("$($D)(kv): kv needs to be an iterator of tuples or pairs")) + else + rethrow(e) + end + end + end + empty(a::$D, ::Type{K}, ::Type{V}) where {K, V} = $D{K, V}() + end + ) +end -Dict(ps::Pair{K,V}...) where {K,V} = Dict{K,V}(ps) -Dict(ps::Pair{K}...) where {K} = Dict{K,Any}(ps) -Dict(ps::(Pair{K,V} where K)...) where {V} = Dict{Any,V}(ps) -Dict(ps::Pair...) 
= Dict{Any,Any}(ps) - -function Dict(kv) - try - associative_with_eltype((K, V) -> Dict{K, V}, kv, eltype(kv)) - catch e - if !applicable(start, kv) || !all(x->isa(x,Union{Tuple,Pair}),kv) - throw(ArgumentError("Dict(kv): kv needs to be an iterator of tuples or pairs")) +# Conversion to Dict from other Associative +# (does not make sense for EgalDict as keys should not be converted then) +function convert(::Type{Dict{K,V}},d::Associative) where V where K + h = Dict{K,V}() + for (k,v) in d + ck = convert(K,k) + if !haskey(h,ck) + h[ck] = convert(V,v) else - rethrow(e) + error("key collision during dictionary conversion") end end + return h end +convert(::Type{Dict{K,V}},d::Dict{K,V}) where {K,V} = d + + + +# Inner workings TP{K,V} = Union{Type{Tuple{K,V}},Type{Pair{K,V}}} @@ -188,30 +294,14 @@ function grow_to!(dest::Associative{K,V}, itr, st) where V where K return dest end -empty(a::Associative, ::Type{K}, ::Type{V}) where {K, V} = Dict{K, V}() - -# conversion between Dict types -function convert(::Type{Dict{K,V}},d::Associative) where V where K - h = Dict{K,V}() - for (k,v) in d - ck = convert(K,k) - if !haskey(h,ck) - h[ck] = convert(V,v) - else - error("key collision during dictionary conversion") - end - end - return h -end -convert(::Type{Dict{K,V}},d::Dict{K,V}) where {K,V} = d -hashindex(key, sz) = (((hash(key)%Int) & (sz-1)) + 1)::Int +hashindex(h::AbstractDict, key, sz) = (((keyhash(h, key)%Int) & (sz-1)) + 1)::Int -@propagate_inbounds isslotempty(h::Dict, i::Int) = h.slots[i] == 0x0 -@propagate_inbounds isslotfilled(h::Dict, i::Int) = h.slots[i] == 0x1 -@propagate_inbounds isslotmissing(h::Dict, i::Int) = h.slots[i] == 0x2 +@propagate_inbounds isslotempty(h::AbstractDict, i::Int) = h.slots[i] == 0x0 +@propagate_inbounds isslotfilled(h::AbstractDict, i::Int) = h.slots[i] == 0x1 +@propagate_inbounds isslotmissing(h::AbstractDict, i::Int) = h.slots[i] == 0x2 -function rehash!(h::Dict{K,V}, newsz = length(h.keys)) where V where K +function 
rehash!(h::AbstractDict{K,V}, newsz = length(h.keys)) where V where K olds = h.slots oldk = h.keys oldv = h.vals @@ -239,7 +329,7 @@ function rehash!(h::Dict{K,V}, newsz = length(h.keys)) where V where K @inbounds if olds[i] == 0x1 k = oldk[i] v = oldv[i] - index0 = index = hashindex(k, newsz) + index0 = index = hashindex(h, k, newsz) while slots[index] != 0 index = (index & (newsz-1)) + 1 end @@ -273,7 +363,7 @@ max_values(T::Type{<:Union{Void,BitIntegerSmall}}) = 1 << (8*sizeof(T)) max_values(T::Union) = max(max_values(T.a), max_values(T.b)) max_values(::Type{Bool}) = 2 -function sizehint!(d::Dict{T}, newsz) where T +function sizehint!(d::AbstractDict{T}, newsz) where T oldsz = length(d.slots) if newsz <= oldsz # todo: shrink @@ -304,7 +394,7 @@ julia> A Dict{String,Int64} with 0 entries ``` """ -function empty!(h::Dict{K,V}) where V where K +function empty!(h::AbstractDict{K,V}) where V where K fill!(h.slots, 0x0) sz = length(h.slots) empty!(h.keys) @@ -319,18 +409,18 @@ function empty!(h::Dict{K,V}) where V where K end # get the index where a key is stored, or -1 if not present -function ht_keyindex(h::Dict{K,V}, key) where V where K +function ht_keyindex(h::AbstractDict, key) sz = length(h.keys) iter = 0 maxprobe = h.maxprobe - index = hashindex(key, sz) + index = hashindex(h, key, sz) keys = h.keys @inbounds while true if isslotempty(h,index) break end - if !isslotmissing(h,index) && (key === keys[index] || isequal(key,keys[index])) + if !isslotmissing(h,index) && (key === keys[index] || isequalkey(h, key, keys[index])) return index end @@ -344,12 +434,12 @@ end # get the index where a key is stored, or -pos if not present # and the key would be inserted at pos # This version is for use by setindex! and get! 
-function ht_keyindex2!(h::Dict{K,V}, key) where V where K +function ht_keyindex2!(h::AbstractDict, key) age0 = h.age sz = length(h.keys) iter = 0 maxprobe = h.maxprobe - index = hashindex(key, sz) + index = hashindex(h, key, sz) avail = 0 keys = h.keys @@ -367,7 +457,7 @@ function ht_keyindex2!(h::Dict{K,V}, key) where V where K # in case "key" already exists in a later collided slot. avail = -index end - elseif key === keys[index] || isequal(key, keys[index]) + elseif key === keys[index] || isequalkey(h, key, keys[index]) return index end @@ -394,7 +484,7 @@ function ht_keyindex2!(h::Dict{K,V}, key) where V where K return ht_keyindex2!(h, key) end -@propagate_inbounds function _setindex!(h::Dict, v, key, index) +@propagate_inbounds function _setindex!(h::AbstractDict, v, key, index) h.slots[index] = 0x1 h.keys[index] = key h.vals[index] = v @@ -412,15 +502,15 @@ end end end -function setindex!(h::Dict{K,V}, v0, key0) where V where K +function setindex!(h::AbstractDict{K}, v0, key0) where K key = convert(K, key0) - if !isequal(key, key0) + if !isequalkey(h, key, key0) throw(ArgumentError("$key0 is not a valid key for type $K")) end setindex!(h, v0, key) end -function setindex!(h::Dict{K,V}, v0, key::K) where V where K +function setindex!(h::AbstractDict{K,V}, v0, key::K) where V where K v = convert(V, v0) index = ht_keyindex2!(h, key) @@ -461,7 +551,7 @@ Dict{String,Int64} with 4 entries: """ get!(collection, key, default) -get!(h::Dict{K,V}, key0, default) where {K,V} = get!(()->default, h, key0) +get!(h::AbstractDict{K,V}, key0, default) where {K,V} = get!(()->default, h, key0) """ get!(f::Function, collection, key) @@ -479,15 +569,15 @@ end """ get!(f::Function, collection, key) -function get!(default::Callable, h::Dict{K,V}, key0) where V where K +function get!(default::Callable, h::AbstractDict{K,V}, key0) where V where K key = convert(K, key0) - if !isequal(key, key0) + if !isequalkey(h, key, key0) throw(ArgumentError("$key0 is not a valid key for type $K")) 
end return get!(default, h, key) end -function get!(default::Callable, h::Dict{K,V}, key::K) where V where K +function get!(default::Callable, h::AbstractDict{K,V}, key::K) where V where K index = ht_keyindex2!(h, key) index > 0 && return h.vals[index] @@ -516,7 +606,7 @@ macro get!(h, key0, default) end -function getindex(h::Dict{K,V}, key) where V where K +function getindex(h::AbstractDict{K,V}, key) where V where K index = ht_keyindex(h, key) @inbounds return (index < 0) ? throw(KeyError(key)) : h.vals[index]::V end @@ -540,7 +630,7 @@ julia> get(d, "c", 3) """ get(collection, key, default) -function get(h::Dict{K,V}, key, default) where V where K +function get(h::AbstractDict{K,V}, key, default) where V where K index = ht_keyindex(h, key) @inbounds return (index < 0) ? default : h.vals[index]::V end @@ -562,7 +652,7 @@ end """ get(::Function, collection, key) -function get(default::Callable, h::Dict{K,V}, key) where V where K +function get(default::Callable, h::AbstractDict{K,V}, key) where V where K index = ht_keyindex(h, key) @inbounds return (index < 0) ? default() : h.vals[index]::V end @@ -585,8 +675,8 @@ julia> haskey(a,'c') false ``` """ -haskey(h::Dict, key) = (ht_keyindex(h, key) >= 0) -in(key, v::KeySet{<:Any, <:Dict}) = (ht_keyindex(v.dict, key) >= 0) +haskey(h::AbstractDict, key) = (ht_keyindex(h, key) >= 0) +in(key, v::KeySet{<:Any, <:AbstractDict}) = (ht_keyindex(v.dict, key) >= 0) """ getkey(collection, key, default) @@ -606,18 +696,18 @@ julia> getkey(a,'d','a') 'a': ASCII/Unicode U+0061 (category Ll: Letter, lowercase) ``` """ -function getkey(h::Dict{K,V}, key, default) where V where K +function getkey(h::AbstractDict{K,V}, key, default) where V where K index = ht_keyindex(h, key) @inbounds return (index<0) ? 
default : h.keys[index]::K end -function _pop!(h::Dict, index) +function _pop!(h::AbstractDict, index) val = h.vals[index] _delete!(h, index) return val end -function pop!(h::Dict, key) +function pop!(h::AbstractDict, key) index = ht_keyindex(h, key) return index > 0 ? _pop!(h, index) : throw(KeyError(key)) end @@ -646,12 +736,12 @@ julia> pop!(d, "e", 4) """ pop!(collection, key, default) -function pop!(h::Dict, key, default) +function pop!(h::AbstractDict, key, default) index = ht_keyindex(h, key) return index > 0 ? _pop!(h, index) : default end -function pop!(h::Dict) +function pop!(h::AbstractDict) isempty(h) && throw(ArgumentError("dict must be non-empty")) idx = start(h) @inbounds key = h.keys[idx] @@ -660,7 +750,7 @@ function pop!(h::Dict) key => val end -function _delete!(h::Dict, index) +function _delete!(h::AbstractDict, index) h.slots[index] = 0x2 ccall(:jl_arrayunset, Void, (Any, UInt), h.keys, index-1) ccall(:jl_arrayunset, Void, (Any, UInt), h.vals, index-1) @@ -689,7 +779,7 @@ Dict{String,Int64} with 1 entry: """ delete!(collection, key) -function delete!(h::Dict, key) +function delete!(h::AbstractDict, key) index = ht_keyindex(h, key) if index > 0 _delete!(h, index) @@ -697,7 +787,7 @@ function delete!(h::Dict, key) return h end -function skip_deleted(h::Dict, i) +function skip_deleted(h::AbstractDict, i) L = length(h.slots) @inbounds while i<=L && !isslotfilled(h,i) i += 1 @@ -705,27 +795,31 @@ function skip_deleted(h::Dict, i) return i end -function start(t::Dict) +function start(t::AbstractDict) i = skip_deleted(t, t.idxfloor) t.idxfloor = i return i end -done(t::Dict, i) = i > length(t.vals) -@propagate_inbounds function next(t::Dict{K,V}, i) where {K,V} +done(t::AbstractDict, i) = i > length(t.vals) +@propagate_inbounds function next(t::AbstractDict{K,V}, i) where {K,V} return (Pair{K,V}(t.keys[i],t.vals[i]), skip_deleted(t,i+1)) end -isempty(t::Dict) = (t.count == 0) -length(t::Dict) = t.count +isempty(t::AbstractDict) = (t.count == 0) 
+length(t::AbstractDict) = t.count -@propagate_inbounds function next(v::KeySet{<:Any, <:Dict}, i) +@propagate_inbounds function next(v::KeySet{<:Any, <:AbstractDict}, i) return (v.dict.keys[i], skip_deleted(v.dict,i+1)) end -@propagate_inbounds function next(v::ValueIterator{<:Dict}, i) +@propagate_inbounds function next(v::ValueIterator{<:AbstractDict}, i) return (v.dict.vals[i], skip_deleted(v.dict,i+1)) end -filter!(f, d::Dict) = filter_in_one_pass!(f, d) +filter!(f, d::AbstractDict) = filter_in_one_pass!(f, d) + +################# +# Immutable Dict +################# struct ImmutableDict{K,V} <: Associative{K,V} parent::ImmutableDict{K,V} @@ -798,5 +892,5 @@ length(t::ImmutableDict) = count(x->true, t) isempty(t::ImmutableDict) = done(t, start(t)) empty(::ImmutableDict, ::Type{K}, ::Type{V}) where {K, V} = ImmutableDict{K,V}() -_similar_for(c::Dict, ::Type{Pair{K,V}}, itr, isz) where {K, V} = empty(c, K, V) +_similar_for(c::AbstractDict, ::Type{Pair{K,V}}, itr, isz) where {K, V} = empty(c, K, V) _similar_for(c::Associative, T, itr, isz) = throw(ArgumentError("for Associatives, similar requires an element type of Pair;\n if calling map, consider a comprehension instead")) diff --git a/base/exports.jl b/base/exports.jl index 31af9f6ce78136..e7973e250ad11f 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -79,6 +79,7 @@ export NTuple, Nullable, ObjectIdDict, + EgalDict, OrdinalRange, Pair, PartialQuickSort, diff --git a/test/dict.jl b/test/dict.jl index 7212022e51ea4b..1ef000d49b552b 100644 --- a/test/dict.jl +++ b/test/dict.jl @@ -424,6 +424,60 @@ end @test eltype(d) == Pair{Any,Any} end +# Egal dict +@testset "issue #10647 for EgalDict" begin + a = EgalDict() + a[1] = a + a[a] = 2 + a[3] = T10647(a) + @test a == a + show(IOBuffer(), a) + Base.show(Base.IOContext(IOBuffer(), :limit => true), a) + Base.show(IOBuffer(), a) + Base.show(Base.IOContext(IOBuffer(), :limit => true), a) +end + +@testset "EgalDict" begin + a = EgalDict() + a[1] = a + a[a] = 2 + + sa 
= empty(a) + @test isempty(sa) + @test isa(sa, EgalDict) + + @test length(a) == 2 + @test 1 in keys(a) + @test a in keys(a) + @test a[1] === a + @test a[a] === 2 + + ca = copy(a) + @test length(ca) == length(a) + @test ca == a + @test ca !== a # make sure they are different objects + + ca = empty!(ca) + @test length(ca) == 0 + @test length(a) == 2 + + d = Dict('a'=>1, 'b'=>1, 'c'=> 3) + @test a != d + + @test length(EgalDict(1=>2, 1.0=>3)) == 2 + @test length(Dict(1=>2, 1.0=>3)) == 1 + + d = @inferred EgalDict(i=>i for i=1:3) + @test isa(d, EgalDict) + @test d == EgalDict(1=>1, 2=>2, 3=>3) + + d = @inferred EgalDict(Pair(1,1), Pair(2,2), Pair(3,3)) + @test isa(d, EgalDict) + @test d == EgalDict(1=>1, 2=>2, 3=>3) + @test eltype(d) == Pair{Int,Int} +end + + @testset "Issue #7944" begin d = Dict{Int,Int}() get!(d, 0) do