Skip to content

Commit

Permalink
add issetequal and make hash/== generic for AbstractSet (#25368)
Browse files Browse the repository at this point in the history
  • Loading branch information
rfourquet authored and JeffBezanson committed Jan 9, 2018
1 parent 36a492c commit 6a45619
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 41 deletions.
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,10 @@ This section lists changes that do not have deprecation warnings.
trait; see its documentation for details. Types which support subtraction (operator
`-`) must now implement `widen` for hashing to work inside heterogeneous arrays.

* `AbstractSet` objects are now considered equal by `==` and `isequal` if all of their
elements are equal ([#25368]). This has required changing the hashing algorithm
for `BitSet`.

* `findn(x::AbstractVector)` now returns a 1-tuple with the vector of indices, to be
consistent with higher order arrays ([#25365]).

Expand Down
30 changes: 3 additions & 27 deletions base/bitset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -356,33 +356,9 @@ function ==(s1::BitSet, s2::BitSet)
return true
end

# `a` is contained in `b` exactly when intersecting it with `b` leaves it unchanged.
function issubset(a::BitSet, b::BitSet)
    common = intersect(a, b)
    return isequal(a, common)
end

# Strict inclusion: `a` is contained in `b` and is not equal to it.
<(a::BitSet, b::BitSet) = a <= b ? !isequal(a, b) : false

# Non-strict inclusion is plain subset testing.
<=(a::BitSet, b::BitSet) = issubset(a, b)

const hashis_seed = UInt === UInt64 ? 0x88989f1fc7dea67d : 0xc7dea67d

# Hash a `BitSet` from the span of `s.bits` that lies between its first and last
# entries different from `CHK0`, so that entries equal to `CHK0` at either end do
# not influence the result.
function hash(s::BitSet, h::UInt)
    h ⊻= hashis_seed
    chunks = s.bits
    first_used = 1
    last_used = length(chunks)

    # Drop trailing entries equal to CHK0 so extra space cannot change the hash.
    while last_used > 0 && chunks[last_used] == CHK0
        last_used -= 1
    end
    # Drop leading entries equal to CHK0 for the same reason.
    while first_used <= last_used && chunks[first_used] == CHK0
        first_used += 1
    end

    # Nothing left between the two cursors: the set is empty.
    first_used > last_used && return h

    # Mix in the normalized offset, then every remaining entry from last to first.
    h = hash(first_used + s.offset, h)
    for k in last_used:-1:first_used
        h = hash(chunks[k], h)
    end
    return h
end
# `a` is a subset of `b` exactly when intersecting it with `b` leaves it unchanged.
issubset(a::BitSet, b::BitSet) = a == intersect(a,b)
# Strict subset: contained in `b` without being equal to it.
⊊(a::BitSet, b::BitSet) = a <= b && a != b


minimum(s::BitSet) = first(s)
maximum(s::BitSet) = last(s)
Expand Down
1 change: 1 addition & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,7 @@ export
intersect,
isempty,
issubset,
issetequal,
keys,
keytype,
length,
Expand Down
44 changes: 30 additions & 14 deletions base/set.jl
Original file line number Diff line number Diff line change
Expand Up @@ -261,9 +261,11 @@ function symdiff!(s::AbstractSet, itr)
s
end

# Two `Set`s are equal when they have the same length and one is contained in the other.
==(l::Set, r::Set) = length(l) == length(r) ? l <= r : false
# Strict inclusion: `l` has fewer elements than `r` and is contained in it.
<(l::Set, r::Set) = length(l) < length(r) ? l <= r : false
# Non-strict inclusion is plain subset testing.
<=(l::Set, r::Set) = issubset(l, r)
# Two sets are equal when they have the same length and one is contained in the other.
==(l::AbstractSet, r::AbstractSet) = length(l) == length(r) && l ⊆ r
# convenience functions for AbstractSet
# (if needed, only their synonyms ⊊ and ⊆ must be specialized)
<( l::AbstractSet, r::AbstractSet) = l ⊊ r
<=(l::AbstractSet, r::AbstractSet) = l ⊆ r

"""
issubset(a, b)
Expand All @@ -290,21 +292,35 @@ function issubset(l, r)
end
return true
end

# use the implementation below when it becomes as efficient
# issubset(l, r) = all(_in(r), l)

# `⊆` is an alias for `issubset`; the operators below are its strict, negated and
# reversed variants.
const ⊆ = issubset
⊊(l::Set, r::Set) = <(l, r)
⊈(l::Set, r::Set) = !⊆(l, r)
⊇(l, r) = issubset(r, l)
⊉(l::Set, r::Set) = !⊇(l, r)
⊋(l::Set, r::Set) = <(r, l)

# Same operators for any two sets of one common `AbstractSet` type.
⊊(l::T, r::T) where {T<:AbstractSet} = <(l, r)
⊈(l::T, r::T) where {T<:AbstractSet} = !⊆(l, r)
⊉(l::T, r::T) where {T<:AbstractSet} = !⊇(l, r)
⊋(l::T, r::T) where {T<:AbstractSet} = <(r, l)
"""
    issetequal(a, b)

Determine whether `a` and `b` have the same elements. Equivalent
to `a ⊆ b && b ⊆ a`.

# Examples
```jldoctest
julia> issetequal([1, 2], [1, 2, 3])
false

julia> issetequal([1, 2], [2, 1])
true
```
"""
# Lengths are deliberately not compared here: arbitrary collections may repeat
# elements, so equal lengths neither imply nor are implied by equal element sets.
issetequal(l, r) = l ⊆ r && r ⊆ l
issetequal(l::AbstractSet, r::AbstractSet) = l == r

# Proper subset. For two `AbstractSet`s a length comparison is a cheap first check;
# general collections may repeat elements, so inclusion is tested in both directions.
⊊(l::AbstractSet, r::AbstractSet) = length(l) < length(r) && l ⊆ r
⊊(l, r) = l ⊆ r && !(r ⊆ l)
# Negated subset.
⊈(l, r) = !⊆(l, r)

# Superset variants: the subset operators with their arguments swapped.
⊇(l, r) = r ⊆ l
⊉(l, r) = r ⊈ l
⊋(l, r) = r ⊊ l

"""
unique(itr)
Expand Down Expand Up @@ -534,7 +550,7 @@ function mapfilter(pred, f, itr, res)
end

const hashs_seed = UInt === UInt64 ? 0x852ada37cfe8e0ce : 0xcfe8e0ce
function hash(s::Set, h::UInt)
function hash(s::AbstractSet, h::UInt)
hv = hashs_seed
for x in s
hv ⊻= hash(x)
Expand Down
46 changes: 46 additions & 0 deletions test/sets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,15 @@ end
@test !isequal(Set{Any}([1,2,3,4]), Set{Int}([1,2,3]))
@test !isequal(Set{Int}([1,2,3,4]), Set{Any}([1,2,3]))
end

# A `Set` and the `BitSet` built from it must compare equal and hash identically.
@testset "hash and == for Set/BitSet" begin
    samples = (Set([1]), Set(1:10), Set(-100:7:100))
    for s in samples
        b = BitSet(s)
        @test s == b
        @test hash(s) == hash(b)
    end
end

@testset "eltype, empty" begin
s1 = empty(Set([1,"hello"]))
@test isequal(s1, Set())
Expand Down Expand Up @@ -536,3 +545,40 @@ end
# avoid recursive call issue #25384
@test_throws MethodError replace!("")
end

# `a` holds a strict subset of the elements of `b`; every operator is checked in both
# directions for each container type produced by `C`.
@testset "⊆, ⊊, ⊈, ⊇, ⊋, ⊉, <, <=, issetequal" begin
    a = [1, 2]
    b = [2, 1, 3]
    for C = (Tuple, identity, Set, BitSet)
        A = C(a)
        B = C(b)
        @test A ⊆ B
        @test A ⊊ B
        @test !(A ⊈ B)
        @test !(A ⊇ B)
        @test !(A ⊋ B)
        @test A ⊉ B
        @test !(B ⊆ A)
        @test !(B ⊊ A)
        @test B ⊈ A
        @test B ⊇ A
        @test B ⊋ A
        @test !(B ⊉ A)
        @test !issetequal(A, B)
        @test !issetequal(B, A)
        # The ordering operators are only exercised on AbstractSet operands.
        if A isa AbstractSet && B isa AbstractSet
            @test A <= B
            @test A < B
            @test !(A >= B)
            @test !(A > B)
            @test !(B <= A)
            @test !(B < A)
            @test B >= A
            @test B > A
        end
        # issetequal must not depend on the container type of either argument.
        for D = (Tuple, identity, Set, BitSet)
            @test issetequal(A, D(A))
            @test !issetequal(A, D(B))
        end
    end
end

2 comments on commit 6a45619

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily benchmark build, I will reply here when finished:

@nanosoldier runbenchmarks(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your benchmark job has completed - possible performance regressions were detected. A full report can be found here. cc @ararslan

Please sign in to comment.