JuliaData · bkamins · Jun 28, 2021 · Jun 11, 2021 · Jun 18, 2021 · Jun 18, 2021
diff --git a/NEWS.md b/NEWS.md
@@ -14,6 +14,10 @@
 
 ## Bug fixes
 
+* fix bug in how `groupby` handles grouping of float columns;
+  now `-0.0` is treated as *not integer* when deciding on which
+  grouping algorithm should be used
+  ([#2791](https://github.com/JuliaData/DataFrames.jl/pull/2791))
 * fix bug in how `issorted` handles custom orderings and improve performance
   of sorting when complex custom orderings are passed
   ([#2746](https://github.com/JuliaData/DataFrames.jl/pull/2746))

diff --git a/src/groupeddataframe/groupeddataframe.jl b/src/groupeddataframe/groupeddataframe.jl
@@ -79,6 +79,11 @@ an `AbstractDict` can be used to index into a grouped data frame where
 the keys are column names of the data frame. The order of the keys does
 not matter in this case.
 
+A column is considered to be an integer column when deciding on the grouping
+algorithm choice if its `eltype` is a subtype of `Union{Missing, Real}`, all its
+elements are either `missing` or pass `isinteger` test, and none of them is
+equal to `-0.0`.
+
 # See also
 
 [`combine`](@ref), [`select`](@ref), [`select!`](@ref), [`transform`](@ref), [`transform!`](@ref)

diff --git a/src/groupeddataframe/utils.jl b/src/groupeddataframe/utils.jl
@@ -143,10 +143,22 @@ function refpool_and_array(x::AbstractArray)
         else
             return nothing, nothing
         end
-    elseif x isa AbstractArray{<:Union{Real, Missing}} &&
-        all(v -> ismissing(v) | isinteger(v), x) &&
-        !isempty(skipmissing(x))
-        minval, maxval = extrema(skipmissing(x))
+    elseif x isa AbstractArray{<:Union{Real, Missing}} && length(x) > 0
+        if !(x isa AbstractArray{<:Union{Integer, Missing}})
+            if !all(v -> (ismissing(v) | isinteger(v)) & !isequal(v, -0.0), x)
+                return nothing, nothing
+            end
+        end
+        if Missing <: eltype(x)
+            smx = skipmissing(x)
+            if isempty(smx)
+                return nothing, nothing
+            else
+                minval, maxval = extrema(smx)
+            end
+        else
+            minval, maxval = extrema(x)
+        end
         ngroups = big(maxval) - big(minval) + 1
         # Threshold chosen with the same rationale as the row_group_slots refpool method:
         # refpool approach is faster but we should not allocate too much memory either

diff --git a/test/grouping.jl b/test/grouping.jl
@@ -3710,6 +3710,14 @@ end
     end
 end
 
+@testset "grouping floats" begin
+    @test length(groupby_checked(DataFrame(a=[0.0, -0.0]), :a)) == 2
+    @test getindex.(keys(groupby_checked(DataFrame(a=[3.0, 2.0, 0.0]), :a)), 1) ==
+          [0, 2, 3]
+    @test getindex.(keys(groupby_checked(DataFrame(a=[3.0, 2.0, -0.0]), :a)), 1) ==
+          [3, 2, 0]
+end
+
 @testset "aggregation with matrix of Pair" begin
     df = DataFrame(a=["a", "b","a", "b"], x=1:4, y=11:14)
     gdf = groupby_checked(df, :a)