Skip to content

Commit

Permalink
deprecate map on GroupedDataFrame (#2662)
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins committed Mar 25, 2021
1 parent 7f1df18 commit 059248f
Show file tree
Hide file tree
Showing 8 changed files with 35 additions and 23 deletions.
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
`convert(::Type{NamedTuple}, key::GroupKey)`, and
`convert(::Type{DataFrame}, sdf::SubDataFrame)`; the deprecated methods will be
removed in 1.0 release
* as a bug fix `eltype` of vector returned by `eachrow` is now `DataFrameRow`
([#2662](https://github.com/JuliaData/DataFrames.jl/pull/2662))
* applying `map` to `GroupedDataFrame` is now deprecated. It will
be an error in 1.0 release.
([#2662](https://github.com/JuliaData/DataFrames.jl/pull/2662))

# DataFrames v0.22 Release Notes

Expand Down
2 changes: 1 addition & 1 deletion src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1140,7 +1140,7 @@ _filter!_helper_astable(df::AbstractDataFrame, nti::Tables.NamedTupleIterator, f

function Base.Matrix(df::AbstractDataFrame)
T = reduce(promote_type, (eltype(v) for v in eachcol(df)))
return convert(Matrix{T}, df)
return Matrix{T}(df)
end

function Base.Matrix{T}(df::AbstractDataFrame) where T
Expand Down
6 changes: 3 additions & 3 deletions src/abstractdataframe/iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@

# Iteration by rows
"""
DataFrameRows{D<:AbstractDataFrame} <: AbstractVector{DataFrameRow{D, S}}
DataFrameRows{D<:AbstractDataFrame} <: AbstractVector{DataFrameRow}
Iterator over rows of an `AbstractDataFrame`,
with each row represented as a `DataFrameRow`.
A value of this type is returned by the [`eachrow`](@ref) function.
"""
struct DataFrameRows{D<:AbstractDataFrame, S} <: AbstractVector{DataFrameRow{D, S}}
struct DataFrameRows{D<:AbstractDataFrame} <: AbstractVector{DataFrameRow}
df::D
end

Expand Down Expand Up @@ -72,7 +72,7 @@ julia> eachrow(view(df, [4, 3], [2, 1]))
2 │ 13 3
```
"""
eachrow(df::AbstractDataFrame) = DataFrameRows{typeof(df), typeof(index(df))}(df)
eachrow(df::AbstractDataFrame) = DataFrameRows(df)

Base.IndexStyle(::Type{<:DataFrameRows}) = Base.IndexLinear()
Base.size(itr::DataFrameRows) = (size(parent(itr), 1), )
Expand Down
2 changes: 1 addition & 1 deletion src/dataframerow/dataframerow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ Base.IteratorEltype(::Type{<:DataFrameRow}) = Base.EltypeUnknown()
function Base.Vector(dfr::DataFrameRow)
df = parent(dfr)
T = reduce(promote_type, (eltype(df[!, i]) for i in parentcols(index(dfr))))
convert(Vector{T}, dfr)
return Vector{T}(dfr)
end
Base.Vector{T}(dfr::DataFrameRow) where T =
T[dfr[i] for i in 1:length(dfr)]
Expand Down
10 changes: 8 additions & 2 deletions src/groupeddataframe/groupeddataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -374,9 +374,9 @@ Base.length(gd::GroupedDataFrame) = gd.ngroups

function Base.iterate(gd::GroupedDataFrame, i=1)
if i > length(gd)
nothing
return nothing
else
(view(gd.parent, gd.idx[gd.starts[i]:gd.ends[i]], :), i+1)
return (view(gd.parent, gd.idx[gd.starts[i]:gd.ends[i]], :), i+1)
end
end

Expand Down Expand Up @@ -943,3 +943,9 @@ function _filter_helper_astable(gdf::GroupedDataFrame, nt::NamedTuple, f,

return gdf[[f(mapper(i))::Bool for i in 1:length(gdf)]]
end

function Base.map(f, gdf::GroupedDataFrame)
Base.depwarn("Use of the map function on GroupedDataFrame is deprecated. " *
"Use `[f(sdf) for sdf in gdf]` instead.", :map)
return collect(Base.Generator(f, gdf))
end
10 changes: 0 additions & 10 deletions src/other/precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,6 @@ function precompile(all=false)
Base.precompile(fbody, (Bool,Bool,typeof(select),DataFrame,Any,))
end
end
Base.precompile(Tuple{typeof(map),Function,DataFrames.DataFrameRows{DataFrame,DataFrames.Index}})
Base.precompile(Tuple{typeof(show),Base.GenericIOBuffer{Array{UInt8,1}},MIME{Symbol("text/html")},DataFrameRow{DataFrame,DataFrames.Index}})
Base.precompile(Tuple{typeof(Base.Broadcast.materialize),Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Nothing,typeof(sin),Tuple{Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Nothing,typeof(+),Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(+),Tuple{SubArray{Float64,0,Array{Float64,1},Tuple{Int},true},Array{Float64,1}}},Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Nothing,typeof(/),Tuple{SubDataFrame{DataFrame,DataFrames.Index,Base.OneTo{Int}},DataFrame}}}}}}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:Key1, :Key2, :Value),Tuple{Array{String,1},Array{Union{Missing, String},1},UnitRange{Int}}},Type{DataFrame}})
Expand Down Expand Up @@ -776,7 +775,6 @@ function precompile(all=false)
Base.precompile(Tuple{DataFrames.Reduce{typeof(Base.mul_prod),Nothing,Nothing},Array{Union{Missing, Int},1},GroupedDataFrame{DataFrame}})
Base.precompile(Tuple{typeof(getindex),GroupedDataFrame{DataFrame},InvertedIndex{Array{Bool,1}}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.insertcols!)),NamedTuple{(:makeunique,),Tuple{Bool}},typeof(insertcols!),DataFrame,Int,Pair{Symbol,Int}})
Base.precompile(Tuple{Type{DataFrame},DataFrames.DataFrameRows{DataFrame,DataFrames.Index}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.push!)),NamedTuple{(:cols,),Tuple{Symbol}},typeof(push!),DataFrame,NamedTuple{(:a,),Tuple{Float64}}})
Base.precompile(Tuple{typeof(transform),GroupedDataFrame{DataFrame},Function})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:Key1, :Key2, :Value),Tuple{Array{Union{Missing, String},1},Array{Union{Missing, String},1},UnitRange{Int}}},Type{DataFrame}})
Expand Down Expand Up @@ -820,7 +818,6 @@ function precompile(all=false)
Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1,),Tuple{SubArray{Float64,1,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int}},Int},true}}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Bool,1}},Val{true},Nothing})
Base.precompile(Tuple{typeof(DataFrames._combine_multicol),NamedTuple{(:y, :z),Tuple{Array{Float64,1},SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false}}},Function,GroupedDataFrame{DataFrame},Nothing})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:a, :b2, :v2),Tuple{Array{Union{Missing, Int},1},Array{Union{Missing, Symbol},1},Array{Union{Missing, Float64},1}}},Type{DataFrame}})
Base.precompile(Tuple{typeof(Base.Broadcast.materialize),Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(sum),Tuple{DataFrames.DataFrameRows{SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}},UnitRange{Int}},DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}}}}}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:x1,),Tuple{Array{Bool,1}}},Type{DataFrame}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:id1, :id2, :x_left, :x_right, :ind),Tuple{Array{Int,1},Array{Int,1},Array{Union{Missing, Int},1},Array{Union{Missing, Int},1},Array{String,1}}},Type{DataFrame}})
let fbody = try __lookup_kwbody__(which(DataFrames.stack, (DataFrame,Array{Symbol,1},Array{Any,1},))) catch missing end
Expand Down Expand Up @@ -1401,7 +1398,6 @@ function precompile(all=false)
Base.precompile(Tuple{typeof(DataFrames._combine_process_pair_symbol),Bool,GroupedDataFrame{DataFrame},Dict{Symbol,Tuple{Bool,Int}},Array{DataFrames.TransformationResult,1},Nothing,Symbol,Bool,String,Union{Function, Type},Tuple{Array{String,1}}})
Base.precompile(Tuple{typeof(sort),DataFrame,Function})
Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Array{Pair{Symbol,typeof(sum)},1}})
Base.precompile(Tuple{typeof(repr),MIME{Symbol("text/latex")},DataFrames.DataFrameRows{DataFrame,DataFrames.Index}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:id, :fid, :id_1),Tuple{Array{Int,1},Array{Int,1},Array{Union{Missing, Int},1}}},Type{DataFrame}})
Base.precompile(Tuple{typeof(view),SubDataFrame{DataFrame,DataFrames.Index,Base.OneTo{Int}},InvertedIndex{Int},Between{Int,Int}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:p, :q),Tuple{SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false},SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false}}},Type{DataFrame}})
Expand Down Expand Up @@ -1647,7 +1643,6 @@ function precompile(all=false)
Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Array{Symbol,1}})
Base.precompile(Tuple{typeof(Base.Broadcast.materialize),Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Nothing,typeof(+),Tuple{SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}},Base.OneTo{Int}},Array{Int,1}}}})
isdefined(DataFrames, Symbol("#632#635")) && Base.precompile(Tuple{getfield(DataFrames, Symbol("#632#635")),Array{Bool,1}})
Base.precompile(Tuple{typeof(iterate),Base.Iterators.Zip{Tuple{Array{NamedTuple{(:a, :b),Tuple{Int,Symbol}},1},Tables.NamedTupleIterator{Tables.Schema{(:a, :b),Tuple{Int,Symbol}},Tables.RowIterator{NamedTuple{(:a, :b),Tuple{Array{Int,1},Array{Symbol,1}}}}},DataFrames.DataFrameRows{DataFrame,DataFrames.Index}}}})
Base.precompile(Tuple{typeof(view),SubDataFrame{DataFrame,DataFrames.Index,Base.OneTo{Int}},typeof(!),Between{Int,Int}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:x1,),Tuple{Array{String,1}}},Type{DataFrame}})
isdefined(DataFrames, Symbol("#67#74")) && Base.precompile(Tuple{getfield(DataFrames, Symbol("#67#74")),Array{Int,1}})
Expand Down Expand Up @@ -1861,7 +1856,6 @@ function precompile(all=false)
Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Pair{Symbol,Pair{typeof(prod),Symbol}}})
Base.precompile(Tuple{typeof(completecases),DataFrame,Regex})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:g, :col),Tuple{Array{Any,1},Array{Any,1}}},Type{DataFrame}})
Base.precompile(Tuple{typeof(iterate),Base.Iterators.Zip{Tuple{Array{NamedTuple{(:a, :b),Tuple{Int,Symbol}},1},Tables.NamedTupleIterator{Tables.Schema{(:a, :b),Tuple{Int,Symbol}},Tables.RowIterator{NamedTuple{(:a, :b),Tuple{Array{Int,1},Array{Symbol,1}}}}},DataFrames.DataFrameRows{DataFrame,DataFrames.Index}}},Tuple{Int,Tuple{Int},Tuple{Base.OneTo{Int},Int}}})
Base.precompile(Tuple{typeof(DataFrames._sortperm),SubDataFrame{DataFrame,DataFrames.Index,Array{Int,1}},Base.Sort.MergeSortAlg,DataFrames.DFPerm{Base.Order.ForwardOrdering,Tuple{SubArray{String,1,PooledArrays.PooledArray{String,UInt8,1,Array{UInt8,1}},Tuple{Array{Int,1}},false},SubArray{Union{Missing, String},1,Array{Union{Missing, String},1},Tuple{Array{Int,1}},false}}}})
Base.precompile(Tuple{DataFrames.Reduce{typeof(Base.add_sum),Nothing,typeof(/)},Array{Union{Missing, Number},1},GroupedDataFrame{DataFrame}})
Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1,),Tuple{Int}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Float64,1}},Val{false},Array{Int,1}})
Expand Down Expand Up @@ -2571,7 +2565,6 @@ function precompile(all=false)
Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1,),Tuple{Missing}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Union{Missing, DataFrame},1}},Val{false},Array{Int,1}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames._combine_prepare)),NamedTuple{(:copycols, :keepkeys, :ungroup, :keeprows, :renamecols),NTuple{5,Bool}},typeof(DataFrames._combine_prepare),GroupedDataFrame{DataFrame},Colon,Vararg{Union{Regex, AbstractString, Function, Signed, Symbol, Unsigned, Pair, AbstractArray{T,1} where T, Type, All, Between, Cols, InvertedIndex},N} where N})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:id, :fid),Tuple{Array{Int,1},Array{Int,1}}},Type{DataFrame}})
Base.precompile(Tuple{typeof(Base.collect_similar),DataFrames.DataFrameRows{DataFrame,DataFrames.Index},Base.Generator{DataFrames.DataFrameRows{DataFrame,DataFrames.Index},typeof(sum)}})
Base.precompile(Tuple{Type{SubDataFrame},SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}},UnitRange{Int}},Colon,Cols{Tuple{Array{Int,1}}}})
Base.precompile(Tuple{typeof(getindex),DataFrame,Colon,Cols{Tuple{Regex,InvertedIndex{Regex}}}})
Base.precompile(Tuple{typeof(repr),MIME{Symbol("text/latex")},DataFrames.DataFrameColumns{DataFrame}})
Expand Down Expand Up @@ -2735,7 +2728,6 @@ function precompile(all=false)
Base.precompile(Tuple{typeof(DataFrames.do_call),typeof(sum),Array{Int,1},Array{Int,1},Array{Int,1},GroupedDataFrame{DataFrame},Tuple{Array{Union{Missing, Array{Float64,2}},1}},Int})
Base.precompile(Tuple{typeof(Base.Broadcast.broadcasted),Function,Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Nothing,typeof(+),Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(+),Tuple{SubArray{Float64,0,Array{Float64,1},Tuple{Int},true},Array{Float64,1}}},Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Nothing,typeof(/),Tuple{DataFrame,DataFrame}}}}})
Base.precompile(Tuple{typeof(DataFrames._transformation_helper),DataFrame,AsTable,ByRow{typeof(first)}})
Base.precompile(Tuple{typeof(DataFrames.escapedprint),Base.GenericIOBuffer{Array{UInt8,1}},DataFrames.DataFrameRows{DataFrame,DataFrames.Index},String})
Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1, :x2),Tuple{SubArray{Float64,1,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int}},Int},true},SubArray{Float64,1,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int}},Int},true}}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Int,1},Array{Int,1}},Val{true},Nothing})
Base.precompile(Tuple{typeof(show),GroupedDataFrame{DataFrame}})
Base.precompile(Tuple{typeof(getindex),SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}},UnitRange{Int}},Colon,Array{Symbol,1}})
Expand Down Expand Up @@ -2887,7 +2879,6 @@ function precompile(all=false)
isdefined(DataFrames, Symbol("#627#628")) && Base.precompile(Tuple{getfield(DataFrames, Symbol("#627#628")),SubArray{Union{Missing, String},1,Array{Union{Missing, String},1},Tuple{Base.OneTo{Int}},true}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:id1, :id2_left, :x_left, :ID2_right, :x_right),Tuple{Array{Int,1},Array{Union{Missing, Int},1},Array{Union{Missing, Int},1},Array{Int,1},Array{Int,1}}},Type{DataFrame}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:a, :b),Tuple{Array{Int,1},Array{Any,1}}},Type{DataFrame}})
Base.precompile(Tuple{typeof(Base.collect_similar),DataFrames.DataFrameRows{DataFrame,DataFrames.Index},Base.Generator{DataFrames.DataFrameRows{DataFrame,DataFrames.Index},Type{Array{T,1} where T}}})
Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Pair{Symbol,Pair{typeof(sum),Symbol}},Pair{Symbol,Pair{typeof(length),Symbol}}})
Base.precompile(Tuple{typeof(DataFrames._combine_rows_with_first!),NamedTuple{(:x1,),Tuple{BigFloat}},Tuple{Array{BigFloat,1}},Int,Int,Function,GroupedDataFrame{DataFrame},Tuple{Array{BigInt,1}},Tuple{Symbol},Val{false}})
let fbody = try __lookup_kwbody__(which(issorted, (DataFrame,Array{Any,1},))) catch missing end
Expand All @@ -2897,7 +2888,6 @@ function precompile(all=false)
end
Base.precompile(Tuple{typeof(append!),Array{Any,1},Array{Pair{String,ByRow{typeof(-)}},1}})
Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Pair{Symbol,Pair{ByRow{typeof(sin)},Symbol}},InvertedIndex{Symbol},Vararg{Union{Regex, AbstractString, Function, Signed, Symbol, Unsigned, Pair, AbstractArray{T,1} where T, Type, All, Between, Cols, InvertedIndex},N} where N})
Base.precompile(Tuple{typeof(==),DataFrames.DataFrameRows{SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,Array{Int,1},Array{Int,1}},Array{Int,1}},DataFrames.SubIndex{DataFrames.Index,Array{Int,1},Array{Int,1}}},DataFrames.DataFrameRows{DataFrame,DataFrames.Index}})
Base.precompile(Tuple{typeof(DataFrames._combine_multicol),NamedTuple{(:x2,),Tuple{Array{Bool,1}}},Function,GroupedDataFrame{DataFrame},Nothing})
Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Pair{Int,typeof(sum)}})
Base.precompile(Tuple{typeof(allowmissing!),DataFrame,InvertedIndex{InvertedIndex{Array{Int,1}}}})
Expand Down
17 changes: 17 additions & 0 deletions test/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,23 @@ end
@test typeof(df[:, 1]) == Vector{Float64}
end

@testset "map on GroupedDataFrame" begin
df = DataFrame(a=1:3, b=4:6, c=7:9)
dfv = @view df[1:3, 1:3]
gdf = groupby(df, :a)
gdfv = groupby(dfv, :a)

for x in (gdf, gdfv)
@test collect(x) == map(identity, x)
end
end

@testset "new map behavior" begin
df = DataFrame(g=[1, 2, 3])
gdf = groupby(df, :g)
@test map(nrow, gdf) == [1, 1, 1]
end

@testset "Conversion tests" begin
df = DataFrame()
df[!, :A] = 1:5
Expand Down
6 changes: 0 additions & 6 deletions test/grouping.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2898,12 +2898,6 @@ end
@test_throws ArgumentError combine(gdf, (:g, :g) => identity)
end

@testset "new map behavior" begin
df = DataFrame(g=[1, 2, 3])
gdf = groupby(df, :g)
@test map(nrow, gdf) == [1, 1, 1]
end

@testset "check isagg correctly uses fast path only when it should" begin
for fun in (sum, prod, mean, var, std, sumskipmissing, prodskipmissing,
meanskipmissing, varskipmissing, stdskipmissing),
Expand Down

2 comments on commit 059248f

@bkamins
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register branch=0.22_patches

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/32784

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.22.6 -m "<description of version>" 059248fba53a357da1e766d546a4d9785a0e1f25
git push origin v0.22.6

Please sign in to comment.