From 059248fba53a357da1e766d546a4d9785a0e1f25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Thu, 25 Mar 2021 11:26:46 +0100 Subject: [PATCH] deprecate map on GroupedDataFrame (#2662) --- NEWS.md | 5 +++++ src/abstractdataframe/abstractdataframe.jl | 2 +- src/abstractdataframe/iteration.jl | 6 +++--- src/dataframerow/dataframerow.jl | 2 +- src/groupeddataframe/groupeddataframe.jl | 10 ++++++++-- src/other/precompile.jl | 10 ---------- test/deprecated.jl | 17 +++++++++++++++++ test/grouping.jl | 6 ------ 8 files changed, 35 insertions(+), 23 deletions(-) diff --git a/NEWS.md b/NEWS.md index 29a515027a..f5cb79ed5e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,11 @@ `convert(::Type{NamedTuple}, key::GroupKey)`, and `convert(::Type{DataFrame}, sdf::SubDataFrame)`; the deprecated methods will be removed in 1.0 release +* as a bug fix `eltype` of vector returned by `eachrow` is now `DataFrameRow` + ([#2662](https://github.com/JuliaData/DataFrames.jl/pull/2662)) +* applying `map` to `GroupedDataFrame` is now deprecated. It will + be an error in 1.0 release. + ([#2662](https://github.com/JuliaData/DataFrames.jl/pull/2662)) # DataFrames v0.22 Release Notes diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index 7bc717eee7..f275c00448 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -1140,7 +1140,7 @@ _filter!_helper_astable(df::AbstractDataFrame, nti::Tables.NamedTupleIterator, f function Base.Matrix(df::AbstractDataFrame) T = reduce(promote_type, (eltype(v) for v in eachcol(df))) - return convert(Matrix{T}, df) + return Matrix{T}(df) end function Base.Matrix{T}(df::AbstractDataFrame) where T diff --git a/src/abstractdataframe/iteration.jl b/src/abstractdataframe/iteration.jl index 2858993338..44a17d4144 100644 --- a/src/abstractdataframe/iteration.jl +++ b/src/abstractdataframe/iteration.jl @@ -6,14 +6,14 @@ # Iteration by rows """ - DataFrameRows{D<:AbstractDataFrame} <: AbstractVector{DataFrameRow{D, S}} + DataFrameRows{D<:AbstractDataFrame} <: AbstractVector{DataFrameRow} Iterator over rows of an `AbstractDataFrame`, with each row represented as a `DataFrameRow`. A value of this type is returned by the [`eachrow`](@ref) function. """ -struct DataFrameRows{D<:AbstractDataFrame, S} <: AbstractVector{DataFrameRow{D, S}} +struct DataFrameRows{D<:AbstractDataFrame} <: AbstractVector{DataFrameRow} df::D end @@ -72,7 +72,7 @@ julia> eachrow(view(df, [4, 3], [2, 1])) 2 │ 13 3 ``` """ -eachrow(df::AbstractDataFrame) = DataFrameRows{typeof(df), typeof(index(df))}(df) +eachrow(df::AbstractDataFrame) = DataFrameRows(df) Base.IndexStyle(::Type{<:DataFrameRows}) = Base.IndexLinear() Base.size(itr::DataFrameRows) = (size(parent(itr), 1), ) diff --git a/src/dataframerow/dataframerow.jl b/src/dataframerow/dataframerow.jl index 7777081a1b..29bed978a2 100644 --- a/src/dataframerow/dataframerow.jl +++ b/src/dataframerow/dataframerow.jl @@ -392,7 +392,7 @@ Base.IteratorEltype(::Type{<:DataFrameRow}) = Base.EltypeUnknown() function Base.Vector(dfr::DataFrameRow) df = parent(dfr) T = reduce(promote_type, (eltype(df[!, i]) for i in parentcols(index(dfr)))) - convert(Vector{T}, dfr) + return Vector{T}(dfr) end Base.Vector{T}(dfr::DataFrameRow) where T = T[dfr[i] for i in 1:length(dfr)] diff --git a/src/groupeddataframe/groupeddataframe.jl b/src/groupeddataframe/groupeddataframe.jl index b351a944e4..002ce3df5f 100644 --- a/src/groupeddataframe/groupeddataframe.jl +++ b/src/groupeddataframe/groupeddataframe.jl @@ -374,9 +374,9 @@ Base.length(gd::GroupedDataFrame) = gd.ngroups function Base.iterate(gd::GroupedDataFrame, i=1) if i > length(gd) - nothing + return nothing else - (view(gd.parent, gd.idx[gd.starts[i]:gd.ends[i]], :), i+1) + return (view(gd.parent, gd.idx[gd.starts[i]:gd.ends[i]], :), i+1) end end @@ -943,3 +943,9 @@ function _filter_helper_astable(gdf::GroupedDataFrame, nt::NamedTuple, f, return gdf[[f(mapper(i))::Bool for i in 1:length(gdf)]] end + +function Base.map(f, gdf::GroupedDataFrame) + Base.depwarn("Use of the map function on GroupedDataFrame is deprecated. " * + "Use `[f(sdf) for sdf in gdf]` instead.", :map) + return collect(Base.Generator(f, gdf)) +end diff --git a/src/other/precompile.jl b/src/other/precompile.jl index 3ae3e0d203..edc901a390 100644 --- a/src/other/precompile.jl +++ b/src/other/precompile.jl @@ -688,7 +688,6 @@ function precompile(all=false) Base.precompile(fbody, (Bool,Bool,typeof(select),DataFrame,Any,)) end end - Base.precompile(Tuple{typeof(map),Function,DataFrames.DataFrameRows{DataFrame,DataFrames.Index}}) Base.precompile(Tuple{typeof(show),Base.GenericIOBuffer{Array{UInt8,1}},MIME{Symbol("text/html")},DataFrameRow{DataFrame,DataFrames.Index}}) Base.precompile(Tuple{typeof(Base.Broadcast.materialize),Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Nothing,typeof(sin),Tuple{Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Nothing,typeof(+),Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(+),Tuple{SubArray{Float64,0,Array{Float64,1},Tuple{Int},true},Array{Float64,1}}},Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Nothing,typeof(/),Tuple{SubDataFrame{DataFrame,DataFrames.Index,Base.OneTo{Int}},DataFrame}}}}}}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:Key1, :Key2, :Value),Tuple{Array{String,1},Array{Union{Missing, String},1},UnitRange{Int}}},Type{DataFrame}}) @@ -776,7 +775,6 @@ function precompile(all=false) Base.precompile(Tuple{DataFrames.Reduce{typeof(Base.mul_prod),Nothing,Nothing},Array{Union{Missing, Int},1},GroupedDataFrame{DataFrame}}) Base.precompile(Tuple{typeof(getindex),GroupedDataFrame{DataFrame},InvertedIndex{Array{Bool,1}}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.insertcols!)),NamedTuple{(:makeunique,),Tuple{Bool}},typeof(insertcols!),DataFrame,Int,Pair{Symbol,Int}}) - Base.precompile(Tuple{Type{DataFrame},DataFrames.DataFrameRows{DataFrame,DataFrames.Index}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.push!)),NamedTuple{(:cols,),Tuple{Symbol}},typeof(push!),DataFrame,NamedTuple{(:a,),Tuple{Float64}}}) Base.precompile(Tuple{typeof(transform),GroupedDataFrame{DataFrame},Function}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:Key1, :Key2, :Value),Tuple{Array{Union{Missing, String},1},Array{Union{Missing, String},1},UnitRange{Int}}},Type{DataFrame}}) @@ -820,7 +818,6 @@ function precompile(all=false) Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1,),Tuple{SubArray{Float64,1,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int}},Int},true}}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Bool,1}},Val{true},Nothing}) Base.precompile(Tuple{typeof(DataFrames._combine_multicol),NamedTuple{(:y, :z),Tuple{Array{Float64,1},SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false}}},Function,GroupedDataFrame{DataFrame},Nothing}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:a, :b2, :v2),Tuple{Array{Union{Missing, Int},1},Array{Union{Missing, Symbol},1},Array{Union{Missing, Float64},1}}},Type{DataFrame}}) - Base.precompile(Tuple{typeof(Base.Broadcast.materialize),Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(sum),Tuple{DataFrames.DataFrameRows{SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}},UnitRange{Int}},DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}}}}}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:x1,),Tuple{Array{Bool,1}}},Type{DataFrame}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:id1, :id2, :x_left, :x_right, :ind),Tuple{Array{Int,1},Array{Int,1},Array{Union{Missing, Int},1},Array{Union{Missing, Int},1},Array{String,1}}},Type{DataFrame}}) let fbody = try __lookup_kwbody__(which(DataFrames.stack, (DataFrame,Array{Symbol,1},Array{Any,1},))) catch missing end @@ -1401,7 +1398,6 @@ function precompile(all=false) Base.precompile(Tuple{typeof(DataFrames._combine_process_pair_symbol),Bool,GroupedDataFrame{DataFrame},Dict{Symbol,Tuple{Bool,Int}},Array{DataFrames.TransformationResult,1},Nothing,Symbol,Bool,String,Union{Function, Type},Tuple{Array{String,1}}}) Base.precompile(Tuple{typeof(sort),DataFrame,Function}) Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Array{Pair{Symbol,typeof(sum)},1}}) - Base.precompile(Tuple{typeof(repr),MIME{Symbol("text/latex")},DataFrames.DataFrameRows{DataFrame,DataFrames.Index}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:id, :fid, :id_1),Tuple{Array{Int,1},Array{Int,1},Array{Union{Missing, Int},1}}},Type{DataFrame}}) Base.precompile(Tuple{typeof(view),SubDataFrame{DataFrame,DataFrames.Index,Base.OneTo{Int}},InvertedIndex{Int},Between{Int,Int}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:p, :q),Tuple{SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false},SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false}}},Type{DataFrame}}) @@ -1647,7 +1643,6 @@ function precompile(all=false) Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Array{Symbol,1}}) Base.precompile(Tuple{typeof(Base.Broadcast.materialize),Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Nothing,typeof(+),Tuple{SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}},Base.OneTo{Int}},Array{Int,1}}}}) isdefined(DataFrames, Symbol("#632#635")) && Base.precompile(Tuple{getfield(DataFrames, Symbol("#632#635")),Array{Bool,1}}) - Base.precompile(Tuple{typeof(iterate),Base.Iterators.Zip{Tuple{Array{NamedTuple{(:a, :b),Tuple{Int,Symbol}},1},Tables.NamedTupleIterator{Tables.Schema{(:a, :b),Tuple{Int,Symbol}},Tables.RowIterator{NamedTuple{(:a, :b),Tuple{Array{Int,1},Array{Symbol,1}}}}},DataFrames.DataFrameRows{DataFrame,DataFrames.Index}}}}) Base.precompile(Tuple{typeof(view),SubDataFrame{DataFrame,DataFrames.Index,Base.OneTo{Int}},typeof(!),Between{Int,Int}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:x1,),Tuple{Array{String,1}}},Type{DataFrame}}) isdefined(DataFrames, Symbol("#67#74")) && Base.precompile(Tuple{getfield(DataFrames, Symbol("#67#74")),Array{Int,1}}) @@ -1861,7 +1856,6 @@ function precompile(all=false) Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Pair{Symbol,Pair{typeof(prod),Symbol}}}) Base.precompile(Tuple{typeof(completecases),DataFrame,Regex}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:g, :col),Tuple{Array{Any,1},Array{Any,1}}},Type{DataFrame}}) - Base.precompile(Tuple{typeof(iterate),Base.Iterators.Zip{Tuple{Array{NamedTuple{(:a, :b),Tuple{Int,Symbol}},1},Tables.NamedTupleIterator{Tables.Schema{(:a, :b),Tuple{Int,Symbol}},Tables.RowIterator{NamedTuple{(:a, :b),Tuple{Array{Int,1},Array{Symbol,1}}}}},DataFrames.DataFrameRows{DataFrame,DataFrames.Index}}},Tuple{Int,Tuple{Int},Tuple{Base.OneTo{Int},Int}}}) Base.precompile(Tuple{typeof(DataFrames._sortperm),SubDataFrame{DataFrame,DataFrames.Index,Array{Int,1}},Base.Sort.MergeSortAlg,DataFrames.DFPerm{Base.Order.ForwardOrdering,Tuple{SubArray{String,1,PooledArrays.PooledArray{String,UInt8,1,Array{UInt8,1}},Tuple{Array{Int,1}},false},SubArray{Union{Missing, String},1,Array{Union{Missing, String},1},Tuple{Array{Int,1}},false}}}}) Base.precompile(Tuple{DataFrames.Reduce{typeof(Base.add_sum),Nothing,typeof(/)},Array{Union{Missing, Number},1},GroupedDataFrame{DataFrame}}) Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1,),Tuple{Int}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Float64,1}},Val{false},Array{Int,1}}) @@ -2571,7 +2565,6 @@ function precompile(all=false) Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1,),Tuple{Missing}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Union{Missing, DataFrame},1}},Val{false},Array{Int,1}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames._combine_prepare)),NamedTuple{(:copycols, :keepkeys, :ungroup, :keeprows, :renamecols),NTuple{5,Bool}},typeof(DataFrames._combine_prepare),GroupedDataFrame{DataFrame},Colon,Vararg{Union{Regex, AbstractString, Function, Signed, Symbol, Unsigned, Pair, AbstractArray{T,1} where T, Type, All, Between, Cols, InvertedIndex},N} where N}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:id, :fid),Tuple{Array{Int,1},Array{Int,1}}},Type{DataFrame}}) - Base.precompile(Tuple{typeof(Base.collect_similar),DataFrames.DataFrameRows{DataFrame,DataFrames.Index},Base.Generator{DataFrames.DataFrameRows{DataFrame,DataFrames.Index},typeof(sum)}}) Base.precompile(Tuple{Type{SubDataFrame},SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}},UnitRange{Int}},Colon,Cols{Tuple{Array{Int,1}}}}) Base.precompile(Tuple{typeof(getindex),DataFrame,Colon,Cols{Tuple{Regex,InvertedIndex{Regex}}}}) Base.precompile(Tuple{typeof(repr),MIME{Symbol("text/latex")},DataFrames.DataFrameColumns{DataFrame}}) @@ -2735,7 +2728,6 @@ function precompile(all=false) Base.precompile(Tuple{typeof(DataFrames.do_call),typeof(sum),Array{Int,1},Array{Int,1},Array{Int,1},GroupedDataFrame{DataFrame},Tuple{Array{Union{Missing, Array{Float64,2}},1}},Int}) Base.precompile(Tuple{typeof(Base.Broadcast.broadcasted),Function,Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Nothing,typeof(+),Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(+),Tuple{SubArray{Float64,0,Array{Float64,1},Tuple{Int},true},Array{Float64,1}}},Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Nothing,typeof(/),Tuple{DataFrame,DataFrame}}}}}) Base.precompile(Tuple{typeof(DataFrames._transformation_helper),DataFrame,AsTable,ByRow{typeof(first)}}) - Base.precompile(Tuple{typeof(DataFrames.escapedprint),Base.GenericIOBuffer{Array{UInt8,1}},DataFrames.DataFrameRows{DataFrame,DataFrames.Index},String}) Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1, :x2),Tuple{SubArray{Float64,1,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int}},Int},true},SubArray{Float64,1,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int}},Int},true}}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Int,1},Array{Int,1}},Val{true},Nothing}) Base.precompile(Tuple{typeof(show),GroupedDataFrame{DataFrame}}) Base.precompile(Tuple{typeof(getindex),SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}},UnitRange{Int}},Colon,Array{Symbol,1}}) @@ -2887,7 +2879,6 @@ function precompile(all=false) isdefined(DataFrames, Symbol("#627#628")) && Base.precompile(Tuple{getfield(DataFrames, Symbol("#627#628")),SubArray{Union{Missing, String},1,Array{Union{Missing, String},1},Tuple{Base.OneTo{Int}},true}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:id1, :id2_left, :x_left, :ID2_right, :x_right),Tuple{Array{Int,1},Array{Union{Missing, Int},1},Array{Union{Missing, Int},1},Array{Int,1},Array{Int,1}}},Type{DataFrame}}) Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:a, :b),Tuple{Array{Int,1},Array{Any,1}}},Type{DataFrame}}) - Base.precompile(Tuple{typeof(Base.collect_similar),DataFrames.DataFrameRows{DataFrame,DataFrames.Index},Base.Generator{DataFrames.DataFrameRows{DataFrame,DataFrames.Index},Type{Array{T,1} where T}}}) Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Pair{Symbol,Pair{typeof(sum),Symbol}},Pair{Symbol,Pair{typeof(length),Symbol}}}) Base.precompile(Tuple{typeof(DataFrames._combine_rows_with_first!),NamedTuple{(:x1,),Tuple{BigFloat}},Tuple{Array{BigFloat,1}},Int,Int,Function,GroupedDataFrame{DataFrame},Tuple{Array{BigInt,1}},Tuple{Symbol},Val{false}}) let fbody = try __lookup_kwbody__(which(issorted, (DataFrame,Array{Any,1},))) catch missing end @@ -2897,7 +2888,6 @@ function precompile(all=false) end Base.precompile(Tuple{typeof(append!),Array{Any,1},Array{Pair{String,ByRow{typeof(-)}},1}}) Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Pair{Symbol,Pair{ByRow{typeof(sin)},Symbol}},InvertedIndex{Symbol},Vararg{Union{Regex, AbstractString, Function, Signed, Symbol, Unsigned, Pair, AbstractArray{T,1} where T, Type, All, Between, Cols, InvertedIndex},N} where N}) - Base.precompile(Tuple{typeof(==),DataFrames.DataFrameRows{SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,Array{Int,1},Array{Int,1}},Array{Int,1}},DataFrames.SubIndex{DataFrames.Index,Array{Int,1},Array{Int,1}}},DataFrames.DataFrameRows{DataFrame,DataFrames.Index}}) Base.precompile(Tuple{typeof(DataFrames._combine_multicol),NamedTuple{(:x2,),Tuple{Array{Bool,1}}},Function,GroupedDataFrame{DataFrame},Nothing}) Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Pair{Int,typeof(sum)}}) Base.precompile(Tuple{typeof(allowmissing!),DataFrame,InvertedIndex{InvertedIndex{Array{Int,1}}}}) diff --git a/test/deprecated.jl b/test/deprecated.jl index 1184fb20aa..bcb793832f 100644 --- a/test/deprecated.jl +++ b/test/deprecated.jl @@ -437,6 +437,23 @@ end @test typeof(df[:, 1]) == Vector{Float64} end +@testset "map on GroupedDataFrame" begin + df = DataFrame(a=1:3, b=4:6, c=7:9) + dfv = @view df[1:3, 1:3] + gdf = groupby(df, :a) + gdfv = groupby(dfv, :a) + + for x in (gdf, gdfv) + @test collect(x) == map(identity, x) + end +end + +@testset "new map behavior" begin + df = DataFrame(g=[1, 2, 3]) + gdf = groupby(df, :g) + @test map(nrow, gdf) == [1, 1, 1] +end + @testset "Conversion tests" begin df = DataFrame() df[!, :A] = 1:5 diff --git a/test/grouping.jl b/test/grouping.jl index 0b2bf91c8e..6cf030fb99 100644 --- a/test/grouping.jl +++ b/test/grouping.jl @@ -2898,12 +2898,6 @@ end @test_throws ArgumentError combine(gdf, (:g, :g) => identity) end -@testset "new map behavior" begin - df = DataFrame(g=[1, 2, 3]) - gdf = groupby(df, :g) - @test map(nrow, gdf) == [1, 1, 1] -end - @testset "check isagg correctly uses fast path only when it should" begin for fun in (sum, prod, mean, var, std, sum∘skipmissing, prod∘skipmissing, mean∘skipmissing, var∘skipmissing, std∘skipmissing),