Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BREAKING] Make DataFrameColumns stop being an AbstractVector #2291

Merged
merged 13 commits into from
Jun 24, 2020
46 changes: 25 additions & 21 deletions src/abstractdataframe/iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,20 +106,28 @@ Base.propertynames(itr::DataFrameRows, private::Bool=false) = propertynames(pare

# Iteration by columns

# Shared docstring fragment describing indexing, the `AbstractVector`-like API,
# and the `find*` support of `DataFrameColumns`. It is spliced into other
# docstrings in this file via `$DATAFRAMECOLUMNS_DOCSTR` so the wording is
# maintained in a single place.
const DATAFRAMECOLUMNS_DOCSTR = """
Indexing into `DataFrameColumns` objects using integer, `Symbol` or string
returns the corresponding column (without copying).
Indexing into `DataFrameColumns` objects using a multiple column selector
returns a subsetted `DataFrameColumns` object with parent containing only the
selected columns (without copying).

`DataFrameColumns` supports most of the `AbstractVector` API. The key
differences are that it is read-only and that the `keys` function returns a
vector of `Symbol`s (and not integers as for normal vectors).

In particular `findnext`, `findprev`, `findfirst`, `findlast`, and `findall`
functions are supported, and in `findnext`, `findprev` functions it is allowed
to pass integer, string, or `Symbol` as a reference index.
"""

"""
DataFrameColumns{<:AbstractDataFrame}

A vector-like object that allows iteration over columns of an `AbstractDataFrame`.

Indexing into `DataFrameColumns` objects using integer, `Symbol` or string
returns the corresponding column (without copying).
Indexing into `DataFrameColumns` objects using a multiple column selector
returns a subsetted `DataFrameColumns` object with parent being a `SubDataFrame` view of the
original containing only the selected columns.

`DataFrameColumns` supports most of the `AbstractVector` API. The key differences are that it is
read-only and that the `keys` function returns a vector of `Symbol`s (and not
integers as for normal vectors).
$DATAFRAMECOLUMNS_DOCSTR
"""
struct DataFrameColumns{T<:AbstractDataFrame}
df::T
Expand All @@ -131,18 +139,10 @@ Base.summary(io::IO, dfcs::DataFrameColumns) = print(io, summary(dfcs))
"""
eachcol(df::AbstractDataFrame)

Return a `DataFrameColumns` object that is a vector-like
that allows iterating an `AbstractDataFrame` column by column.

Indexing into `DataFrameColumns` objects using integer, `Symbol` or string
returns the corresponding column (without copying).
Indexing into `DataFrameColumns` objects using a multiple column selector
returns a subsetted `DataFrameColumns` object with parent being a `SubDataFrame` view of the
original containing only the selected columns.
Return a `DataFrameColumns` object that is vector-like and allows iterating
an `AbstractDataFrame` column by column.

It supports most of the `AbstractVector` API. The key differences are that it is
read-only and that the `keys` function returns a vector of `Symbol`s (and not
integers as for normal vectors).
$DATAFRAMECOLUMNS_DOCSTR

# Examples
```jldoctest
Expand Down Expand Up @@ -193,7 +193,7 @@ Base.iterate(itr::DataFrameColumns, i::Integer=1) =
# Single-column lookup: forward `idx` to the parent data frame using `!`
# row selection, so the column stored in the parent is returned as-is.
Base.@propagate_inbounds function Base.getindex(itr::DataFrameColumns, idx::ColumnIndex)
    df = parent(itr)
    return df[!, idx]
end
Base.@propagate_inbounds Base.getindex(itr::DataFrameColumns, idx::MultiColumnIndex) =
eachcol(view(parent(itr), !, idx))
eachcol(parent(itr)[!, idx])
# Equality of two column iterators is delegated to `==` on their parents.
function Base.:(==)(itr1::DataFrameColumns, itr2::DataFrameColumns)
    lhs = parent(itr1)
    rhs = parent(itr2)
    return lhs == rhs
end
Base.isequal(itr1::DataFrameColumns, itr2::DataFrameColumns) =
Expand Down Expand Up @@ -237,8 +237,12 @@ where `name` is the column name of the column `col`.
function Base.pairs(itr::DataFrameColumns)
    # Pair every key reported by `keys(itr)` with the value `itr` holds for it.
    colkeys = keys(itr)
    return Base.Iterators.Pairs(itr, colkeys)
end
# Forward search starting at integer position `i`, performed on `values(itr)`.
function Base.findnext(f::Function, itr::DataFrameColumns, i::Integer)
    cols = values(itr)
    return findnext(f, cols, i)
end
# Forward search whose starting point is given as a column name: the name is
# looked up in the parent's index and the result is used as the start position.
function Base.findnext(f::Function, itr::DataFrameColumns,
                       i::Union{Symbol, AbstractString})
    cols = values(itr)
    start = index(parent(itr))[i]
    return findnext(f, cols, start)
end
# Backward search starting at integer position `i`, performed on `values(itr)`.
function Base.findprev(f::Function, itr::DataFrameColumns, i::Integer)
    cols = values(itr)
    return findprev(f, cols, i)
end
# Backward search whose starting point is given as a column name: the name is
# looked up in the parent's index and the result is used as the start position.
function Base.findprev(f::Function, itr::DataFrameColumns,
                       i::Union{Symbol, AbstractString})
    cols = values(itr)
    start = index(parent(itr))[i]
    return findprev(f, cols, start)
end
# Position of the first element of `values(itr)` for which `f` holds,
# as reported by the generic `findfirst`.
function Base.findfirst(f::Function, itr::DataFrameColumns)
    cols = values(itr)
    return findfirst(f, cols)
end
Base.findlast(f::Function, itr::DataFrameColumns) =
Expand Down
22 changes: 18 additions & 4 deletions test/iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,17 @@ using Test, DataFrames
@test size(eachcol(df), 2) == 1
@test_throws ArgumentError size(eachcol(df), 0)
@test eachcol(df)[1] == df[:, 1]
@test eachcol(df)[:A] == df[:, :A]
@test eachcol(df)[:A] === df[!, :A]
@test eachcol(df)[All()] == eachcol(df)
@test isequal(eachcol(df)[[1]], eachcol(df[!, [1]]))
@test eachcol(df).A == df[:, :A]
@test eachcol(df)["A"] == df[:, "A"]
@test eachcol(df)."A" == df[:, "A"]
@test eachcol(df).A === df[!, :A]
@test eachcol(df)["A"] === df[!, "A"]
@test eachcol(df)."A" === df[!, "A"]
@test collect(eachcol(df)) isa Vector{AbstractVector}
@test collect(eachcol(df)) == [[1, 2], [2, 3]]
@test eltype(eachcol(df)) == AbstractVector
@test_throws ArgumentError eachcol(df)[[1,1]]
@test eachcol(df)[[1]][1] === df.A
for col in eachcol(df)
@test isa(col, AbstractVector)
end
Expand Down Expand Up @@ -167,8 +169,20 @@ end
cols = eachcol(df)
@test findfirst(col -> eltype(col) <: Int, cols) == 1
@test findnext(col -> eltype(col) <: Int, cols, 2) == 3
@test findnext(col -> eltype(col) <: Int, cols, 10) === nothing
@test_throws BoundsError findnext(col -> eltype(col) <: Int, cols, -1)
@test_throws ArgumentError findnext(col -> eltype(col) <: Int, cols, :x1)
@test_throws ArgumentError findnext(col -> eltype(col) <: Int, cols, "x1")
@test findnext(col -> eltype(col) <: Int, cols, :b) == 3
@test findnext(col -> eltype(col) <: Int, cols, "b") == 3
@test findlast(col -> eltype(col) <: Int, cols) == 3
@test findprev(col -> eltype(col) <: Int, cols, 2) == 1
@test findprev(col -> eltype(col) <: Int, cols, :b) == 1
@test findprev(col -> eltype(col) <: Int, cols, "b") == 1
@test findprev(col -> eltype(col) <: Int, cols, -1) === nothing
@test_throws BoundsError findprev(col -> eltype(col) <: Int, cols, 10)
@test_throws ArgumentError findprev(col -> eltype(col) <: Int, cols, :x1)
@test_throws ArgumentError findprev(col -> eltype(col) <: Int, cols, "x1")
@test findall(col -> eltype(col) <: Int, cols) == [1, 3]
end

Expand Down