Skip to content

Commit

Permalink
Merge pull request #41 from eliascarv/choose
Browse files Browse the repository at this point in the history
Rename `_filter` to `choose` and create colspec.jl file in test folder
  • Loading branch information
juliohm committed Apr 12, 2022
2 parents 7db1e56 + 084a6ef commit d1fd5ec
Show file tree
Hide file tree
Showing 6 changed files with 112 additions and 113 deletions.
40 changes: 20 additions & 20 deletions src/colspec.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ const ColSelector = Union{Symbol,Integer,AbstractString}
`ColSpec` is a union of types used to filter columns.
The `ColSpec` type together with the `ColSelector` union type and
the `_filter` internal function form the ColSpec interface.
the `choose` function form the ColSpec interface.
To implement the ColSpec interface, the following steps must be performed:
Expand All @@ -21,12 +21,12 @@ struct MyTransform{S<:ColSpec,#= other type params =#}
# other fields
end
```
2. use `_filter(colspec, names)` internal function in apply:
2. use `choose(colspec, names)` function in apply:
```julia
function apply(transform::MyTransform, table)
names = Tables.columnnames(table)
# selected column names
snames = _filter(transform.colspec, names)
snames = choose(transform.colspec, names)
# code...
end
```
Expand All @@ -41,33 +41,33 @@ end
"""
const ColSpec = Union{Vector{T},NTuple{N,T},Regex,Colon} where {N,T<:ColSelector}

# filter table columns using colspec
function _filter(colspec::Vector{Symbol}, names)
# choose column names using colspec
# Validate a selection given as a vector of symbols and return it unchanged.
#
# `colspec` is the requested selection and `names` is the collection of
# available column names. An `AssertionError` is raised when the selection
# is empty or when it contains a symbol that is not present in `names`.
function choose(colspec::Vector{Symbol}, names)
  # validate columns
  @assert !isempty(colspec) "Invalid column selection."
  # every selected column must be one of the available names
  # (`issubset` is the ASCII spelling of the `⊆` operator)
  @assert issubset(colspec, names) "Invalid column selection."
  return colspec
end

_filter(colspec::Vector{<:AbstractString}, names) =
_filter(Symbol.(colspec), names)
# String selectors are converted to symbols and then handed to the
# `choose` method that accepts the converted selection.
function choose(colspec::Vector{<:AbstractString}, names)
  symbols = Symbol.(colspec)
  return choose(symbols, names)
end

_filter(colspec::Vector{<:Integer}, names::Vector) =
_filter(names[colspec], names)
# Integer selectors are used as indices into the vector of names; the
# names found at those positions are then passed back to `choose`.
function choose(colspec::Vector{<:Integer}, names::Vector)
  picked = names[colspec]
  return choose(picked, names)
end

_filter(colspec::Vector{<:Integer}, names::Tuple) =
_filter(colspec, collect(names))
# A tuple of names is first collected into a vector so that the
# integer selectors can be applied by the vector-based method.
function choose(colspec::Vector{<:Integer}, names::Tuple)
  namesvec = collect(names)
  return choose(colspec, namesvec)
end

_filter(colspec::NTuple{N,<:ColSelector}, names) where {N} =
_filter(collect(colspec), names)
# A tuple of selectors is collected into a vector and forwarded to the
# `choose` method matching the resulting vector.
function choose(colspec::NTuple{N,<:ColSelector}, names) where {N}
  selectors = collect(colspec)
  return choose(selectors, names)
end

function _filter(colspec::Regex, names::Vector)
fnames = filter(col -> occursin(colspec, String(col)), names)
_filter(fnames, names)
# Keep the names whose string form matches the regex, then pass the
# matching names back to `choose` together with the full list of names.
function choose(colspec::Regex, names::Vector)
  ismatching(name) = occursin(colspec, String(name))
  matched = filter(ismatching, names)
  return choose(matched, names)
end

_filter(colspec::Regex, names::Tuple) =
_filter(colspec, collect(names))
# A tuple of names is collected into a vector before the regex
# selection is performed by the vector-based method.
function choose(colspec::Regex, names::Tuple)
  namesvec = collect(names)
  return choose(colspec, namesvec)
end

_filter(::Colon, names::Vector) = names
_filter(::Colon, names::Tuple) = collect(names)
# The colon selects every column: a vector of names is returned as is
# (the same object, not a copy) ...
function choose(::Colon, names::Vector)
  return names
end

# ... and a tuple of names is collected into a vector.
function choose(::Colon, names::Tuple)
  return collect(names)
end
2 changes: 1 addition & 1 deletion src/transforms/filter.jl
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ _nonmissing(x) = _nonmissing(eltype(x), x)
function apply(transform::DropMissing, table)
names = Tables.columnnames(table)
types = Tables.schema(table).types
snames = _filter(transform.colspec, names)
snames = choose(transform.colspec, names)
ftrans = _ftrans(transform, snames)
newtable, fcache = apply(ftrans, table)

Expand Down
4 changes: 2 additions & 2 deletions src/transforms/select.jl
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ isrevertible(::Type{<:Select}) = true
function apply(transform::Select, table)
# retrieve relevant column names
allcols = collect(Tables.columnnames(table))
select = _filter(transform.colspec, allcols)
select = choose(transform.colspec, allcols)
reject = setdiff(allcols, select)

# keep track of indices to revert later
Expand Down Expand Up @@ -141,7 +141,7 @@ isrevertible(::Type{<:Reject}) = true

function apply(transform::Reject, table)
allcols = Tables.columnnames(table)
reject = _filter(transform.colspec, allcols)
reject = choose(transform.colspec, allcols)
select = setdiff(allcols, reject)
strans = Select(select)
newtable, scache = apply(strans, table)
Expand Down
85 changes: 85 additions & 0 deletions test/colspec.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
@testset "ColSpec" begin
  vecnames = [:a, :b, :c, :d, :e, :f]
  tupnames = (:a, :b, :c, :d, :e, :f)
  # every case below is checked against both containers of names
  allnames = (vecnames, tupnames)

  # each selector flavor must pick the columns a, c and e
  selectors = (
    [:a, :c, :e],     # vector of symbols
    (:a, :c, :e),     # tuple of symbols
    ["a", "c", "e"],  # vector of strings
    ("a", "c", "e"),  # tuple of strings
    [1, 3, 5],        # vector of integers
    (1, 3, 5),        # tuple of integers
    r"[ace]"          # regex
  )
  for colspec in selectors, names in allnames
    snames = TableTransforms.choose(colspec, names)
    @test snames == [:a, :c, :e]
  end

  # colon
  for names in allnames
    snames = TableTransforms.choose(:, names)
    @test snames == [:a, :b, :c, :d, :e, :f]
  end

  # throws
  invalid = (r"x", String[], Symbol[])
  for colspec in invalid, names in allnames
    @test_throws AssertionError TableTransforms.choose(colspec, names)
  end

  # type stability
  inferable = (
    [:a, :b],
    (:a, :b),
    ["a", "b"],
    ("a", "b"),
    [1, 2],
    (1, 2),
    r"[ab]",
    Colon()
  )
  for colspec in inferable, names in allnames
    @inferred TableTransforms.choose(colspec, names)
  end
end
8 changes: 4 additions & 4 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,18 @@ end
# Convenience constructor: real arguments of a common type are collected
# into a vector and forwarded to the `Polynomial` constructor.
function Polynomial(args::T...) where {T<:Real}
  return Polynomial(collect(args))
end

# Evaluate the polynomial at `x`; the k-th entry of `p.coeffs`
# multiplies `x^(k-1)`.
function (p::Polynomial)(x)
  return sum(c * x^(k-1) for (k, c) in enumerate(p.coeffs))
end

function isequalmissing(v1, v2)
length(v1) == length(v2) || return false
for (x, y) in zip(v1, v2)
# Compare two collections element by element with `===`, so that
# `missing` entries are considered equal to each other. Collections of
# different lengths are never equal.
function isequalmissing(a, b)
  length(a) == length(b) || return false
  return all(x === y for (x, y) in zip(a, b))
end


# list of tests
# Each entry is the name of a test file; "colspec.jl" holds the ColSpec tests.
# NOTE(review): the code consuming this list is outside this excerpt —
# presumably each file is included in turn; confirm against the rest of runtests.jl.
testfiles = [
"distributions.jl",
"colspec.jl",
"transforms.jl"
]

Expand Down
86 changes: 0 additions & 86 deletions test/transforms.jl
Original file line number Diff line number Diff line change
@@ -1,92 +1,6 @@
@testset "Transforms" begin
# using MersenneTwister for compatibility between Julia versions
rng = MersenneTwister(42)
@testset "ColSpec" begin
veccols = [:a, :b, :c, :d, :e, :f]
tupcols = (:a, :b, :c, :d, :e, :f)

# vector of symbols
colspec = [:a, :c, :e]
cols = TableTransforms._filter(colspec, veccols)
@test cols == [:a, :c, :e]
cols = TableTransforms._filter(colspec, tupcols)
@test cols == [:a, :c, :e]

# tuple of symbols
colspec = (:a, :c, :e)
cols = TableTransforms._filter(colspec, veccols)
@test cols == [:a, :c, :e]
cols = TableTransforms._filter(colspec, tupcols)
@test cols == [:a, :c, :e]

# vector of strings
colspec = ["a", "c", "e"]
cols = TableTransforms._filter(colspec, veccols)
@test cols == [:a, :c, :e]
cols = TableTransforms._filter(colspec, tupcols)
@test cols == [:a, :c, :e]

# tuple of strings
colspec = ("a", "c", "e")
cols = TableTransforms._filter(colspec, veccols)
@test cols == [:a, :c, :e]
cols = TableTransforms._filter(colspec, tupcols)
@test cols == [:a, :c, :e]

# vector of integers
colspec = [1, 3, 5]
cols = TableTransforms._filter(colspec, veccols)
@test cols == [:a, :c, :e]
cols = TableTransforms._filter(colspec, tupcols)
@test cols == [:a, :c, :e]

# tuple of integers
colspec = (1, 3, 5)
cols = TableTransforms._filter(colspec, veccols)
@test cols == [:a, :c, :e]
cols = TableTransforms._filter(colspec, tupcols)
@test cols == [:a, :c, :e]

# regex
colspec = r"[ace]"
cols = TableTransforms._filter(colspec, veccols)
@test cols == [:a, :c, :e]
cols = TableTransforms._filter(colspec, tupcols)
@test cols == [:a, :c, :e]

# colon
cols = TableTransforms._filter(:, veccols)
@test cols == [:a, :b, :c, :d, :e, :f]
cols = TableTransforms._filter(:, tupcols)
@test cols == [:a, :b, :c, :d, :e, :f]

# throws
@test_throws AssertionError TableTransforms._filter(r"x", veccols)
@test_throws AssertionError TableTransforms._filter(r"x", tupcols)
@test_throws AssertionError TableTransforms._filter(String[], veccols)
@test_throws AssertionError TableTransforms._filter(String[], tupcols)
@test_throws AssertionError TableTransforms._filter(Symbol[], veccols)
@test_throws AssertionError TableTransforms._filter(Symbol[], tupcols)

# type stability
@inferred TableTransforms._filter([:a, :b], veccols)
@inferred TableTransforms._filter([:a, :b], tupcols)
@inferred TableTransforms._filter((:a, :b), veccols)
@inferred TableTransforms._filter((:a, :b), tupcols)
@inferred TableTransforms._filter(["a", "b"], veccols)
@inferred TableTransforms._filter(["a", "b"], tupcols)
@inferred TableTransforms._filter(("a", "b"), veccols)
@inferred TableTransforms._filter(("a", "b"), tupcols)
@inferred TableTransforms._filter([1, 2], veccols)
@inferred TableTransforms._filter([1, 2], tupcols)
@inferred TableTransforms._filter((1, 2), veccols)
@inferred TableTransforms._filter((1, 2), tupcols)
@inferred TableTransforms._filter(r"[ab]", veccols)
@inferred TableTransforms._filter(r"[ab]", tupcols)
@inferred TableTransforms._filter(:, veccols)
@inferred TableTransforms._filter(:, tupcols)
end

@testset "Select" begin
a = rand(4000)
b = rand(4000)
Expand Down

0 comments on commit d1fd5ec

Please sign in to comment.