diff --git a/src/colspec.jl b/src/colspec.jl index de9504c6..30ef15c7 100644 --- a/src/colspec.jl +++ b/src/colspec.jl @@ -10,7 +10,7 @@ const ColSelector = Union{Symbol,Integer,AbstractString} `ColSpec` is a union of types used to filter columns. The `ColSpec` type together with the `ColSelector` union type and -the `_filter` internal function form the ColSpec interface. +the `choose` function form the ColSpec interface. To implement the ColSpec interface, the following steps must be performed: @@ -21,12 +21,12 @@ struct MyTransform{S<:ColSpec,#= other type params =#} # other fileds end ``` -2. use `_filter(colspec, names)` internal function in apply: +2. use `choose(colspec, names)` function in apply: ```julia function apply(transform::MyTransform, table) names = Tables.columnnames(table) # selected column names - snames = _filter(transform.colspec, names) + snames = choose(transform.colspec, names) # code... end ``` @@ -41,33 +41,33 @@ end """ const ColSpec = Union{Vector{T},NTuple{N,T},Regex,Colon} where {N,T<:ColSelector} -# filter table columns using colspec -function _filter(colspec::Vector{Symbol}, names) +# choose column names using colspec +function choose(colspec::Vector{Symbol}, names) # validate columns @assert !isempty(colspec) "Invalid column selection." @assert colspec ⊆ names "Invalid column selection." return colspec end -_filter(colspec::Vector{<:AbstractString}, names) = - _filter(Symbol.(colspec), names) +choose(colspec::Vector{<:AbstractString}, names) = + choose(Symbol.(colspec), names) -_filter(colspec::Vector{<:Integer}, names::Vector) = - _filter(names[colspec], names) +choose(colspec::Vector{<:Integer}, names::Vector) = + choose(names[colspec], names) -_filter(colspec::Vector{<:Integer}, names::Tuple) = - _filter(colspec, collect(names)) +choose(colspec::Vector{<:Integer}, names::Tuple) = + choose(colspec, collect(names)) -_filter(colspec::NTuple{N,<:ColSelector}, names) where {N} = - _filter(collect(colspec), names) +choose(colspec::NTuple{N,<:ColSelector}, names) where {N} = + choose(collect(colspec), names) -function _filter(colspec::Regex, names::Vector) - fnames = filter(col -> occursin(colspec, String(col)), names) - _filter(fnames, names) +function choose(colspec::Regex, names::Vector) + fnames = filter(n -> occursin(colspec, String(n)), names) + choose(fnames, names) end -_filter(colspec::Regex, names::Tuple) = - _filter(colspec, collect(names)) +choose(colspec::Regex, names::Tuple) = + choose(colspec, collect(names)) -_filter(::Colon, names::Vector) = names -_filter(::Colon, names::Tuple) = collect(names) +choose(::Colon, names::Vector) = names +choose(::Colon, names::Tuple) = collect(names) diff --git a/src/transforms/filter.jl b/src/transforms/filter.jl index daef0906..3c380eba 100644 --- a/src/transforms/filter.jl +++ b/src/transforms/filter.jl @@ -79,7 +79,7 @@ _nonmissing(x) = _nonmissing(eltype(x), x) function apply(transform::DropMissing, table) names = Tables.columnnames(table) types = Tables.schema(table).types - snames = _filter(transform.colspec, names) + snames = choose(transform.colspec, names) ftrans = _ftrans(transform, snames) newtable, fcache = apply(ftrans, table) diff --git a/src/transforms/select.jl b/src/transforms/select.jl index db389e51..5897a3b1 100644 --- a/src/transforms/select.jl +++ b/src/transforms/select.jl @@ -69,7 +69,7 @@ isrevertible(::Type{<:Select}) = true function apply(transform::Select, table) # retrieve relevant column names allcols = collect(Tables.columnnames(table)) - select = _filter(transform.colspec, allcols) + select = choose(transform.colspec, allcols) reject = setdiff(allcols, select) # keep track of indices to revert later @@ -141,7 +141,7 @@ isrevertible(::Type{<:Reject}) = true function apply(transform::Reject, table) allcols = Tables.columnnames(table) - reject = _filter(transform.colspec, allcols) + reject = choose(transform.colspec, allcols) select = setdiff(allcols, reject) strans = Select(select) newtable, scache = apply(strans, table) diff --git a/test/colspec.jl b/test/colspec.jl new file mode 100644 index 00000000..1039ae17 --- /dev/null +++ b/test/colspec.jl @@ -0,0 +1,85 @@ +@testset "ColSpec" begin + vecnames = [:a, :b, :c, :d, :e, :f] + tupnames = (:a, :b, :c, :d, :e, :f) + + # vector of symbols + colspec = [:a, :c, :e] + snames = TableTransforms.choose(colspec, vecnames) + @test snames == [:a, :c, :e] + snames = TableTransforms.choose(colspec, tupnames) + @test snames == [:a, :c, :e] + + # tuple of symbols + colspec = (:a, :c, :e) + snames = TableTransforms.choose(colspec, vecnames) + @test snames == [:a, :c, :e] + snames = TableTransforms.choose(colspec, tupnames) + @test snames == [:a, :c, :e] + + # vector of strings + colspec = ["a", "c", "e"] + snames = TableTransforms.choose(colspec, vecnames) + @test snames == [:a, :c, :e] + snames = TableTransforms.choose(colspec, tupnames) + @test snames == [:a, :c, :e] + + # tuple of strings + colspec = ("a", "c", "e") + snames = TableTransforms.choose(colspec, vecnames) + @test snames == [:a, :c, :e] + snames = TableTransforms.choose(colspec, tupnames) + @test snames == [:a, :c, :e] + + # vector of integers + colspec = [1, 3, 5] + snames = TableTransforms.choose(colspec, vecnames) + @test snames == [:a, :c, :e] + snames = TableTransforms.choose(colspec, tupnames) + @test snames == [:a, :c, :e] + + # tuple of integers + colspec = (1, 3, 5) + snames = TableTransforms.choose(colspec, vecnames) + @test snames == [:a, :c, :e] + snames = TableTransforms.choose(colspec, tupnames) + @test snames == [:a, :c, :e] + + # regex + colspec = r"[ace]" + snames = TableTransforms.choose(colspec, vecnames) + @test snames == [:a, :c, :e] + snames = TableTransforms.choose(colspec, tupnames) + @test snames == [:a, :c, :e] + + # colon + snames = TableTransforms.choose(:, vecnames) + @test snames == [:a, :b, :c, :d, :e, :f] + snames = TableTransforms.choose(:, tupnames) + @test snames == [:a, :b, :c, :d, :e, :f] + + # throws + @test_throws AssertionError TableTransforms.choose(r"x", vecnames) + @test_throws AssertionError TableTransforms.choose(r"x", tupnames) + @test_throws AssertionError TableTransforms.choose(String[], vecnames) + @test_throws AssertionError TableTransforms.choose(String[], tupnames) + @test_throws AssertionError TableTransforms.choose(Symbol[], vecnames) + @test_throws AssertionError TableTransforms.choose(Symbol[], tupnames) + + # type stability + @inferred TableTransforms.choose([:a, :b], vecnames) + @inferred TableTransforms.choose([:a, :b], tupnames) + @inferred TableTransforms.choose((:a, :b), vecnames) + @inferred TableTransforms.choose((:a, :b), tupnames) + @inferred TableTransforms.choose(["a", "b"], vecnames) + @inferred TableTransforms.choose(["a", "b"], tupnames) + @inferred TableTransforms.choose(("a", "b"), vecnames) + @inferred TableTransforms.choose(("a", "b"), tupnames) + @inferred TableTransforms.choose([1, 2], vecnames) + @inferred TableTransforms.choose([1, 2], tupnames) + @inferred TableTransforms.choose((1, 2), vecnames) + @inferred TableTransforms.choose((1, 2), tupnames) + @inferred TableTransforms.choose(r"[ab]", vecnames) + @inferred TableTransforms.choose(r"[ab]", tupnames) + @inferred TableTransforms.choose(:, vecnames) + @inferred TableTransforms.choose(:, tupnames) +end diff --git a/test/runtests.jl b/test/runtests.jl index 48291b14..b744fe67 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -27,18 +27,18 @@ end Polynomial(args::T...) where {T<:Real} = Polynomial(collect(args)) (p::Polynomial)(x) = sum(a * x^(i-1) for (i, a) in enumerate(p.coeffs)) -function isequalmissing(v1, v2) - length(v1) == length(v2) || return false - for (x, y) in zip(v1, v2) +function isequalmissing(a, b) + length(a) == length(b) || return false + for (x, y) in zip(a, b) x === y || return false end return true end - # list of tests testfiles = [ "distributions.jl", + "colspec.jl", "transforms.jl" ] diff --git a/test/transforms.jl b/test/transforms.jl index 3497fee0..cacacf19 100644 --- a/test/transforms.jl +++ b/test/transforms.jl @@ -1,92 +1,6 @@ @testset "Transforms" begin # using MersenneTwister for compatibility between Julia versions rng = MersenneTwister(42) - @testset "ColSpec" begin - veccols = [:a, :b, :c, :d, :e, :f] - tupcols = (:a, :b, :c, :d, :e, :f) - - # vector of symbols - colspec = [:a, :c, :e] - cols = TableTransforms._filter(colspec, veccols) - @test cols == [:a, :c, :e] - cols = TableTransforms._filter(colspec, tupcols) - @test cols == [:a, :c, :e] - - # tuple of symbols - colspec = (:a, :c, :e) - cols = TableTransforms._filter(colspec, veccols) - @test cols == [:a, :c, :e] - cols = TableTransforms._filter(colspec, tupcols) - @test cols == [:a, :c, :e] - - # vector of strings - colspec = ["a", "c", "e"] - cols = TableTransforms._filter(colspec, veccols) - @test cols == [:a, :c, :e] - cols = TableTransforms._filter(colspec, tupcols) - @test cols == [:a, :c, :e] - - # tuple of strings - colspec = ("a", "c", "e") - cols = TableTransforms._filter(colspec, veccols) - @test cols == [:a, :c, :e] - cols = TableTransforms._filter(colspec, tupcols) - @test cols == [:a, :c, :e] - - # vector of integers - colspec = [1, 3, 5] - cols = TableTransforms._filter(colspec, veccols) - @test cols == [:a, :c, :e] - cols = TableTransforms._filter(colspec, tupcols) - @test cols == [:a, :c, :e] - - # tuple of integers - colspec = (1, 3, 5) - cols = TableTransforms._filter(colspec, veccols) - @test cols == [:a, :c, :e] - cols = TableTransforms._filter(colspec, tupcols) - @test cols == [:a, :c, :e] - - # regex - colspec = r"[ace]" - cols = TableTransforms._filter(colspec, veccols) - @test cols == [:a, :c, :e] - cols = TableTransforms._filter(colspec, tupcols) - @test cols == [:a, :c, :e] - - # colon - cols = TableTransforms._filter(:, veccols) - @test cols == [:a, :b, :c, :d, :e, :f] - cols = TableTransforms._filter(:, tupcols) - @test cols == [:a, :b, :c, :d, :e, :f] - - # throws - @test_throws AssertionError TableTransforms._filter(r"x", veccols) - @test_throws AssertionError TableTransforms._filter(r"x", tupcols) - @test_throws AssertionError TableTransforms._filter(String[], veccols) - @test_throws AssertionError TableTransforms._filter(String[], tupcols) - @test_throws AssertionError TableTransforms._filter(Symbol[], veccols) - @test_throws AssertionError TableTransforms._filter(Symbol[], tupcols) - - # type stability - @inferred TableTransforms._filter([:a, :b], veccols) - @inferred TableTransforms._filter([:a, :b], tupcols) - @inferred TableTransforms._filter((:a, :b), veccols) - @inferred TableTransforms._filter((:a, :b), tupcols) - @inferred TableTransforms._filter(["a", "b"], veccols) - @inferred TableTransforms._filter(["a", "b"], tupcols) - @inferred TableTransforms._filter(("a", "b"), veccols) - @inferred TableTransforms._filter(("a", "b"), tupcols) - @inferred TableTransforms._filter([1, 2], veccols) - @inferred TableTransforms._filter([1, 2], tupcols) - @inferred TableTransforms._filter((1, 2), veccols) - @inferred TableTransforms._filter((1, 2), tupcols) - @inferred TableTransforms._filter(r"[ab]", veccols) - @inferred TableTransforms._filter(r"[ab]", tupcols) - @inferred TableTransforms._filter(:, veccols) - @inferred TableTransforms._filter(:, tupcols) - end - @testset "Select" begin a = rand(4000) b = rand(4000)