Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Replace #42

Merged
merged 14 commits into from
Apr 13, 2022
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ Please check the docstrings for additional information.
| `DropMissing` | Drop missings |
| `Rename` | Column renaming |
| `Coalesce` | Replace missings |
| `Replace` | Replace values |
| `Identity` | Identity transform |
| `Center` | Mean removal |
| `Scale` | Interval scaling |
Expand Down
1 change: 1 addition & 0 deletions src/TableTransforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ export
DropMissing,
Rename,
Coalesce,
Replace,
eliascarv marked this conversation as resolved.
Show resolved Hide resolved
Identity,
Center,
Scale,
Expand Down
1 change: 1 addition & 0 deletions src/transforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ include("transforms/select.jl")
include("transforms/filter.jl")
include("transforms/rename.jl")
include("transforms/coalesce.jl")
include("transforms/replace.jl")
eliascarv marked this conversation as resolved.
Show resolved Hide resolved
include("transforms/identity.jl")
include("transforms/center.jl")
include("transforms/scale.jl")
Expand Down
30 changes: 30 additions & 0 deletions src/transforms/replace.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# ------------------------------------------------------------------
# Licensed under the MIT License. See LICENSE in the project root.
# ------------------------------------------------------------------

"""
    Replace(old₁ => new₁, old₂ => new₂, ..., oldₙ => newₙ)

Replaces all occurrences of `oldᵢ` with `newᵢ` in the table.

Occurrences are matched with `===` (the pairs are stored in an
`IdDict`), so values that are merely `==`, such as `1` and `true`,
are treated as distinct.

# Examples

```julia
Replace(1 => -1, 5 => -5)
Replace(1 => 1.5, 5 => 5.5, 4 => true)
```
"""
struct Replace{K,V} <: Colwise
# lookup table from each old value to its replacement
pairs::IdDict{K,V}
end

# A Replace without any `old => new` pair is rejected.
function Replace()
  msg = "Cannot create a Replace object without arguments."
  throw(ArgumentError(msg))
end

# Collect the given `old => new` pairs into an `IdDict` and wrap it.
function Replace(pairs::Pair...)
  table = IdDict(values(pairs))
  return Replace(table)
end

# Replace is declared revertible; the original values are kept by `colcache`.
function isrevertible(::Type{<:Replace})
  return true
end

# Build the revert cache for column `x`: a `Dict` mapping the index of every
# element that is identical (`===`) to one of the old values to that old value.
function colcache(transform::Replace, x)
  olds = keys(transform.pairs)
  # `Ref(old)` keeps `old` scalar under broadcasting, so a collection-valued
  # key (e.g. a tuple) is stored whole instead of being zipped elementwise
  # with the vector of indices.
  inds = [findall(v -> v === old, x) .=> Ref(old) for old in olds]
  return Dict(reduce(vcat, inds))
end

# Apply the replacement to column `x`: each element that is a key of the
# lookup table is swapped for its new value, all others pass through.
# The cache `c` is not needed in the forward direction.
function colapply(transform::Replace, x, c)
  lookup = transform.pairs
  return map(x) do val
    get(lookup, val, val)
  end
end
juliohm marked this conversation as resolved.
Show resolved Hide resolved

# Undo the replacement on column `x`: positions recorded in the cache `c`
# get their original value back, every other element is kept as is.
function colrevert(::Replace, x, c)
  # The cache is keyed by the indices `findall` produced in `colcache`,
  # i.e. `keys(x)`; iterating over them (rather than `1:length(x)`) keeps the
  # lookup correct for columns whose axes do not start at 1.
  return map(i -> get(c, i, x[i]), keys(x))
end
juliohm marked this conversation as resolved.
Show resolved Hide resolved
83 changes: 83 additions & 0 deletions test/transforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,89 @@
@test ttypes == Tables.schema(tₒ).types
end

@testset "Replace" begin
  a = [3, 2, 1, 4, 5, 3]
  b = [2, 4, 4, 5, 8, 5]
  c = [1, 1, 6, 2, 4, 1]
  d = [4, 3, 7, 5, 4, 1]
  e = [5, 5, 2, 6, 5, 2]
  f = [4, 4, 3, 4, 5, 2]
  t = Table(; a, b, c, d, e, f)

  # replace with a value of the same type
  # (the cache is bound to `cache` so that column `c` is not shadowed)
  T = Replace(1 => -1, 5 => -5)
  n, cache = apply(T, t)
  @test n.a == [3, 2, -1, 4, -5, 3]
  @test n.b == [2, 4, 4, -5, 8, -5]
  @test n.c == [-1, -1, 6, 2, 4, -1]
  @test n.d == [4, 3, 7, -5, 4, -1]
  @test n.e == [-5, -5, 2, 6, -5, 2]
  @test n.f == [4, 4, 3, 4, -5, 2]
  @test isrevertible(T) == true
  tₒ = revert(T, n, cache)
  @test t == tₒ

  # table schema after apply and revert
  T = Replace(1 => -1, 5 => -5)
  n, cache = apply(T, t)
  types = Tables.schema(t).types
  @test types == Tables.schema(n).types
  tₒ = revert(T, n, cache)
  @test types == Tables.schema(tₒ).types

  # replace with a value of another type
  T = Replace(1 => 1.5, 5 => 5.5, 4 => true)
  n, cache = apply(T, t)
  @test n.a == Real[3, 2, 1.5, true, 5.5, 3]
  @test n.b == Real[2, true, true, 5.5, 8, 5.5]
  @test n.c == Real[1.5, 1.5, 6, 2, true, 1.5]
  @test n.d == Real[true, 3, 7, 5.5, true, 1.5]
  @test n.e == Real[5.5, 5.5, 2, 6, 5.5, 2]
  @test n.f == Real[true, true, 3, true, 5.5, 2]
  tₒ = revert(T, n, cache)
  @test t == tₒ

  # table schema after apply and revert
  # (each column type must be the join of the types of its elements)
  T = Replace(1 => 1.5, 5 => 5.5, 4 => true)
  n, cache = apply(T, t)
  tₒ = revert(T, n, cache)
  ttypes = Tables.schema(t).types
  ntypes = Tables.schema(n).types
  @test ntypes[1] == typejoin(typeof.(n.a)...)
  @test ntypes[2] == typejoin(typeof.(n.b)...)
  @test ntypes[3] == typejoin(typeof.(n.c)...)
  @test ntypes[4] == typejoin(typeof.(n.d)...)
  @test ntypes[5] == typejoin(typeof.(n.e)...)
  @test ntypes[6] == typejoin(typeof.(n.f)...)
  @test ttypes == Tables.schema(tₒ).types

  # no occurrences
  T = Replace(10 => 11, 20 => 30)
  n, cache = apply(T, t)
  @test t == n
  tₒ = revert(T, n, cache)
  @test t == tₒ

  # columns with different types
  a = [3, 2, 1, 4, 5, 3]
  b = [2.5, 4.5, 4.7, 2.5, 2.5, 5.3]
  c = [true, false, false, false, true, false]
  d = ['a', 'b', 'c', 'd', 'e', 'a']
  t = Table(; a, b, c, d)

  T = Replace(3 => -3, 2.5 => 2.0, true => false, 'a' => 'A')
  n, cache = apply(T, t)
  @test n.a == [-3, 2, 1, 4, 5, -3]
  @test n.b == [2.0, 4.5, 4.7, 2.0, 2.0, 5.3]
  @test n.c == [false, false, false, false, false, false]
  @test n.d == ['A', 'b', 'c', 'd', 'e', 'A']
  tₒ = revert(T, n, cache)
  @test t == tₒ

  # throws
  @test_throws ArgumentError Replace()
end

@testset "Identity" begin
x = rand(4000)
y = rand(4000)
Expand Down