diff --git a/CHANGELOG.md b/CHANGELOG.md
index f7abfc5..509c52d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changelog

+## Release v1.8
+
+- New function `conditional_mutualinfo`, which computes conditional mutual information.
+
 ## Release v1.7

 - Bug fix in `mutualinfo` for naive estimators from Entropies.jl.
diff --git a/Project.toml b/Project.toml
index fff305b..ce6efff 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "TransferEntropy"
 uuid = "ea221983-52f3-5440-99c7-13ea201cd633"
 repo = "https://github.com/kahaaga/TransferEntropy.jl.git"
-version = "1.7.0"
+version = "1.8.0"

 [deps]
 DSP = "717857b8-e6f2-59f4-9121-6e50c889abd2"
diff --git a/docs/src/mutualinfo.md b/docs/src/mutualinfo.md
index 281b664..61f5282 100644
--- a/docs/src/mutualinfo.md
+++ b/docs/src/mutualinfo.md
@@ -1,4 +1,5 @@
 ```@docs
 mutualinfo
+conditional_mutualinfo
 ```

diff --git a/src/mutualinfo/interface.jl b/src/mutualinfo/interface.jl
index dcdece1..160c2a6 100644
--- a/src/mutualinfo/interface.jl
+++ b/src/mutualinfo/interface.jl
@@ -1,4 +1,4 @@
-export mutualinfo, Kraskov1, Kraskov2
+export mutualinfo, conditional_mutualinfo, Kraskov1, Kraskov2

 abstract type MutualInformationEstimator <: EntropyEstimator end

@@ -6,13 +6,14 @@ abstract type MutualInformationEstimator <: EntropyEstimator end
     mutualinfo(x, y, est; base = 2, q = 1)

 Estimate mutual information between `x` and `y`, ``I^{q}(x; y)``, using the provided
-entropy/probability estimator `est` from Entropies.jl, and Rényi entropy of order `q`
+entropy/probability estimator `est` from Entropies.jl or a specialized estimator from
+TransferEntropy.jl (e.g. [`Kraskov1`](@ref)), and Rényi entropy of order `q`
 (defaults to `q = 1`, which is the Shannon entropy), with logarithms to the given
 `base`.

 Both `x` and `y` can be vectors or (potentially multivariate) [`Dataset`](@ref)s.

 Worth highlighting here are the estimators that compute entropies _directly_, e.g.
-nearest-neighbor based methhods. The choice is between naive
+nearest-neighbor based methods. The choice is between naive
 estimation using the [`KozachenkoLeonenko`](@ref) or [`Kraskov`](@ref) entropy
 estimators, or the improved [`Kraskov1`](@ref) and [`Kraskov2`](@ref) dedicated
 ``I`` estimators. The latter estimators reduce bias compared to the naive estimators.
@@ -49,6 +50,25 @@ function mutualinfo(x::Vector_or_Dataset, y::Vector_or_Dataset, est; base = 2, q
     Y = genentropy(Dataset(y), est; base = base, q = q)
     XY = genentropy(Dataset(x, y), est; base = base, q = q)
     MI = X + Y - XY
-end
+end
+
+"""
+    conditional_mutualinfo(x, y, z, est; base = 2, q = 1)
+
+Estimate the conditional mutual information ``I^{q}(x; y | z)`` between `x` and `y`, given
+`z`, using the provided entropy/probability estimator `est` from Entropies.jl or a
+specialized estimator from TransferEntropy.jl (e.g. [`Kraskov1`](@ref)), and Rényi entropy
+of order `q` (defaults to `q = 1`, which is the Shannon entropy), with logarithms to the
+given `base`.
+
+As for [`mutualinfo`](@ref), the variables `x`, `y` and `z` can be vectors or (potentially
+multivariate) [`Dataset`](@ref)s, and the keyword `q` cannot be provided for
+nearest-neighbor estimators (it is hard-coded to `q = 1`).
+"""
+function conditional_mutualinfo(x::Vector_or_Dataset, y::Vector_or_Dataset, z::Vector_or_Dataset, est;
+        base = 2, q = 1)
+    mutualinfo(x, Dataset(y, z), est; base = base, q = q) -
+        mutualinfo(x, z, est; base = base, q = q)
+end

 include("nearestneighbor.jl")
\ No newline at end of file
diff --git a/src/mutualinfo/nearestneighbor.jl b/src/mutualinfo/nearestneighbor.jl
index efbe673..299c261 100644
--- a/src/mutualinfo/nearestneighbor.jl
+++ b/src/mutualinfo/nearestneighbor.jl
@@ -169,4 +169,11 @@ function mutualinfo(x::Vector_or_Dataset{D1, T}, y::Vector_or_Dataset{D2, T}, es
     else
         return MI
     end
+end
+
+# knn estimators don't have the `q` keyword, so they need a specialized method
+function conditional_mutualinfo(x::Vector_or_Dataset, y::Vector_or_Dataset, z::Vector_or_Dataset,
+        est::KNNMutualInformationEstimator; base = MathConstants.e)
+    mutualinfo(x, Dataset(y, z), est; base = base) -
+        mutualinfo(x, z, est; base = base)
 end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index bf395b5..02b619c 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -52,6 +52,30 @@ BruteForce = Entropies.BruteForce
     @test mutualinfo(z, w, est_k2) isa Real
 end

+@testset "Conditional mutual information" begin
+    s, t, c = rand(100), rand(100), rand(100)
+    est_knn = Kraskov1(2)
+    est_bin = RectangularBinning(3)
+    # The binning estimator yields non-negative values
+    @test conditional_mutualinfo(s, t, c, est_bin, q = 2) isa Real
+    @test conditional_mutualinfo(s, t, c, est_bin, q = 2) >= 0.0
+    # Verify the formula I(X; Y | Z) = I(X; Y, Z) - I(X; Z)
+    @test conditional_mutualinfo(s, t, c, est_bin, base = 2) ≈
+        mutualinfo(s, Dataset(t, c), est_bin, base = 2) - mutualinfo(s, c, est_bin, base = 2)
+
+    @test conditional_mutualinfo(s, t, c, est_knn) isa Real
+    @test conditional_mutualinfo(s, t, c, est_knn, base = 2) ≈
+        mutualinfo(s, Dataset(t, c), est_knn, base = 2) - mutualinfo(s, c, est_knn, base = 2)
+
+    # Different types of input
+    @test conditional_mutualinfo(s, Dataset(t, c), c, est_bin) isa Real
+    @test conditional_mutualinfo(Dataset(s, t), Dataset(t, c), c, est_bin) isa Real
+    @test conditional_mutualinfo(Dataset(s, t), Dataset(t, c), Dataset(c, s), est_bin) isa Real
+    @test conditional_mutualinfo(s, Dataset(t, c), Dataset(c, s), est_bin) isa Real
+    @test conditional_mutualinfo(s, t, Dataset(c, s), est_bin) isa Real
+    @test conditional_mutualinfo(Dataset(s, t), t, c, est_bin) isa Real
+end
+
 @testset "Transfer entropy" begin
     s, t, c = rand(100), rand(100), rand(100)