Skip to content

Commit

Permalink
Return NaN for negative ModeResult variance estimates (#2471)
Browse files Browse the repository at this point in the history
* Return NaN for negative ModeResult variance estimates

* Apply suggestions from mhauru

Co-authored-by: Markus Hauru <markus@mhauru.org>

* Add doc to StatsBase.coeftable(::ModeResult, ...) for numerrors_warnonly

* Add test for same coeftable with/without numerrors_warnonly

* Add a test for coeftable with negative variance

---------

Co-authored-by: Markus Hauru <markus@mhauru.org>
Co-authored-by: Xianda Sun <5433119+sunxd3@users.noreply.github.com>
Co-authored-by: Markus Hauru <mhauru@turing.ac.uk>
  • Loading branch information
4 people authored Feb 18, 2025
1 parent ddd74b1 commit 7b43f58
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 3 deletions.
61 changes: 58 additions & 3 deletions src/optimisation/Optimisation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ using Printf: Printf
using ForwardDiff: ForwardDiff
using StatsAPI: StatsAPI
using Statistics: Statistics
using LinearAlgebra: LinearAlgebra

export maximum_a_posteriori, maximum_likelihood
# The MAP and MLE exports are only needed for the Optim.jl interface.
Expand Down Expand Up @@ -228,11 +229,61 @@ end

# Various StatsBase methods for ModeResult

function StatsBase.coeftable(m::ModeResult; level::Real=0.95)
"""
StatsBase.coeftable(m::ModeResult; level::Real=0.95, numerrors_warnonly::Bool=true)
Return a table with coefficients and related statistics of the model. level determines the
level for confidence intervals (by default, 95%).
In case the `numerrors_warnonly` argument is true (the default) numerical errors encountered
during the computation of the standard errors will be caught and reported in an extra
"Error notes" column.
"""
function StatsBase.coeftable(m::ModeResult; level::Real=0.95, numerrors_warnonly::Bool=true)
# Get columns for coeftable.
terms = string.(StatsBase.coefnames(m))
estimates = m.values.array[:, 1]
stderrors = StatsBase.stderror(m)
# If numerrors_warnonly is true, and if either the information matrix is singular or has
# negative entries on its diagonal, then `notes` will be a list of strings for each
# value in `m.values`, explaining why the standard error is NaN.
notes = nothing
local stderrors
if numerrors_warnonly
infmat = StatsBase.informationmatrix(m)
local vcov
try
vcov = inv(infmat)
catch e
if isa(e, LinearAlgebra.SingularException)
stderrors = fill(NaN, length(m.values))
notes = fill("Information matrix is singular", length(m.values))
else
rethrow(e)
end
else
vars = LinearAlgebra.diag(vcov)
stderrors = eltype(vars)[]
if any(x -> x < 0, vars)
notes = []
end
for var in vars
if var >= 0
push!(stderrors, sqrt(var))
if notes !== nothing
push!(notes, "")
end
else
push!(stderrors, NaN)
if notes !== nothing
push!(notes, "Negative variance")
end
end
end
end
else
stderrors = StatsBase.stderror(m)
end
zscore = estimates ./ stderrors
p = map(z -> StatsAPI.pvalue(Distributions.Normal(), z; tail=:both), zscore)

Expand All @@ -244,7 +295,7 @@ function StatsBase.coeftable(m::ModeResult; level::Real=0.95)
level_ = 100 * level
level_percentage = isinteger(level_) ? Int(level_) : level_

cols = [estimates, stderrors, zscore, p, ci_low, ci_high]
cols = Vector[estimates, stderrors, zscore, p, ci_low, ci_high]
colnms = [
"Coef.",
"Std. Error",
Expand All @@ -253,6 +304,10 @@ function StatsBase.coeftable(m::ModeResult; level::Real=0.95)
"Lower $(level_percentage)%",
"Upper $(level_percentage)%",
]
if notes !== nothing
push!(cols, notes)
push!(colnms, "Error notes")
end
return StatsBase.CoefTable(cols, colnms, terms)
end

Expand Down
59 changes: 59 additions & 0 deletions test/optimisation/Optimisation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,65 @@ using Turing
maximum_a_posteriori(m; adtype=adbackend)
end
end

@testset "Collinear coeftable" begin
    # Perfectly collinear design: `a .* x` and `b .* x` are the same column,
    # so the information matrix at the MLE is singular and standard errors
    # cannot be computed.
    xs = [-1.0, 0.0, 1.0]
    ys = [0.0, 0.0, 0.0]

    @model function collinear(x, y)
        a ~ Normal(0, 1)
        b ~ Normal(0, 1)
        return y ~ MvNormal(a .* x .+ b .* x, 1)
    end

    model = collinear(xs, ys)
    mle_estimate = Turing.Optimisation.estimate_mode(model, MLE())
    tab = coeftable(mle_estimate)
    # Use @test rather than @assert: @assert bypasses the Test framework's
    # reporting and may be disabled at higher optimisation levels.
    # Std. error column should be NaN, with the reason in "Error notes".
    @test isnan(tab.cols[2][1])
    @test tab.colnms[end] == "Error notes"
    @test occursin("singular", tab.cols[end][1])
end

@testset "Negative variance" begin
    # A model for which the likelihood has a saddle point at x=0, y=0.
    # Creating an optimisation result for this model at x=0, y=0 results in a
    # negative variance for one of the variables, because the variance is
    # calculated as the diagonal of the inverse of the Hessian.
    @model function saddle_model()
        x ~ Normal(0, 1)
        y ~ Normal(x, 1)
        Turing.@addlogprob! x^2 - y^2
        return nothing
    end
    m = saddle_model()
    ctx = Turing.Optimisation.OptimizationContext(DynamicPPL.LikelihoodContext())
    optim_ld = Turing.Optimisation.OptimLogDensity(m, ctx)
    # Build a ModeResult by hand at the saddle point rather than optimising.
    vals = Turing.Optimisation.NamedArrays.NamedArray([0.0, 0.0])
    m = Turing.Optimisation.ModeResult(vals, nothing, 0.0, optim_ld)
    ct = coeftable(m)
    # Use @test rather than @assert so failures are reported by the Test
    # framework instead of throwing (and cannot be compiled out).
    @test isnan(ct.cols[2][1])
    @test ct.colnms[end] == "Error notes"
    @test occursin("Negative variance", ct.cols[end][1])
end

@testset "Same coeftable with/without numerrors_warnonly" begin
    # Well-posed model: no numerical errors occur, so the table must be
    # identical whether or not numerrors_warnonly is enabled (in particular,
    # no extra "Error notes" column should appear).
    xs = [0.0, 1.0, 2.0]

    @model function extranormal(x)
        mean ~ Normal(0, 1)
        return x ~ Normal(mean, 1)
    end

    model = extranormal(xs)
    mle_estimate = Turing.Optimisation.estimate_mode(model, MLE())
    warnonly_coeftable = coeftable(mle_estimate; numerrors_warnonly=true)
    no_warnonly_coeftable = coeftable(mle_estimate; numerrors_warnonly=false)
    # Use @test rather than @assert so the Test framework records each
    # comparison and reports failures with context.
    @test warnonly_coeftable.cols == no_warnonly_coeftable.cols
    @test warnonly_coeftable.colnms == no_warnonly_coeftable.colnms
    @test warnonly_coeftable.rownms == no_warnonly_coeftable.rownms
    @test warnonly_coeftable.pvalcol == no_warnonly_coeftable.pvalcol
    @test warnonly_coeftable.teststatcol == no_warnonly_coeftable.teststatcol
end
end

end

0 comments on commit 7b43f58

Please sign in to comment.