From 516e6534272107dd9eb07f2b905540c999aae021 Mon Sep 17 00:00:00 2001 From: Osvaldo A Martin Date: Wed, 15 May 2024 15:55:29 -0300 Subject: [PATCH] Fix mle bugs in more distributions (#435) * fix mle estimations * fix test * fix test --- preliz/distributions/asymmetric_laplace.py | 16 ++---- preliz/distributions/exgaussian.py | 19 ++----- preliz/distributions/skewnormal.py | 7 +-- preliz/distributions/triangular.py | 6 ++- preliz/distributions/truncatednormal.py | 63 +++++++++++----------- preliz/internal/special.py | 2 +- preliz/tests/test_mle.py | 23 +++++--- preliz/tests/test_scipy.py | 6 +-- 8 files changed, 67 insertions(+), 75 deletions(-) diff --git a/preliz/distributions/asymmetric_laplace.py b/preliz/distributions/asymmetric_laplace.py index 87078f25..802f805f 100644 --- a/preliz/distributions/asymmetric_laplace.py +++ b/preliz/distributions/asymmetric_laplace.py @@ -5,6 +5,7 @@ from .distributions import Continuous from ..internal.distribution_helper import all_not_none, eps +from ..internal.optimization import optimize_ml class AsymmetricLaplace(Continuous): @@ -178,9 +179,8 @@ def _fit_moments(self, mean, sigma): b = (sigma / 2) * (2**0.5) self._update(1, mu, b) - def _fit_mle(self, sample, **kwargs): - kappa, mu, b = nb_fit_mle(sample) - self._update(kappa, mu, b) + def _fit_mle(self, sample): + optimize_ml(self, sample) @nb.vectorize(nopython=True, cache=True) @@ -230,13 +230,3 @@ def nb_rvs(random_samples, mu, b, kappa): @nb.njit(cache=True) def nb_entropy(b, kappa): return 1 + np.log(kappa + 1 / kappa) + np.log(b) - - -@nb.njit(cache=True) -def nb_fit_mle(sample): - new_mu = np.median(sample) - new_b = np.mean(np.abs(sample - new_mu)) - new_kappa = np.sum((sample - new_mu) * np.sign(sample - new_mu)) / np.sum( - np.abs(sample - new_mu) - ) - return new_kappa, new_mu, new_b diff --git a/preliz/distributions/exgaussian.py b/preliz/distributions/exgaussian.py index 0edaf747..ebdd41ae 100644 --- a/preliz/distributions/exgaussian.py +++ b/preliz/distributions/exgaussian.py @@ -6,7 +6,7 @@ from .distributions import Continuous from ..internal.distribution_helper import eps, all_not_none -from ..internal.special import erf, erfc, erfcx, mean_and_std +from ..internal.special import erf, mean_and_std, norm_logcdf from ..internal.optimization import find_ppf @@ -143,7 +143,7 @@ def kurtosis(self): def rvs(self, size=None, random_state=None): random_state = np.random.default_rng(random_state) return random_state.normal(self.mu, self.sigma, size) + random_state.exponential( - 1 / self.nu, size + self.nu, size ) def _fit_moments(self, mean, sigma): @@ -152,11 +152,11 @@ def _fit_moments(self, mean, sigma): def _fit_mle(self, sample): mean, std = mean_and_std(sample) - skweness = skew(sample) + skweness = max(1e-4, skew(sample)) nu = std * (skweness / 2) ** (1 / 3) mu = mean - nu var = std**2 * (1 - (skweness / 2) ** (2 / 3)) - self._update(mu, var**0.5, 1 / nu) + self._update(mu, var**0.5, nu) @nb.vectorize(nopython=True, cache=True) @@ -179,7 +179,7 @@ def nb_logpdf(x, mu, sigma, nu): -np.log(nu) + (mu - x) / nu + 0.5 * (sigma / nu) ** 2 - + normal_lcdf(x, mu + (sigma**2) / nu, sigma) + + norm_logcdf((x - (mu + (sigma**2) / nu)) / sigma) ) else: return -np.log(sigma) - 0.5 * np.log(2 * np.pi) - 0.5 * ((x - mu) / sigma) ** 2 @@ -188,12 +188,3 @@ def nb_logpdf(x, mu, sigma, nu): @nb.njit(cache=True) def nb_neg_logpdf(x, mu, sigma, nu): return -(nb_logpdf(x, mu, sigma, nu)).sum() - - -@nb.vectorize(nopython=True, cache=True) -def normal_lcdf(x, mu, sigma): - z_val = (x - mu) / sigma - if z_val < -1: - return np.log(erfcx(-z_val / 2**0.5) / 2) - abs(z_val) ** 2 / 2 - else: - return np.log1p(-erfc(z_val / 2**0.5) / 2) diff --git a/preliz/distributions/skewnormal.py b/preliz/distributions/skewnormal.py index 896c9a42..228631a1 100644 --- a/preliz/distributions/skewnormal.py +++ b/preliz/distributions/skewnormal.py @@ -8,7 +8,7 @@ from .distributions import Continuous from ..internal.distribution_helper import eps, to_precision, from_precision, all_not_none from ..internal.special import erf, norm_logcdf -from ..internal.optimization import find_ppf, optimize_ml +from ..internal.optimization import find_ppf, optimize_ml, optimize_moments class SkewNormal(Continuous): @@ -176,8 +176,9 @@ def rvs(self, size=None, random_state=None): return np.sign(u_0) * u_1 * self.sigma + self.mu def _fit_moments(self, mean, sigma): - # Assume gaussian - self._update(mean, sigma, 0) + if self.alpha is None: + self.alpha = 0 + optimize_moments(self, mean, sigma) def _fit_mle(self, sample): skewness = skew(sample) diff --git a/preliz/distributions/triangular.py b/preliz/distributions/triangular.py index 5bf91848..03cf1315 100644 --- a/preliz/distributions/triangular.py +++ b/preliz/distributions/triangular.py @@ -4,7 +4,6 @@ import numpy as np import numba as nb -from ..internal.optimization import optimize_ml from ..internal.distribution_helper import all_not_none from .distributions import Continuous @@ -172,7 +171,10 @@ def _fit_moments(self, mean, sigma): self._update(lower, c, upper) def _fit_mle(self, sample): - optimize_ml(self, sample) + lower = np.min(sample) + upper = np.max(sample) + middle = (np.mean(sample) * 3) - lower - upper + self._update(lower, middle, upper) @nb.vectorize(nopython=True, cache=True) diff --git a/preliz/distributions/truncatednormal.py b/preliz/distributions/truncatednormal.py index 357cb63a..5bcc3bea 100644 --- a/preliz/distributions/truncatednormal.py +++ b/preliz/distributions/truncatednormal.py @@ -3,7 +3,7 @@ import numpy as np import numba as nb -from ..internal.special import cdf_bounds, erf, erfinv, mean_and_std, ppf_bounds_cont +from ..internal.special import cdf_bounds, erf, erfinv, ppf_bounds_cont from ..internal.optimization import optimize_ml from ..internal.distribution_helper import eps, all_not_none from .distributions import Continuous @@ -128,14 +128,14 @@ def entropy(self): def mean(self): alpha = (self.lower - self.mu) / self.sigma beta = (self.upper - self.mu) / self.sigma - z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5)) + z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5)) return ( self.mu + ( (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * alpha**2)) - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * beta**2)) ) - / z + / z_val * self.sigma ) @@ -150,7 +150,7 @@ def median(self): def var(self): alpha = (self.lower - self.mu) / self.sigma beta = (self.upper - self.mu) / self.sigma - z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5)) + z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5)) # Handle for -np.inf or np.inf psi_alpha = (0, 0) if alpha == -np.inf else (1, alpha) psi_beta = (0, 0) if beta == np.inf else (1, beta) @@ -160,13 +160,13 @@ def var(self): psi_beta[1] * (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) - psi_alpha[1] * (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) ) - / z + / z_val - ( ( (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[0] - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0] ) - / z + / z_val ) ** 2 ) @@ -177,7 +177,7 @@ def std(self): def skewness(self): alpha = (self.lower - self.mu) / self.sigma beta = (self.upper - self.mu) / self.sigma - z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5)) + z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5)) # Handle for -np.inf or np.inf psi_alpha = (0, 0) if alpha == -np.inf else (1, alpha) psi_beta = (0, 0) if beta == np.inf else (1, beta) @@ -190,7 +190,7 @@ def skewness(self): * (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0] ) - / z + / z_val - 3 * ( psi_alpha[1] @@ -204,14 +204,14 @@ def skewness(self): (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[0] - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0] ) - / z**2 + / z_val**2 + 2 * ( ( (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[0] - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0] ) - / z + / z_val ) ** 3 ) @@ -225,13 +225,13 @@ def skewness(self): * (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0] ) - / z + / z_val - ( ( (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[0] - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0] ) - / z + / z_val ) ** 2 ) ** (3 / 2) @@ -240,7 +240,7 @@ def skewness(self): def kurtosis(self): alpha = (self.lower - self.mu) / self.sigma beta = (self.upper - self.mu) / self.sigma - z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5)) + z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5)) # Handle for -np.inf or np.inf psi_alpha = (0, 0) if alpha == -np.inf else (1, alpha) psi_beta = (0, 0) if beta == np.inf else (1, beta) @@ -261,7 +261,7 @@ def kurtosis(self): - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0] ) ** 2 - / z**3 + / z_val**3 ) - ( 4 @@ -277,7 +277,7 @@ def kurtosis(self): (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[0] - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0] ) - / z**2 + / z_val**2 ) - ( 3 @@ -290,7 +290,7 @@ def kurtosis(self): * (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0] ) - / z + / z_val ) ** 2 ) @@ -301,7 +301,7 @@ def kurtosis(self): (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[0] - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0] ) - / z + / z_val ) ** 4 ) @@ -313,7 +313,7 @@ def kurtosis(self): * (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0] ) - / z + / z_val ) denominator = ( @@ -326,13 +326,13 @@ def kurtosis(self): * (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0] ) - / z + / z_val - ( ( (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[0] - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0] ) - / z + / z_val ) ** 2 ) ** 2 @@ -349,8 +349,7 @@ def _fit_moments(self, mean, sigma): self._update(mean, sigma) def _fit_mle(self, sample): - mean, sigma = mean_and_std(sample) - self._update(mean, sigma, np.min(sample), np.max(sample)) + self._update(None, None, np.min(sample), np.max(sample)) optimize_ml(self, sample) @@ -359,8 +358,8 @@ def nb_cdf(x, mu, sigma, lower, upper): xi = (x - mu) / sigma alpha = (lower - mu) / sigma beta = (upper - mu) / sigma - z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5)) - prob = (0.5 * (1 + erf(xi / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))) / z + z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5)) + prob = (0.5 * (1 + erf(xi / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))) / z_val return cdf_bounds(prob, x, lower, upper) @@ -383,14 +382,14 @@ def nb_ppf(q, mu, sigma, lower, upper): def nb_entropy(mu, sigma, lower, upper): alpha = (lower - mu) / sigma beta = (upper - mu) / sigma - z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5)) + z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5)) # Handle for -np.inf or np.inf psi_alpha = (0, 0) if alpha == -np.inf else (1, alpha) psi_beta = (0, 0) if beta == np.inf else (1, beta) - return np.log((2 * np.pi * np.e) ** 0.5 * sigma * z) + ( + return np.log((2 * np.pi * np.e) ** 0.5 * sigma * z_val) + ( (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[1] * psi_alpha[0] - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[1] * psi_beta[0] - ) / (2 * z) + ) / (2 * z_val) @nb.vectorize(nopython=True, cache=True) @@ -401,9 +400,9 @@ def nb_logpdf(x, mu, sigma, lower, upper): xi = (x - mu) / sigma alpha = (lower - mu) / sigma beta = (upper - mu) / sigma - z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5)) + z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5)) logphi = np.log(1 / (2 * np.pi) ** 0.5) - xi**2 / 2 - return logphi - (np.log(sigma) + np.log(z)) + return logphi - (np.log(sigma) + np.log(z_val)) @nb.njit(cache=True) @@ -415,6 +414,8 @@ def nb_neg_logpdf(x, mu, sigma, lower, upper): def nb_rvs(random_samples, mu, sigma, lower, upper): alpha = (lower - mu) / sigma beta = (upper - mu) / sigma - z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5)) - inv_phi = 2**0.5 * erfinv(2 * (0.5 * (1 + erf(alpha / 2**0.5)) + random_samples * z) - 1) + z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5)) + inv_phi = 2**0.5 * erfinv( + 2 * (0.5 * (1 + erf(alpha / 2**0.5)) + random_samples * z_val) - 1 + ) return inv_phi * sigma + mu diff --git a/preliz/internal/special.py b/preliz/internal/special.py index df2469c7..70eb2c2e 100644 --- a/preliz/internal/special.py +++ b/preliz/internal/special.py @@ -492,7 +492,7 @@ def xprody(x, y): @nb.vectorize(nopython=True, cache=True) def norm_logcdf(x): - t = x * np.sqrt(0.5) + t = x * 0.7071067811865476 if x < -1.0: return np.log(erfcx(-t) / 2) - t * t else: diff --git a/preliz/tests/test_mle.py b/preliz/tests/test_mle.py index b562c2c0..ddac733b 100644 --- a/preliz/tests/test_mle.py +++ b/preliz/tests/test_mle.py @@ -58,7 +58,7 @@ (Beta, (2, 5)), (BetaScaled, (2, 5, -1, 4)), (Cauchy, (0, 1)), - (ChiSquared, (1,)), + (ChiSquared, (5,)), (ExGaussian, (0, 1, 3)), (Exponential, (5,)), (Gamma, (2, 5)), @@ -77,25 +77,25 @@ (Normal, (0, 1)), (Pareto, (5, 1)), (Rice, (0, 2)), - (SkewNormal, (0, 1, -1)), + (SkewNormal, (0, 1, -6)), (SkewStudentT, (0, 1, 2, 2)), (StudentT, (4, 0, 1)), - (Triangular, (0, 2, 4)), - (TruncatedNormal, (0, 1, -1, 1)), + (Triangular, (0, 3, 4)), + (TruncatedNormal, (0, 0.5, -1, 1)), (Uniform, (2, 5)), (VonMises, (1, 2)), (Wald, (2, 1)), (Weibull, (2, 1)), (Bernoulli, (0.5,)), - (BetaBinomial, (1, 2, 10)), + (BetaBinomial, (2, 5, 10)), (Binomial, (5, 0.5)), (DiscreteUniform, (-2, 2)), (DiscreteWeibull, (0.9, 1.3)), (Geometric, (0.75,)), (HyperGeometric, (50, 10, 20)), - (NegativeBinomial, (10, 0.5)), + (NegativeBinomial, (10, 2.5)), (Poisson, (4.2,)), - (ZeroInflatedBinomial, (0.5, 10, 0.8)), + (ZeroInflatedBinomial, (0.5, 10, 0.6)), (ZeroInflatedNegativeBinomial, (0.7, 8, 4)), ( ZeroInflatedPoisson, @@ -111,8 +111,15 @@ def test_auto_recover(distribution, params): sample = distribution(*params).rvs(10_000) dist = distribution() try: + if dist.__class__.__name__ in [ + "BetaScaled", + "TruncatedNormal", + ]: + tol = 1 + else: + tol = 0.1 pz.mle([dist], sample) - assert_allclose(dist.params, params, atol=1) + assert_allclose(dist.params, params, atol=tol) break except AssertionError: pass diff --git a/preliz/tests/test_scipy.py b/preliz/tests/test_scipy.py index 214c2511..b5523876 100644 --- a/preliz/tests/test_scipy.py +++ b/preliz/tests/test_scipy.py @@ -71,8 +71,8 @@ ( ExGaussian, stats.exponnorm, - {"mu": -1, "sigma": 0.5, "nu": 1}, - {"loc": -1, "scale": 0.5, "K": 1 / 0.5}, + {"mu": -1, "sigma": 2, "nu": 5}, + {"loc": -1, "scale": 2, "K": 5 / 2}, ), (Exponential, stats.expon, {"beta": 3.7}, {"scale": 3.7}), (Gamma, stats.gamma, {"alpha": 2, "beta": 1 / 3}, {"a": 2, "scale": 3}), @@ -313,7 +313,7 @@ def test_match_scipy(p_dist, sp_dist, p_params, sp_params): if preliz_name == "HalfStudentT": assert_almost_equal(actual_median, expected_median, decimal=1) - elif preliz_name == "SkewNormal": + elif preliz_name in ["SkewNormal", "ExGaussian"]: assert_almost_equal(actual_median, expected_median, decimal=6) else: assert_almost_equal(actual_median, expected_median)