From 516e6534272107dd9eb07f2b905540c999aae021 Mon Sep 17 00:00:00 2001
From: Osvaldo A Martin <aloctavodia@gmail.com>
Date: Wed, 15 May 2024 15:55:29 -0300
Subject: [PATCH] Fix mle bugs in more distributions (#435)

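* Fix MLE estimation in several distributions
  - AsymmetricLaplace: drop the closed-form nb_fit_mle and fit with
    optimize_ml
  - ExGaussian: use nu as the scale of the exponential component in rvs
    and _fit_mle (previously 1 / nu), clamp the sample skewness to a
    minimum of 1e-4, and compute the log-pdf with the shared norm_logcdf
  - Triangular: estimate lower/upper from the sample min/max and c from
    3 * mean - lower - upper instead of calling optimize_ml
  - TruncatedNormal: set only the bounds from the sample before calling
    optimize_ml
  - SkewNormal: fit moments with optimize_moments instead of assuming a
    Gaussian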

* fix test

* fix test
---
 preliz/distributions/asymmetric_laplace.py | 16 ++----
 preliz/distributions/exgaussian.py         | 19 ++-----
 preliz/distributions/skewnormal.py         |  7 +--
 preliz/distributions/triangular.py         |  6 ++-
 preliz/distributions/truncatednormal.py    | 63 +++++++++++-----------
 preliz/internal/special.py                 |  2 +-
 preliz/tests/test_mle.py                   | 23 +++++---
 preliz/tests/test_scipy.py                 |  6 +--
 8 files changed, 67 insertions(+), 75 deletions(-)

diff --git a/preliz/distributions/asymmetric_laplace.py b/preliz/distributions/asymmetric_laplace.py
index 87078f25..802f805f 100644
--- a/preliz/distributions/asymmetric_laplace.py
+++ b/preliz/distributions/asymmetric_laplace.py
@@ -5,6 +5,7 @@
 
 from .distributions import Continuous
 from ..internal.distribution_helper import all_not_none, eps
+from ..internal.optimization import optimize_ml
 
 
 class AsymmetricLaplace(Continuous):
@@ -178,9 +179,8 @@ def _fit_moments(self, mean, sigma):
         b = (sigma / 2) * (2**0.5)
         self._update(1, mu, b)
 
-    def _fit_mle(self, sample, **kwargs):
-        kappa, mu, b = nb_fit_mle(sample)
-        self._update(kappa, mu, b)
+    def _fit_mle(self, sample):
+        optimize_ml(self, sample)
 
 
 @nb.vectorize(nopython=True, cache=True)
@@ -230,13 +230,3 @@ def nb_rvs(random_samples, mu, b, kappa):
 @nb.njit(cache=True)
 def nb_entropy(b, kappa):
     return 1 + np.log(kappa + 1 / kappa) + np.log(b)
-
-
-@nb.njit(cache=True)
-def nb_fit_mle(sample):
-    new_mu = np.median(sample)
-    new_b = np.mean(np.abs(sample - new_mu))
-    new_kappa = np.sum((sample - new_mu) * np.sign(sample - new_mu)) / np.sum(
-        np.abs(sample - new_mu)
-    )
-    return new_kappa, new_mu, new_b
diff --git a/preliz/distributions/exgaussian.py b/preliz/distributions/exgaussian.py
index 0edaf747..ebdd41ae 100644
--- a/preliz/distributions/exgaussian.py
+++ b/preliz/distributions/exgaussian.py
@@ -6,7 +6,7 @@
 
 from .distributions import Continuous
 from ..internal.distribution_helper import eps, all_not_none
-from ..internal.special import erf, erfc, erfcx, mean_and_std
+from ..internal.special import erf, mean_and_std, norm_logcdf
 from ..internal.optimization import find_ppf
 
 
@@ -143,7 +143,7 @@ def kurtosis(self):
     def rvs(self, size=None, random_state=None):
         random_state = np.random.default_rng(random_state)
         return random_state.normal(self.mu, self.sigma, size) + random_state.exponential(
-            1 / self.nu, size
+            self.nu, size
         )
 
     def _fit_moments(self, mean, sigma):
@@ -152,11 +152,11 @@ def _fit_moments(self, mean, sigma):
 
     def _fit_mle(self, sample):
         mean, std = mean_and_std(sample)
-        skweness = skew(sample)
+        skweness = max(1e-4, skew(sample))
         nu = std * (skweness / 2) ** (1 / 3)
         mu = mean - nu
         var = std**2 * (1 - (skweness / 2) ** (2 / 3))
-        self._update(mu, var**0.5, 1 / nu)
+        self._update(mu, var**0.5, nu)
 
 
 @nb.vectorize(nopython=True, cache=True)
@@ -179,7 +179,7 @@ def nb_logpdf(x, mu, sigma, nu):
             -np.log(nu)
             + (mu - x) / nu
             + 0.5 * (sigma / nu) ** 2
-            + normal_lcdf(x, mu + (sigma**2) / nu, sigma)
+            + norm_logcdf((x - (mu + (sigma**2) / nu)) / sigma)
         )
     else:
         return -np.log(sigma) - 0.5 * np.log(2 * np.pi) - 0.5 * ((x - mu) / sigma) ** 2
@@ -188,12 +188,3 @@ def nb_logpdf(x, mu, sigma, nu):
 @nb.njit(cache=True)
 def nb_neg_logpdf(x, mu, sigma, nu):
     return -(nb_logpdf(x, mu, sigma, nu)).sum()
-
-
-@nb.vectorize(nopython=True, cache=True)
-def normal_lcdf(x, mu, sigma):
-    z_val = (x - mu) / sigma
-    if z_val < -1:
-        return np.log(erfcx(-z_val / 2**0.5) / 2) - abs(z_val) ** 2 / 2
-    else:
-        return np.log1p(-erfc(z_val / 2**0.5) / 2)
diff --git a/preliz/distributions/skewnormal.py b/preliz/distributions/skewnormal.py
index 896c9a42..228631a1 100644
--- a/preliz/distributions/skewnormal.py
+++ b/preliz/distributions/skewnormal.py
@@ -8,7 +8,7 @@
 from .distributions import Continuous
 from ..internal.distribution_helper import eps, to_precision, from_precision, all_not_none
 from ..internal.special import erf, norm_logcdf
-from ..internal.optimization import find_ppf, optimize_ml
+from ..internal.optimization import find_ppf, optimize_ml, optimize_moments
 
 
 class SkewNormal(Continuous):
@@ -176,8 +176,9 @@ def rvs(self, size=None, random_state=None):
         return np.sign(u_0) * u_1 * self.sigma + self.mu
 
     def _fit_moments(self, mean, sigma):
-        # Assume gaussian
-        self._update(mean, sigma, 0)
+        if self.alpha is None:
+            self.alpha = 0
+        optimize_moments(self, mean, sigma)
 
     def _fit_mle(self, sample):
         skewness = skew(sample)
diff --git a/preliz/distributions/triangular.py b/preliz/distributions/triangular.py
index 5bf91848..03cf1315 100644
--- a/preliz/distributions/triangular.py
+++ b/preliz/distributions/triangular.py
@@ -4,7 +4,6 @@
 import numpy as np
 import numba as nb
 
-from ..internal.optimization import optimize_ml
 from ..internal.distribution_helper import all_not_none
 from .distributions import Continuous
 
@@ -172,7 +171,10 @@ def _fit_moments(self, mean, sigma):
         self._update(lower, c, upper)
 
     def _fit_mle(self, sample):
-        optimize_ml(self, sample)
+        lower = np.min(sample)
+        upper = np.max(sample)
+        middle = (np.mean(sample) * 3) - lower - upper
+        self._update(lower, middle, upper)
 
 
 @nb.vectorize(nopython=True, cache=True)
diff --git a/preliz/distributions/truncatednormal.py b/preliz/distributions/truncatednormal.py
index 357cb63a..5bcc3bea 100644
--- a/preliz/distributions/truncatednormal.py
+++ b/preliz/distributions/truncatednormal.py
@@ -3,7 +3,7 @@
 import numpy as np
 import numba as nb
 
-from ..internal.special import cdf_bounds, erf, erfinv, mean_and_std, ppf_bounds_cont
+from ..internal.special import cdf_bounds, erf, erfinv, ppf_bounds_cont
 from ..internal.optimization import optimize_ml
 from ..internal.distribution_helper import eps, all_not_none
 from .distributions import Continuous
@@ -128,14 +128,14 @@ def entropy(self):
     def mean(self):
         alpha = (self.lower - self.mu) / self.sigma
         beta = (self.upper - self.mu) / self.sigma
-        z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
+        z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
         return (
             self.mu
             + (
                 (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * alpha**2))
                 - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * beta**2))
             )
-            / z
+            / z_val
             * self.sigma
         )
 
@@ -150,7 +150,7 @@ def median(self):
     def var(self):
         alpha = (self.lower - self.mu) / self.sigma
         beta = (self.upper - self.mu) / self.sigma
-        z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
+        z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
         # Handle for -np.inf or np.inf
         psi_alpha = (0, 0) if alpha == -np.inf else (1, alpha)
         psi_beta = (0, 0) if beta == np.inf else (1, beta)
@@ -160,13 +160,13 @@ def var(self):
                 psi_beta[1] * (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2))
                 - psi_alpha[1] * (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2))
             )
-            / z
+            / z_val
             - (
                 (
                     (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[0]
                     - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0]
                 )
-                / z
+                / z_val
             )
             ** 2
         )
@@ -177,7 +177,7 @@ def std(self):
     def skewness(self):
         alpha = (self.lower - self.mu) / self.sigma
         beta = (self.upper - self.mu) / self.sigma
-        z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
+        z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
         # Handle for -np.inf or np.inf
         psi_alpha = (0, 0) if alpha == -np.inf else (1, alpha)
         psi_beta = (0, 0) if beta == np.inf else (1, beta)
@@ -190,7 +190,7 @@ def skewness(self):
                 * (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2))
                 * psi_beta[0]
             )
-            / z
+            / z_val
             - 3
             * (
                 psi_alpha[1]
@@ -204,14 +204,14 @@ def skewness(self):
                 (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[0]
                 - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0]
             )
-            / z**2
+            / z_val**2
             + 2
             * (
                 (
                     (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[0]
                     - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0]
                 )
-                / z
+                / z_val
             )
             ** 3
         )
@@ -225,13 +225,13 @@ def skewness(self):
                 * (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2))
                 * psi_beta[0]
             )
-            / z
+            / z_val
             - (
                 (
                     (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[0]
                     - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0]
                 )
-                / z
+                / z_val
             )
             ** 2
         ) ** (3 / 2)
@@ -240,7 +240,7 @@ def skewness(self):
     def kurtosis(self):
         alpha = (self.lower - self.mu) / self.sigma
         beta = (self.upper - self.mu) / self.sigma
-        z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
+        z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
         # Handle for -np.inf or np.inf
         psi_alpha = (0, 0) if alpha == -np.inf else (1, alpha)
         psi_beta = (0, 0) if beta == np.inf else (1, beta)
@@ -261,7 +261,7 @@ def kurtosis(self):
                     - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0]
                 )
                 ** 2
-                / z**3
+                / z_val**3
             )
             - (
                 4
@@ -277,7 +277,7 @@ def kurtosis(self):
                     (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[0]
                     - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0]
                 )
-                / z**2
+                / z_val**2
             )
             - (
                 3
@@ -290,7 +290,7 @@ def kurtosis(self):
                         * (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2))
                         * psi_beta[0]
                     )
-                    / z
+                    / z_val
                 )
                 ** 2
             )
@@ -301,7 +301,7 @@ def kurtosis(self):
                         (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[0]
                         - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0]
                     )
-                    / z
+                    / z_val
                 )
                 ** 4
             )
@@ -313,7 +313,7 @@ def kurtosis(self):
                 * (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2))
                 * psi_beta[0]
             )
-            / z
+            / z_val
         )
 
         denominator = (
@@ -326,13 +326,13 @@ def kurtosis(self):
                 * (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2))
                 * psi_beta[0]
             )
-            / z
+            / z_val
             - (
                 (
                     (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[0]
                     - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0]
                 )
-                / z
+                / z_val
             )
             ** 2
         ) ** 2
@@ -349,8 +349,7 @@ def _fit_moments(self, mean, sigma):
         self._update(mean, sigma)
 
     def _fit_mle(self, sample):
-        mean, sigma = mean_and_std(sample)
-        self._update(mean, sigma, np.min(sample), np.max(sample))
+        self._update(None, None, np.min(sample), np.max(sample))
         optimize_ml(self, sample)
 
 
@@ -359,8 +358,8 @@ def nb_cdf(x, mu, sigma, lower, upper):
     xi = (x - mu) / sigma
     alpha = (lower - mu) / sigma
     beta = (upper - mu) / sigma
-    z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
-    prob = (0.5 * (1 + erf(xi / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))) / z
+    z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
+    prob = (0.5 * (1 + erf(xi / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))) / z_val
     return cdf_bounds(prob, x, lower, upper)
 
 
@@ -383,14 +382,14 @@ def nb_ppf(q, mu, sigma, lower, upper):
 def nb_entropy(mu, sigma, lower, upper):
     alpha = (lower - mu) / sigma
     beta = (upper - mu) / sigma
-    z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
+    z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
     # Handle for -np.inf or np.inf
     psi_alpha = (0, 0) if alpha == -np.inf else (1, alpha)
     psi_beta = (0, 0) if beta == np.inf else (1, beta)
-    return np.log((2 * np.pi * np.e) ** 0.5 * sigma * z) + (
+    return np.log((2 * np.pi * np.e) ** 0.5 * sigma * z_val) + (
         (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[1] * psi_alpha[0]
         - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[1] * psi_beta[0]
-    ) / (2 * z)
+    ) / (2 * z_val)
 
 
 @nb.vectorize(nopython=True, cache=True)
@@ -401,9 +400,9 @@ def nb_logpdf(x, mu, sigma, lower, upper):
         xi = (x - mu) / sigma
         alpha = (lower - mu) / sigma
         beta = (upper - mu) / sigma
-        z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
+        z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
         logphi = np.log(1 / (2 * np.pi) ** 0.5) - xi**2 / 2
-        return logphi - (np.log(sigma) + np.log(z))
+        return logphi - (np.log(sigma) + np.log(z_val))
 
 
 @nb.njit(cache=True)
@@ -415,6 +414,8 @@ def nb_neg_logpdf(x, mu, sigma, lower, upper):
 def nb_rvs(random_samples, mu, sigma, lower, upper):
     alpha = (lower - mu) / sigma
     beta = (upper - mu) / sigma
-    z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
-    inv_phi = 2**0.5 * erfinv(2 * (0.5 * (1 + erf(alpha / 2**0.5)) + random_samples * z) - 1)
+    z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
+    inv_phi = 2**0.5 * erfinv(
+        2 * (0.5 * (1 + erf(alpha / 2**0.5)) + random_samples * z_val) - 1
+    )
     return inv_phi * sigma + mu
diff --git a/preliz/internal/special.py b/preliz/internal/special.py
index df2469c7..70eb2c2e 100644
--- a/preliz/internal/special.py
+++ b/preliz/internal/special.py
@@ -492,7 +492,7 @@ def xprody(x, y):
 
 @nb.vectorize(nopython=True, cache=True)
 def norm_logcdf(x):
-    t = x * np.sqrt(0.5)
+    t = x * 0.7071067811865476
     if x < -1.0:
         return np.log(erfcx(-t) / 2) - t * t
     else:
diff --git a/preliz/tests/test_mle.py b/preliz/tests/test_mle.py
index b562c2c0..ddac733b 100644
--- a/preliz/tests/test_mle.py
+++ b/preliz/tests/test_mle.py
@@ -58,7 +58,7 @@
         (Beta, (2, 5)),
         (BetaScaled, (2, 5, -1, 4)),
         (Cauchy, (0, 1)),
-        (ChiSquared, (1,)),
+        (ChiSquared, (5,)),
         (ExGaussian, (0, 1, 3)),
         (Exponential, (5,)),
         (Gamma, (2, 5)),
@@ -77,25 +77,25 @@
         (Normal, (0, 1)),
         (Pareto, (5, 1)),
         (Rice, (0, 2)),
-        (SkewNormal, (0, 1, -1)),
+        (SkewNormal, (0, 1, -6)),
         (SkewStudentT, (0, 1, 2, 2)),
         (StudentT, (4, 0, 1)),
-        (Triangular, (0, 2, 4)),
-        (TruncatedNormal, (0, 1, -1, 1)),
+        (Triangular, (0, 3, 4)),
+        (TruncatedNormal, (0, 0.5, -1, 1)),
         (Uniform, (2, 5)),
         (VonMises, (1, 2)),
         (Wald, (2, 1)),
         (Weibull, (2, 1)),
         (Bernoulli, (0.5,)),
-        (BetaBinomial, (1, 2, 10)),
+        (BetaBinomial, (2, 5, 10)),
         (Binomial, (5, 0.5)),
         (DiscreteUniform, (-2, 2)),
         (DiscreteWeibull, (0.9, 1.3)),
         (Geometric, (0.75,)),
         (HyperGeometric, (50, 10, 20)),
-        (NegativeBinomial, (10, 0.5)),
+        (NegativeBinomial, (10, 2.5)),
         (Poisson, (4.2,)),
-        (ZeroInflatedBinomial, (0.5, 10, 0.8)),
+        (ZeroInflatedBinomial, (0.5, 10, 0.6)),
         (ZeroInflatedNegativeBinomial, (0.7, 8, 4)),
         (
             ZeroInflatedPoisson,
@@ -111,8 +111,15 @@ def test_auto_recover(distribution, params):
         sample = distribution(*params).rvs(10_000)
         dist = distribution()
         try:
+            if dist.__class__.__name__ in [
+                "BetaScaled",
+                "TruncatedNormal",
+            ]:
+                tol = 1
+            else:
+                tol = 0.1
             pz.mle([dist], sample)
-            assert_allclose(dist.params, params, atol=1)
+            assert_allclose(dist.params, params, atol=tol)
             break
         except AssertionError:
             pass
diff --git a/preliz/tests/test_scipy.py b/preliz/tests/test_scipy.py
index 214c2511..b5523876 100644
--- a/preliz/tests/test_scipy.py
+++ b/preliz/tests/test_scipy.py
@@ -71,8 +71,8 @@
         (
             ExGaussian,
             stats.exponnorm,
-            {"mu": -1, "sigma": 0.5, "nu": 1},
-            {"loc": -1, "scale": 0.5, "K": 1 / 0.5},
+            {"mu": -1, "sigma": 2, "nu": 5},
+            {"loc": -1, "scale": 2, "K": 5 / 2},
         ),
         (Exponential, stats.expon, {"beta": 3.7}, {"scale": 3.7}),
         (Gamma, stats.gamma, {"alpha": 2, "beta": 1 / 3}, {"a": 2, "scale": 3}),
@@ -313,7 +313,7 @@ def test_match_scipy(p_dist, sp_dist, p_params, sp_params):
 
     if preliz_name == "HalfStudentT":
         assert_almost_equal(actual_median, expected_median, decimal=1)
-    elif preliz_name == "SkewNormal":
+    elif preliz_name in ["SkewNormal", "ExGaussian"]:
         assert_almost_equal(actual_median, expected_median, decimal=6)
     else:
         assert_almost_equal(actual_median, expected_median)