Fix mle bugs in more distributions (#435)

* fix mle estimations
* fix test
* fix test
arviz-devs · May 15, 2024 · 516e653
1 parent 4b4c540
commit 516e653
Show file tree

Hide file tree

Showing 8 changed files with 67 additions and 75 deletions.
diff --git a/preliz/distributions/asymmetric_laplace.py b/preliz/distributions/asymmetric_laplace.py
@@ -5,6 +5,7 @@
 
 from .distributions import Continuous
 from ..internal.distribution_helper import all_not_none, eps
+from ..internal.optimization import optimize_ml
 
 
 class AsymmetricLaplace(Continuous):
@@ -178,9 +179,8 @@ def _fit_moments(self, mean, sigma):
         b = (sigma / 2) * (2**0.5)
         self._update(1, mu, b)
 
-    def _fit_mle(self, sample, **kwargs):
-        kappa, mu, b = nb_fit_mle(sample)
-        self._update(kappa, mu, b)
+    def _fit_mle(self, sample):
+        optimize_ml(self, sample)
 
 
 @nb.vectorize(nopython=True, cache=True)
@@ -230,13 +230,3 @@ def nb_rvs(random_samples, mu, b, kappa):
 @nb.njit(cache=True)
 def nb_entropy(b, kappa):
     return 1 + np.log(kappa + 1 / kappa) + np.log(b)
-
-
-@nb.njit(cache=True)
-def nb_fit_mle(sample):
-    new_mu = np.median(sample)
-    new_b = np.mean(np.abs(sample - new_mu))
-    new_kappa = np.sum((sample - new_mu) * np.sign(sample - new_mu)) / np.sum(
-        np.abs(sample - new_mu)
-    )
-    return new_kappa, new_mu, new_b
diff --git a/preliz/distributions/exgaussian.py b/preliz/distributions/exgaussian.py
@@ -6,7 +6,7 @@
 
 from .distributions import Continuous
 from ..internal.distribution_helper import eps, all_not_none
-from ..internal.special import erf, erfc, erfcx, mean_and_std
+from ..internal.special import erf, mean_and_std, norm_logcdf
 from ..internal.optimization import find_ppf
 
 
@@ -143,7 +143,7 @@ def kurtosis(self):
     def rvs(self, size=None, random_state=None):
         random_state = np.random.default_rng(random_state)
         return random_state.normal(self.mu, self.sigma, size) + random_state.exponential(
-            1 / self.nu, size
+            self.nu, size
         )
 
     def _fit_moments(self, mean, sigma):
@@ -152,11 +152,11 @@ def _fit_moments(self, mean, sigma):
 
     def _fit_mle(self, sample):
         mean, std = mean_and_std(sample)
-        skweness = skew(sample)
+        skweness = max(1e-4, skew(sample))
         nu = std * (skweness / 2) ** (1 / 3)
         mu = mean - nu
         var = std**2 * (1 - (skweness / 2) ** (2 / 3))
-        self._update(mu, var**0.5, 1 / nu)
+        self._update(mu, var**0.5, nu)
 
 
 @nb.vectorize(nopython=True, cache=True)
@@ -179,7 +179,7 @@ def nb_logpdf(x, mu, sigma, nu):
             -np.log(nu)
             + (mu - x) / nu
             + 0.5 * (sigma / nu) ** 2
-            + normal_lcdf(x, mu + (sigma**2) / nu, sigma)
+            + norm_logcdf((x - (mu + (sigma**2) / nu)) / sigma)
         )
     else:
         return -np.log(sigma) - 0.5 * np.log(2 * np.pi) - 0.5 * ((x - mu) / sigma) ** 2
@@ -188,12 +188,3 @@ def nb_logpdf(x, mu, sigma, nu):
 @nb.njit(cache=True)
 def nb_neg_logpdf(x, mu, sigma, nu):
     return -(nb_logpdf(x, mu, sigma, nu)).sum()
-
-
-@nb.vectorize(nopython=True, cache=True)
-def normal_lcdf(x, mu, sigma):
-    z_val = (x - mu) / sigma
-    if z_val < -1:
-        return np.log(erfcx(-z_val / 2**0.5) / 2) - abs(z_val) ** 2 / 2
-    else:
-        return np.log1p(-erfc(z_val / 2**0.5) / 2)
diff --git a/preliz/distributions/skewnormal.py b/preliz/distributions/skewnormal.py
@@ -8,7 +8,7 @@
 from .distributions import Continuous
 from ..internal.distribution_helper import eps, to_precision, from_precision, all_not_none
 from ..internal.special import erf, norm_logcdf
-from ..internal.optimization import find_ppf, optimize_ml
+from ..internal.optimization import find_ppf, optimize_ml, optimize_moments
 
 
 class SkewNormal(Continuous):
@@ -176,8 +176,9 @@ def rvs(self, size=None, random_state=None):
         return np.sign(u_0) * u_1 * self.sigma + self.mu
 
     def _fit_moments(self, mean, sigma):
-        # Assume gaussian
-        self._update(mean, sigma, 0)
+        if self.alpha is None:
+            self.alpha = 0
+        optimize_moments(self, mean, sigma)
 
     def _fit_mle(self, sample):
         skewness = skew(sample)

diff --git a/preliz/distributions/triangular.py b/preliz/distributions/triangular.py
@@ -4,7 +4,6 @@
 import numpy as np
 import numba as nb
 
-from ..internal.optimization import optimize_ml
 from ..internal.distribution_helper import all_not_none
 from .distributions import Continuous
 
@@ -172,7 +171,10 @@ def _fit_moments(self, mean, sigma):
         self._update(lower, c, upper)
 
     def _fit_mle(self, sample):
-        optimize_ml(self, sample)
+        lower = np.min(sample)
+        upper = np.max(sample)
+        middle = (np.mean(sample) * 3) - lower - upper
+        self._update(lower, middle, upper)
 
 
 @nb.vectorize(nopython=True, cache=True)

diff --git a/preliz/distributions/truncatednormal.py b/preliz/distributions/truncatednormal.py
@@ -3,7 +3,7 @@
 import numpy as np
 import numba as nb
 
-from ..internal.special import cdf_bounds, erf, erfinv, mean_and_std, ppf_bounds_cont
+from ..internal.special import cdf_bounds, erf, erfinv, ppf_bounds_cont
 from ..internal.optimization import optimize_ml
 from ..internal.distribution_helper import eps, all_not_none
 from .distributions import Continuous
@@ -128,14 +128,14 @@ def entropy(self):
     def mean(self):
         alpha = (self.lower - self.mu) / self.sigma
         beta = (self.upper - self.mu) / self.sigma
-        z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
+        z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
         return (
             self.mu
             + (
                 (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * alpha**2))
                 - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * beta**2))
             )
-            / z
+            / z_val
             * self.sigma
         )
 
@@ -150,7 +150,7 @@ def median(self):
     def var(self):
         alpha = (self.lower - self.mu) / self.sigma
         beta = (self.upper - self.mu) / self.sigma
-        z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
+        z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
         # Handle for -np.inf or np.inf
         psi_alpha = (0, 0) if alpha == -np.inf else (1, alpha)
         psi_beta = (0, 0) if beta == np.inf else (1, beta)
@@ -160,13 +160,13 @@ def var(self):
                 psi_beta[1] * (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2))
                 - psi_alpha[1] * (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2))
             )
-            / z
+            / z_val
             - (
                 (
                     (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[0]
                     - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0]
                 )
-                / z
+                / z_val
             )
             ** 2
         )
@@ -177,7 +177,7 @@ def std(self):
     def skewness(self):
         alpha = (self.lower - self.mu) / self.sigma
         beta = (self.upper - self.mu) / self.sigma
-        z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
+        z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
         # Handle for -np.inf or np.inf
         psi_alpha = (0, 0) if alpha == -np.inf else (1, alpha)
         psi_beta = (0, 0) if beta == np.inf else (1, beta)
@@ -190,7 +190,7 @@ def skewness(self):
                 * (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2))
                 * psi_beta[0]
             )
-            / z
+            / z_val
             - 3
             * (
                 psi_alpha[1]
@@ -204,14 +204,14 @@ def skewness(self):
                 (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[0]
                 - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0]
             )
-            / z**2
+            / z_val**2
             + 2
             * (
                 (
                     (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[0]
                     - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0]
                 )
-                / z
+                / z_val
             )
             ** 3
         )
@@ -225,13 +225,13 @@ def skewness(self):
                 * (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2))
                 * psi_beta[0]
             )
-            / z
+            / z_val
             - (
                 (
                     (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[0]
                     - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0]
                 )
-                / z
+                / z_val
             )
             ** 2
         ) ** (3 / 2)
@@ -240,7 +240,7 @@ def skewness(self):
     def kurtosis(self):
         alpha = (self.lower - self.mu) / self.sigma
         beta = (self.upper - self.mu) / self.sigma
-        z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
+        z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
         # Handle for -np.inf or np.inf
         psi_alpha = (0, 0) if alpha == -np.inf else (1, alpha)
         psi_beta = (0, 0) if beta == np.inf else (1, beta)
@@ -261,7 +261,7 @@ def kurtosis(self):
                     - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0]
                 )
                 ** 2
-                / z**3
+                / z_val**3
             )
             - (
                 4
@@ -277,7 +277,7 @@ def kurtosis(self):
                     (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[0]
                     - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0]
                 )
-                / z**2
+                / z_val**2
             )
             - (
                 3
@@ -290,7 +290,7 @@ def kurtosis(self):
                         * (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2))
                         * psi_beta[0]
                     )
-                    / z
+                    / z_val
                 )
                 ** 2
             )
@@ -301,7 +301,7 @@ def kurtosis(self):
                         (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[0]
                         - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0]
                     )
-                    / z
+                    / z_val
                 )
                 ** 4
             )
@@ -313,7 +313,7 @@ def kurtosis(self):
                 * (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2))
                 * psi_beta[0]
             )
-            / z
+            / z_val
         )
 
         denominator = (
@@ -326,13 +326,13 @@ def kurtosis(self):
                 * (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2))
                 * psi_beta[0]
             )
-            / z
+            / z_val
             - (
                 (
                     (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[0]
                     - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[0]
                 )
-                / z
+                / z_val
             )
             ** 2
         ) ** 2
@@ -349,8 +349,7 @@ def _fit_moments(self, mean, sigma):
         self._update(mean, sigma)
 
     def _fit_mle(self, sample):
-        mean, sigma = mean_and_std(sample)
-        self._update(mean, sigma, np.min(sample), np.max(sample))
+        self._update(None, None, np.min(sample), np.max(sample))
         optimize_ml(self, sample)
 
 
@@ -359,8 +358,8 @@ def nb_cdf(x, mu, sigma, lower, upper):
     xi = (x - mu) / sigma
     alpha = (lower - mu) / sigma
     beta = (upper - mu) / sigma
-    z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
-    prob = (0.5 * (1 + erf(xi / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))) / z
+    z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
+    prob = (0.5 * (1 + erf(xi / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))) / z_val
     return cdf_bounds(prob, x, lower, upper)
 
 
@@ -383,14 +382,14 @@ def nb_ppf(q, mu, sigma, lower, upper):
 def nb_entropy(mu, sigma, lower, upper):
     alpha = (lower - mu) / sigma
     beta = (upper - mu) / sigma
-    z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
+    z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
     # Handle for -np.inf or np.inf
     psi_alpha = (0, 0) if alpha == -np.inf else (1, alpha)
     psi_beta = (0, 0) if beta == np.inf else (1, beta)
-    return np.log((2 * np.pi * np.e) ** 0.5 * sigma * z) + (
+    return np.log((2 * np.pi * np.e) ** 0.5 * sigma * z_val) + (
         (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_alpha[1] ** 2)) * psi_alpha[1] * psi_alpha[0]
         - (1 / (2 * np.pi) ** 0.5 * np.exp(-0.5 * psi_beta[1] ** 2)) * psi_beta[1] * psi_beta[0]
-    ) / (2 * z)
+    ) / (2 * z_val)
 
 
 @nb.vectorize(nopython=True, cache=True)
@@ -401,9 +400,9 @@ def nb_logpdf(x, mu, sigma, lower, upper):
         xi = (x - mu) / sigma
         alpha = (lower - mu) / sigma
         beta = (upper - mu) / sigma
-        z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
+        z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
         logphi = np.log(1 / (2 * np.pi) ** 0.5) - xi**2 / 2
-        return logphi - (np.log(sigma) + np.log(z))
+        return logphi - (np.log(sigma) + np.log(z_val))
 
 
 @nb.njit(cache=True)
@@ -415,6 +414,8 @@ def nb_neg_logpdf(x, mu, sigma, lower, upper):
 def nb_rvs(random_samples, mu, sigma, lower, upper):
     alpha = (lower - mu) / sigma
     beta = (upper - mu) / sigma
-    z = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
-    inv_phi = 2**0.5 * erfinv(2 * (0.5 * (1 + erf(alpha / 2**0.5)) + random_samples * z) - 1)
+    z_val = 0.5 * (1 + erf(beta / 2**0.5)) - 0.5 * (1 + erf(alpha / 2**0.5))
+    inv_phi = 2**0.5 * erfinv(
+        2 * (0.5 * (1 + erf(alpha / 2**0.5)) + random_samples * z_val) - 1
+    )
     return inv_phi * sigma + mu
diff --git a/preliz/internal/special.py b/preliz/internal/special.py
@@ -492,7 +492,7 @@ def xprody(x, y):
 
 @nb.vectorize(nopython=True, cache=True)
 def norm_logcdf(x):
-    t = x * np.sqrt(0.5)
+    t = x * 0.7071067811865476
     if x < -1.0:
         return np.log(erfcx(-t) / 2) - t * t
     else: