Skip to content

Commit

Permalink
resolve merge conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
mbi6245 committed Dec 10, 2024
2 parents 3056d7a + 68777cf commit 72e57d2
Show file tree
Hide file tree
Showing 8 changed files with 241 additions and 167 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -133,4 +133,5 @@ dmypy.json
*.csv
*.parquet
diabetes_demo.ipynb
ensemble_fpg_plots.png
ensemble_fpg_plots.png
sensitivity_analysis.ipynb
176 changes: 96 additions & 80 deletions plots.ipynb

Large diffs are not rendered by default.

198 changes: 119 additions & 79 deletions src/ensemble/distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,9 @@ def __init__(
self,
mean: float = None,
variance: float = None,
lb: float = None,
ub: float = None,
):
self.mean = mean
self.variance = variance
self.lb = lb
self.ub = ub
# # some kind of dictionary with
# # key: the support (full real line, semi infinite, etc...)
# # value: function that gets called when distribution is initialized
Expand Down Expand Up @@ -164,6 +160,8 @@ class Fisk(Distribution):

def support(self) -> Tuple[float, float]:
    """Return the endpoints of the support: zero up to positive infinity."""
    # TODO: when a user passes in a different finite bound (i.e. the lb),
    # fit a distribution with a translation in the mean only; leave the
    # variance untouched because scaling doesn't make sense.
    lower_bound, upper_bound = 0, np.inf
    return (lower_bound, upper_bound)

def _create_scipy_dist(self):
positive_support(self.mean)
Expand Down Expand Up @@ -218,6 +216,7 @@ def _create_scipy_dist(self) -> None:
# Method of moments: solve numerically for the shape k, then recover the
# scale from mean = lambda * Gamma(1 + 1 / k).
# https://real-statistics.com/distribution-fitting/method-of-moments/method-of-moments-weibull/
k = opt.root_scalar(self._func, x0=0.5, method="newton")
lambda_ = self.mean / gamma_func(1 + 1 / k.root)

# most likely a parameterization issue
self._scipy_dist = stats.weibull_min(c=k.root, scale=lambda_)
Expand Down Expand Up @@ -266,41 +265,73 @@ def _create_scipy_dist(self) -> None:

# analytic sol
class Beta(Distribution):
# TODO: WANT TO BE ABLE TO PASS IN UPPER AND LOWER BOUNDS TO BE REFLECTED IN THE DIST
# EX: MEAN 6, VAR 0.2, LB 5, UB 10
# ADJ_MEAN = (MEAN - LB) / INTERVAL_WIDTH
# ADJ_VAR = VAR / INTERVAL_WIDTH ** 2
# INPUT ADJ MEAN & VAR INTO FUNCTION
# JUST GET RVS TO WORK FOR NOW, WHEN YOU TAKE A SAMPLE OF SIZE 100,
# JUST MULTIPLICATIVELY SCALE AND THEN LINEARLY SHIFT THE DATA TO THE ORIGINAL BOUNDS
"""https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.beta.html#scipy.stats.beta"""

def __init__(
self,
mean: float = None,
variance: float = None,
lb: float = 0,
ub: float = 1,
):
self.lb = lb
self.ub = ub
self.width = np.abs(ub - lb)
super().__init__(mean, variance)

def _squeeze(self, x: float) -> float:
"""transform x to be within (0, 1)
Parameters
----------
x : float
value within support
Returns
-------
float
transformed value within support
"""
return (x - self.lb) / self.width

def _stretch(self, x: float) -> float:
"""transform x from (0, 1) back to original bounds
Parameters
----------
x : float
value within standard Beta support
Returns
-------
float
transformed value within original support
"""
return (x + self.lb) * self.width

def support(self) -> Tuple[float, float]:
    """Return the endpoints (lb, ub) of the bounded support."""
    return (self.lb, self.ub)

def _create_scipy_dist(self) -> None:
    """Fit a beta distribution to the requested mean and variance.

    The moments are rescaled from the (lb, ub) support onto the unit
    interval, then the two shape parameters are obtained analytically
    by the method of moments.

    Raises
    ------
    ValueError
        if the rescaled variance is outside (0, mean * (1 - mean)),
        the only range for which a beta distribution exists
    """
    # Rescale onto (0, 1). With the default bounds the width is 1 and
    # this is the identity, so no special case is needed. Variance
    # scales with the square of the width under a linear map.
    mean = (self.mean - self.lb) / self.width
    var = self.variance / self.width**2

    # NOTE(review): beta_bounds is defined elsewhere; presumably it
    # validates that the mean lies in (0, 1) -- confirm. It is given the
    # rescaled mean so that bounded inputs are judged on the unit scale.
    beta_bounds(mean)
    if not 0 < var < mean * (1 - mean):
        raise ValueError(
            "beta distributions do not exist for certain mean and variance "
            + "combinations. After rescaling to (0, 1), the supplied "
            + "variance must be in between (0, mean * (1 - mean))"
        )

    # Both shape parameters share the factor mean * (1 - mean) / var - 1.
    common = mean * (1 - mean) / var - 1
    alpha = mean * common
    beta = (1 - mean) * common
    self._scipy_dist = stats.beta(a=alpha, b=beta)


# Beta distribution wrapper that rescales the requested moments from
# (lb, ub) onto the unit interval before delegating to Beta.
# NOTE(review): reads self.lb / self.ub, which this class never sets itself --
# confirm the base class still provides them.
class MSCABeta(Distribution):
def _create_scipy_dist(self) -> None:
# Width of the requested support; used to rescale both moments below.
self.width = self.ub - self.lb
adj_mean = (self.mean - self.lb) / self.width
# NOTE(review): variance scales with the square of the width under a linear
# map, so dividing by width (not width**2) looks suspect -- confirm.
adj_var = self.variance / self.width
# Stores a Beta wrapper object (not a raw scipy frozen distribution).
self._scipy_dist = Beta(adj_mean, adj_var)

def support(self) -> Tuple[float, float]:
"""create tuple representing endpoints of support"""
# NOTE(review): only prints the wrapped distribution's mean/variance and
# implicitly returns None despite the Tuple annotation -- confirm intent.
print(self._scipy_dist.stats("mv"))

def rvs(self, *args, **kwds):
"""defaults to scipy implementation for generating random variates
Expand All @@ -310,7 +341,7 @@ def rvs(self, *args, **kwds):
np.ndarray
random variates from a given distribution/parameters
"""
return (self._scipy_dist.rvs(*args, **kwds) + self.lb) * self.width
return self._stretch(self._scipy_dist.rvs(*args, **kwds))

def pdf(self, x: npt.ArrayLike) -> np.ndarray:
"""defaults to scipy implementation for probability density function
Expand All @@ -325,52 +356,62 @@ def pdf(self, x: npt.ArrayLike) -> np.ndarray:
np.ndarray
PDF evaluated at quantile x
"""
return (self._scipy_dist.pdf(x) + self.lb) * self.width

# def cdf(self, q: npt.ArrayLike) -> np.ndarray:
# """defaults to scipy implementation for cumulative density function

# Parameters
# ----------
# q : npt.ArrayLike
# quantiles

# Returns
# -------
# np.ndarray
# CDF evaluated at quantile q
# """
# return self._scipy_dist.cdf(q)

# def ppf(self, p: npt.ArrayLike) -> np.ndarray:
# """defaults to scipy implementation for percent point function

# Parameters
# ----------
# p : npt.ArrayLike
# lower tail probability

# Returns
# -------
# np.ndarray
# PPF evaluated at lower tail probability p
# """
# return self._scipy_dist.ppf(p)

# def stats(self, moments: str) -> Union[float, Tuple[float, ...]]:
# """defaults to scipy implementation for obtaining moments

# Parameters
# ----------
# moments : str
# m for mean, v for variance, s for skewness, k for kurtosis

# Returns
# -------
# Union[float, Tuple[float, ...]]
# mean, variance, skewness, and/or kurtosis
# """
# return self._scipy_dist.stats(moments=moments)
return self._scipy_dist.pdf(self._squeeze(x))

def cdf(self, q: npt.ArrayLike) -> np.ndarray:
    """Evaluate the cumulative distribution function on the bounded support.

    Parameters
    ----------
    q : npt.ArrayLike
        quantiles

    Returns
    -------
    np.ndarray
        CDF evaluated at quantile q
    """
    # Move the quantiles onto the unit interval before asking scipy.
    unit_quantiles = self._squeeze(q)
    return self._scipy_dist.cdf(unit_quantiles)

def ppf(self, p: npt.ArrayLike) -> np.ndarray:
    """Evaluate the percent point function on the bounded support.

    Parameters
    ----------
    p : npt.ArrayLike
        lower tail probability

    Returns
    -------
    np.ndarray
        PPF evaluated at lower tail probability p
    """
    # scipy answers on the unit interval; map the result back afterwards.
    unit_quantiles = self._scipy_dist.ppf(p)
    return self._stretch(unit_quantiles)

def stats(self, moments: str) -> Union[float, Tuple[float, ...]]:
    """Return moments of the distribution on its (lb, ub) support.

    The wrapped scipy distribution lives on the unit interval, so its
    moments are mapped through the linear transform x -> lb + width * x.

    Parameters
    ----------
    moments : str
        m for mean, v for variance, s for skewness, k for kurtosis

    Returns
    -------
    Union[float, Tuple[float, ...]]
        the requested moments in m, v, s, k order; a bare value rather
        than a tuple when exactly one moment is requested
    """
    res_list = []
    if "m" in moments:
        # Scale, then shift: the mean follows the same map as a sample.
        res_list.append(self._scipy_dist.stats("m") * self.width + self.lb)
    if "v" in moments:
        # Variance ignores the shift and scales with the width squared.
        res_list.append(self._scipy_dist.stats("v") * self.width**2)
    # Skewness and kurtosis are unchanged by a shift and positive scaling.
    res_list.extend(
        self._scipy_dist.stats(key) for key in "sk" if key in moments
    )

    if len(res_list) == 1:
        return res_list[0]
    return tuple(res_list)


distribution_dict = {
Expand All @@ -383,7 +424,6 @@ def pdf(self, x: npt.ArrayLike) -> np.ndarray:
"lognormal": LogNormal,
"normal": Normal,
"beta": Beta,
"MSCAbeta": MSCABeta,
}


Expand Down
19 changes: 18 additions & 1 deletion src/ensemble/model.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import json
import warnings
from typing import List, Tuple, Union

import cvxpy as cp
Expand Down Expand Up @@ -38,6 +39,8 @@ def __init__(
named_weights: dict,
mean: float,
variance: float,
lb: float = None,
ub: float = None,
):
self._distributions = list(named_weights.keys())
self._weights = list(named_weights.values())
Expand All @@ -53,6 +56,18 @@ def __init__(
)
self.mean = mean
self.variance = variance
# Warn when more than 5% of the ensemble's mass falls outside a requested
# bound. The bounds are numeric, so they are interpolated into the message
# rather than concatenated (str + float raises TypeError).
if lb is not None and self.cdf(lb) > 0.05:
    warnings.warn(
        f"Ensemble density less than the specified lower bound {lb}"
        " exceeds 0.05. Check for low sample size!",
        stacklevel=2,
    )
if ub is not None and (1 - self.cdf(ub)) > 0.05:
    warnings.warn(
        f"Ensemble density greater than the specified upper bound {ub}"
        " exceeds 0.05. Check for low sample size!",
        stacklevel=2,
    )

def _ppf_to_solve(self, x: float, p: float) -> float:
"""ensemble_CDF(x) - lower tail probability
Expand Down Expand Up @@ -145,13 +160,15 @@ def cdf(self, q: npt.ArrayLike) -> np.ndarray:
)
)

def ppf(self, p: npt.ArrayLike) -> np.ndarray:
def ppf(self, p: npt.ArrayLike, uncertainty: bool = True) -> np.ndarray:
"""percent point function of ensemble distribution
Parameters
----------
p : npt.ArrayLike
lower tail probability
uncertainty : bool, optional
return a 95% CI using the delta method about p
Returns
-------
Expand Down
2 changes: 1 addition & 1 deletion test_read.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
[{"named_weights": {"normal": 0.5, "gumbel": 0.5}, "mean": 1, "variance": 1}, {"named_weights": {"gamma": 0.2, "invgamma": 0.8}, "mean": 1, "variance": 1}]
[{"named_weights": {"normal": 0.5, "gumbel": 0.5}, "mean": 1, "variance": 1}]
5 changes: 2 additions & 3 deletions tests/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def test_gumbel():


def test_weibull():
weibull = Weibull(MEAN, VARIANCE)
weibull = Weibull(624.25, 183.791**2)
res = weibull.stats(moments="mv")
print("resulting mean and var: ", res)
assert np.isclose(res[0], MEAN)
Expand Down Expand Up @@ -92,9 +92,8 @@ def test_normal():

def test_beta():
    """Beta built with default bounds reproduces the requested mean and variance."""
    beta = Beta(BETA_MEAN, BETA_VARIANCE)
    res = beta.stats(moments="mv")
    print("resulting mean and var: ", res)
    # The unconditional failure that preceded these checks is gone, so the
    # moment comparisons below now actually run.
    assert np.isclose(res[0], BETA_MEAN)
    assert np.isclose(res[1], BETA_VARIANCE)

Expand Down
4 changes: 2 additions & 2 deletions tests/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@ def test_json():
model1 = EnsembleDistribution(
{"gamma": 0.2, "invgamma": 0.8}, *DEFAULT_SETTINGS
)
model1.to_json("test_read.json", appending=True)
model1.to_json("tests/test_read.json", appending=True)

m1 = EnsembleDistribution.from_json("test_read.json")[1]
m1 = EnsembleDistribution.from_json("tests/test_read.json")[1]
assert m1.stats_temp("mv") == DEFAULT_SETTINGS
assert m1._distributions == ["gamma", "invgamma"]
assert m1._weights == [0.2, 0.8]
1 change: 1 addition & 0 deletions tests/test_read.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"named_weights": {"normal": 0.5, "gumbel": 0.5}, "mean": 1, "variance": 1}, {"named_weights": {"gamma": 0.2, "invgamma": 0.8}, "mean": 1, "variance": 1}, {"named_weights": {"gamma": 0.2, "invgamma": 0.8}, "mean": 1, "variance": 1}, {"named_weights": {"gamma": 0.2, "invgamma": 0.8}, "mean": 1, "variance": 1}, {"named_weights": {"gamma": 0.2, "invgamma": 0.8}, "mean": 1, "variance": 1}, {"named_weights": {"gamma": 0.2, "invgamma": 0.8}, "mean": 1, "variance": 1}, {"named_weights": {"gamma": 0.2, "invgamma": 0.8}, "mean": 1, "variance": 1}, {"named_weights": {"gamma": 0.2, "invgamma": 0.8}, "mean": 1, "variance": 1}, {"named_weights": {"gamma": 0.2, "invgamma": 0.8}, "mean": 1, "variance": 1}, {"named_weights": {"gamma": 0.2, "invgamma": 0.8}, "mean": 1, "variance": 1}, {"named_weights": {"gamma": 0.2, "invgamma": 0.8}, "mean": 1, "variance": 1}, {"named_weights": {"gamma": 0.2, "invgamma": 0.8}, "mean": 1, "variance": 1}, {"named_weights": {"gamma": 0.2, "invgamma": 0.8}, "mean": 1, "variance": 1}, {"named_weights": {"gamma": 0.2, "invgamma": 0.8}, "mean": 1, "variance": 1}, {"named_weights": {"gamma": 0.2, "invgamma": 0.8}, "mean": 1, "variance": 1}, {"named_weights": {"gamma": 0.2, "invgamma": 0.8}, "mean": 1, "variance": 1}, {"named_weights": {"gamma": 0.2, "invgamma": 0.8}, "mean": 1, "variance": 1}, {"named_weights": {"gamma": 0.2, "invgamma": 0.8}, "mean": 1, "variance": 1}]

0 comments on commit 72e57d2

Please sign in to comment.