-
Notifications
You must be signed in to change notification settings - Fork 248
Add beta distribution #391
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
"""The NGBoost Beta distribution and scores""" | ||
import numpy as np | ||
from scipy.special import digamma, polygamma | ||
from scipy.stats import beta as dist | ||
|
||
from ngboost.distns.distn import RegressionDistn | ||
from ngboost.scores import LogScore | ||
|
||
|
||
class BetaLogScore(LogScore):
    """LogScore (negative log-likelihood) for the Beta distribution.

    Gradients and the Fisher information are taken with respect to the
    internal parameterization (log(a), log(b)), which keeps both shape
    parameters positive under additive boosting updates.
    """

    def score(self, Y):
        """Return the negative log-likelihood of the observations Y."""
        return -self.dist.logpdf(Y)

    def d_score(self, Y):
        """Return dS/d(log(a)), dS/d(log(b)) as an (n, 2) array."""
        # digamma(a + b) appears in both partial derivatives; compute it once.
        dig_ab = digamma(self.a + self.b)
        grad_log_a = -self.a * (dig_ab - digamma(self.a) + np.log(Y))
        grad_log_b = -self.b * (dig_ab - digamma(self.b) + np.log(1 - Y))
        return np.stack([grad_log_a, grad_log_b], axis=1)

    def metric(self):
        """Return the (n, 2, 2) Fisher information w.r.t. (log(a), log(b))."""
        tri_a = polygamma(1, self.a)
        tri_b = polygamma(1, self.b)
        tri_ab = polygamma(1, self.a + self.b)

        FI = np.empty((self.a.shape[0], 2, 2))
        FI[:, 0, 0] = self.a**2 * (tri_a - tri_ab)
        # The matrix is symmetric; fill both off-diagonal entries at once.
        off_diag = -self.a * self.b * tri_ab
        FI[:, 0, 1] = off_diag
        FI[:, 1, 0] = off_diag
        FI[:, 1, 1] = self.b**2 * (tri_b - tri_ab)
        return FI
|
||
|
||
class Beta(RegressionDistn):
    """
    Implements the Beta distribution for NGBoost.

    The Beta distribution has two parameters, a and b.
    The scipy loc and scale parameters are held constant for this implementation.
    LogScore is supported for the Beta distribution.
    """

    n_params = 2
    scores = [BetaLogScore]

    # pylint: disable=super-init-not-called
    def __init__(self, params):
        """Build the distribution from internal parameters.

        Parameters
        ----------
        params : array of shape (2, n)
            Row 0 is log(a), row 1 is log(b) for each observation.
        """
        self._params = params

        # Parameters are kept on the log scale so gradient steps can never
        # drive a or b negative.
        self.log_a = params[0]
        self.log_b = params[1]
        # NOTE(review): extreme boosting steps can overflow/underflow exp(),
        # collapsing a or b to 0 or inf; clipping params here may be needed
        # — TODO confirm against training behavior before adding bounds.
        self.a = np.exp(params[0])  # since params[0] is log(a)
        self.b = np.exp(params[1])  # since params[1] is log(b)

        self.dist = dist(a=self.a, b=self.b)

    @staticmethod
    def fit(Y):
        """Fit the distribution to the data.

        Returns the initial internal parameters [log(a), log(b)].
        loc and scale are frozen at 0 and 1 so only the shape
        parameters are learned.
        """
        # Use scipy's beta distribution to fit the parameters
        # pylint: disable=unused-variable
        a, b, loc, scale = dist.fit(Y, floc=0, fscale=1)
        return np.array([np.log(a), np.log(b)])

    def sample(self, m):
        """Draw m samples; returns an array of shape (m, n)."""
        return np.array([self.dist.rvs() for _ in range(m)])

    def __getattr__(self, name):
        # Delegate to the underlying scipy distribution so e.g. Beta.mean()
        # works for RegressionDistn.predict().
        if name in dir(self.dist):
            return getattr(self.dist, name)
        # Raise instead of returning None: returning None silently masks
        # typos, makes hasattr() always succeed, and confuses protocols
        # (copy/pickle) that probe for optional dunder attributes.
        raise AttributeError(
            f"'{type(self).__name__}' object has no attribute '{name}'"
        )

    @property
    def params(self):
        """Return the parameters of the Beta distribution."""
        return {"a": self.a, "b": self.b}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.