Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ordinal families #678

Merged
merged 10 commits into from
May 31, 2023
28 changes: 20 additions & 8 deletions bambi/backend/model_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,40 @@
from bambi.backend.terms import CommonTerm, GroupSpecificTerm, HSGPTerm, InterceptTerm, ResponseTerm
from bambi.backend.utils import get_distribution_from_prior
from bambi.families.multivariate import MultivariateFamily
from bambi.families.univariate import Categorical
from bambi.families.univariate import Categorical, Cumulative, StoppingRatio
from bambi.utils import get_aliased_name


ORDINAL_FAMILIES = (Cumulative, StoppingRatio)


class ConstantComponent:
def __init__(self, component):
self.component = component
self.output = 0

def build(self, pymc_backend, bmb_model): # pylint: disable = unused-argument
def build(self, pymc_backend, bmb_model):
extra_args = {}

if self.component.alias:
label = self.component.alias
else:
label = self.component.name

if isinstance(bmb_model.family, ORDINAL_FAMILIES):
threshold_dim = label + "_dim"
threshold_values = np.arange(len(bmb_model.response_component.response_term.levels) - 1)
extra_args["dims"] = threshold_dim
pymc_backend.model.add_coords({threshold_dim: threshold_values})

with pymc_backend.model:
if self.component.alias:
label = self.component.alias
else:
label = self.component.name
# It's set to a constant value
# Set to a constant value
if isinstance(self.component.prior, (int, float)):
self.output = self.component.prior
# Set to a distribution
else:
dist = get_distribution_from_prior(self.component.prior)
self.output = dist(label, **self.component.prior.args)
self.output = dist(label, **self.component.prior.args, **extra_args)


class DistributionalComponent:
Expand Down
24 changes: 13 additions & 11 deletions bambi/backend/terms.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,13 +225,7 @@ def build(self, pymc_backend, bmb_model):
data = np.squeeze(self.term.data)
parent = self.family.likelihood.parent

# The linear predictor for the parent parameter (usually the mean)
nu = pymc_backend.distributional_components[self.term.name].output

if hasattr(self.family, "transform_backend_nu"):
nu = self.family.transform_backend_nu(nu, data)

# Add auxiliary parameters
# Auxiliary parameters
kwargs = {}

# Constant parameters. No link function is used.
Expand All @@ -252,13 +246,21 @@ def build(self, pymc_backend, bmb_model):
f"{self.name}_{aliased_name}", linkinv(component.output), dims=dims
)

# Add observed and dims
kwargs["observed"] = data
kwargs["dims"] = dims

# The linear predictor for the parent parameter (usually the mean)
eta = pymc_backend.distributional_components[self.term.name].output

if hasattr(self.family, "transform_backend_eta"):
eta = self.family.transform_backend_eta(eta, kwargs)

# Take the inverse link function that maps from the linear predictor to the parent of the likelihood
linkinv = get_linkinv(self.family.link[parent], pymc_backend.INVLINKS)

# Add parent parameter and observed data. We don't need to pass dims.
kwargs[parent] = linkinv(nu)
kwargs["observed"] = data
kwargs["dims"] = dims
# Add parent parameter after applying the linkinv transformation
kwargs[parent] = linkinv(eta)

# Build the response distribution
dist = self.build_response_distribution(kwargs, pymc_backend)
Expand Down
6 changes: 5 additions & 1 deletion bambi/backend/utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
import pytensor.tensor as pt
import pymc as pm

MAPPING = {"Cumulative": pm.Categorical, "StoppingRatio": pm.Categorical}


def get_distribution(dist):
"""Return a PyMC distribution."""
if isinstance(dist, str):
if hasattr(pm, dist):
if dist in MAPPING:
dist = MAPPING[dist]
elif hasattr(pm, dist):
dist = getattr(pm, dist)
else:
raise ValueError(f"The Distribution '{dist}' was not found in PyMC")
Expand Down
23 changes: 22 additions & 1 deletion bambi/defaults/families.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
BetaBinomial,
Binomial,
Categorical,
Cumulative,
Gamma,
Gaussian,
HurdleGamma,
Expand All @@ -15,6 +16,7 @@
NegativeBinomial,
Laplace,
Poisson,
StoppingRatio,
StudentT,
VonMises,
Wald,
Expand All @@ -37,7 +39,6 @@
"family": AsymmetricLaplace,
"default_priors": {"b": "HalfNormal", "kappa": "HalfNormal"}
},

"bernoulli": {
"likelihood": {
"name": "Bernoulli",
Expand Down Expand Up @@ -85,6 +86,16 @@
"link": {"p": "softmax"},
"family": Categorical,
},
"cumulative": {
"likelihood": {
"name": "Cumulative",
"params": ["p", "threshold"],
"parent": "p",
},
"link": {"p": "logit", "threshold": "identity"},
"family": Cumulative,
"default_priors": {"threshold": "Normal"},
},
"dirichlet_multinomial": {
"likelihood": {
"name": "DirichletMultinomial",
Expand Down Expand Up @@ -192,6 +203,16 @@
"link": {"mu": "log"},
"family": Poisson,
},
"sratio": {
"likelihood": {
"name": "StoppingRatio",
"params": ["p", "threshold"],
"parent": "p",
},
"link": {"p": "logit", "threshold": "identity"},
"family": StoppingRatio,
"default_priors": {"threshold": "Normal"},
},
"t": {
"likelihood": {
"name": "StudentT",
Expand Down
11 changes: 8 additions & 3 deletions bambi/families/family.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,14 +170,15 @@ def posterior_predictive(self, model, posterior, **kwargs):
continue
kwargs[key] = expand_array(values, ndims_max)

if hasattr(model.family, "transform_backend_kwargs"):
kwargs = model.family.transform_backend_kwargs(kwargs)
if hasattr(model.family, "transform_kwargs"):
kwargs = model.family.transform_kwargs(kwargs)

output_array = pm.draw(response_dist.dist(**kwargs))
output_coords_all = xr.merge(output_dataset_list).coords

coord_names = ["chain", "draw", response_aliased_name + "_obs"]
if hasattr(model.family, "KIND") and model.family.KIND == "Multivariate":
is_multivariate = hasattr(model.family, "KIND") and model.family.KIND == "Multivariate"
if is_multivariate:
coord_names.append(response_aliased_name + "_dim")

output_coords = {}
Expand Down Expand Up @@ -206,8 +207,12 @@ def get_response_dist(family):
pm.Distribution
The response distribution
"""
mapping = {"Cumulative": pm.Categorical, "StoppingRatio": pm.Categorical}

if family.likelihood.dist:
dist = family.likelihood.dist
elif family.likelihood.name in mapping:
dist = mapping[family.likelihood.name]
else:
dist = getattr(pm, family.likelihood.name)
return dist
Expand Down
1 change: 1 addition & 0 deletions bambi/families/likelihood.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"BetaBinomial": DistSettings(params=("mu", "kappa"), parent="mu"),
"Binomial": DistSettings(params=("p",), parent="p"),
"Categorical": DistSettings(params=("p",), parent="p"),
"Cumulative": DistSettings(params=("p", "threshold"), parent="p"),
"DirichletMultinomial": DistSettings(params=("a",), parent="a"),
"Gamma": DistSettings(params=("mu", "alpha"), parent="mu"),
"Multinomial": DistSettings(params=("p",), parent="p"),
Expand Down
22 changes: 13 additions & 9 deletions bambi/families/multivariate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# pylint: disable=unused-argument
import numpy as np
import pytensor.tensor as pt
import xarray as xr

from bambi.families.family import Family
from bambi.transformations import transformations_namespace
Expand All @@ -15,7 +16,10 @@ class Multinomial(MultivariateFamily):
SUPPORTED_LINKS = {"p": ["softmax"]}
INVLINK_KWARGS = {"axis": -1}

def transform_linear_predictor(self, model, linear_predictor):
@staticmethod
def transform_linear_predictor(
model, linear_predictor: xr.DataArray, posterior: xr.DataArray
) -> xr.DataArray: # pylint: disable = unused-variable
response_name = get_aliased_name(model.response_component.response_term)
response_levels_dim = response_name + "_reduced_dim"
linear_predictor = linear_predictor.pad({response_levels_dim: (1, 0)}, constant_values=0)
Expand Down Expand Up @@ -50,19 +54,20 @@ def get_levels(self, response):

@staticmethod
def transform_backend_kwargs(kwargs):
if "observed" in kwargs:
kwargs["n"] = kwargs["observed"].sum(axis=1).astype(int)
kwargs["n"] = kwargs["observed"].sum(axis=1).astype(int)
return kwargs

@staticmethod
def transform_backend_nu(nu, data):
def transform_backend_eta(eta, kwargs):
data = kwargs["observed"]

# Add column of zeros to the linear predictor for the reference level (the first one)
shape = (data.shape[0], 1)

# The first line makes sure the intercept-only models work
nu = np.ones(shape) * nu # (response_levels, ) -> (n, response_levels)
nu = pt.concatenate([np.zeros(shape), nu], axis=1)
return nu
eta = np.ones(shape) * eta # (response_levels, ) -> (n, response_levels)
eta = pt.concatenate([np.zeros(shape), eta], axis=1)
return eta


class DirichletMultinomial(MultivariateFamily):
Expand All @@ -86,6 +91,5 @@ def get_levels(self, response):

@staticmethod
def transform_backend_kwargs(kwargs):
if "observed" in kwargs:
kwargs["n"] = kwargs["observed"].sum(axis=1).astype(int)
kwargs["n"] = kwargs["observed"].sum(axis=1).astype(int)
return kwargs
Loading