From 541828496ab507a4d096f11b9d93cafab608084c Mon Sep 17 00:00:00 2001 From: Gibraan Rahman Date: Mon, 18 Sep 2023 14:37:30 -0700 Subject: [PATCH 1/3] Add NB LME Single --- .../negative_binomial_lme_single.stan | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 birdman/templates/negative_binomial_lme_single.stan diff --git a/birdman/templates/negative_binomial_lme_single.stan b/birdman/templates/negative_binomial_lme_single.stan new file mode 100644 index 0000000..d22cf17 --- /dev/null +++ b/birdman/templates/negative_binomial_lme_single.stan @@ -0,0 +1,51 @@ +data { + int N; // number of samples + int S; // number of groups (subjects) + int p; // number of covariates + real A; // mean intercept + vector[N] depth; // log sequencing depth of each sample + matrix[N, p] x; // covariate matrix + array[N] int y; // observed microbe abundances + array[N] int subj_ids; // mapping of samples to subject IDs + + real B_p; // stdev for beta normal prior + real inv_disp_sd; // stdev for inv disp lognormal prior + real u_p; // stdev for subject intercept normal prior +} + +parameters { + real beta_0; + vector[p-1] beta_x; + real inv_disp; + vector[S] subj_int; +} + +transformed parameters { + vector[p] beta_var = append_row(beta_0, beta_x); + vector[N] lam = x * beta_var + depth; + + for (n in 1:N){ + lam[n] += subj_int[subj_ids[n]]; + } +} + +model { + inv_disp ~ lognormal(0., inv_disp_sd); + beta_0 ~ normal(A, B_p); + beta_x ~ normal(0, B_p); + for (j in 1:S){ + subj_int[j] ~ normal(0., u_p); + } + + y ~ neg_binomial_2_log(lam, inv(inv_disp)); +} + +generated quantities { + vector[N] log_lhood; + vector[N] y_predict; + + for (n in 1:N){ + y_predict[n] = neg_binomial_2_log_rng(lam[n], inv(inv_disp)); + log_lhood[n] = neg_binomial_2_log_lpmf(y[n] | lam[n], inv(inv_disp)); + } +} From 9f1ae0930d46aceb11f62967039d1ff674448f3b Mon Sep 17 00:00:00 2001 From: Gibraan Rahman Date: Mon, 18 Sep 2023 14:54:40 -0700 Subject: [PATCH 2/3] Add single feature LME code 
--- birdman/__init__.py | 4 +- birdman/default_models.py | 123 ++++++++++++++++++- birdman/templates/negative_binomial_lme.stan | 6 +- tests/test_model.py | 16 ++- 4 files changed, 138 insertions(+), 11 deletions(-) diff --git a/birdman/__init__.py b/birdman/__init__.py index 15e2999..1aa5dda 100644 --- a/birdman/__init__.py +++ b/birdman/__init__.py @@ -1,10 +1,10 @@ from .model_base import (BaseModel, TableModel, SingleFeatureModel, ModelIterator) from .default_models import (NegativeBinomial, NegativeBinomialLME, - NegativeBinomialSingle) + NegativeBinomialSingle, NegativeBinomialLMESingle) __version__ = "0.1.0" __all__ = ["BaseModel", "TableModel", "SingleFeatureModel", "ModelIterator", "NegativeBinomial", "NegativeBinomialSingle", - "NegativeBinomialLME"] + "NegativeBinomialLME", "NegativeBinomialLMESingle"] diff --git a/birdman/default_models.py b/birdman/default_models.py index 6eeef9c..1663efb 100644 --- a/birdman/default_models.py +++ b/birdman/default_models.py @@ -1,4 +1,4 @@ -import os +from os.path import join as pjoin from pkg_resources import resource_filename import biom @@ -10,9 +10,10 @@ TEMPLATES = resource_filename("birdman", "templates") DEFAULT_MODEL_DICT = { "negative_binomial": { - "standard": os.path.join(TEMPLATES, "negative_binomial.stan"), - "single": os.path.join(TEMPLATES, "negative_binomial_single.stan"), - "lme": os.path.join(TEMPLATES, "negative_binomial_lme.stan") + "standard": pjoin(TEMPLATES, "negative_binomial.stan"), + "single": pjoin(TEMPLATES, "negative_binomial_single.stan"), + "full_lme": pjoin(TEMPLATES, "negative_binomial_lme.stan"), + "single_lme": pjoin(TEMPLATES, "negative_binomial_lme_single.stan") } } @@ -270,7 +271,7 @@ def __init__( inv_disp_sd: float = 0.5, group_var_prior: float = 1.0 ): - filepath = DEFAULT_MODEL_DICT["negative_binomial"]["lme"] + filepath = DEFAULT_MODEL_DICT["negative_binomial"]["full_lme"] super().__init__( table=table, model_path=filepath, @@ -317,3 +318,115 @@ def __init__( 
posterior_predictive="y_predict", log_likelihood="log_lhood" ) + + +class NegativeBinomialLMESingle(SingleFeatureModel): + """Fit single feature with negative binomial linear mixed effects model. + + .. math:: + + y_{ij} &\\sim \\textrm{NB}(\\mu_{ij}, \\phi_j) + + \\log(\\mu_{ij}) &= \\log(\\textrm{Depth}_i) + x_i \\beta + z_i u + + Priors: + + .. math:: + + \\beta_j \\sim \\begin{cases} + \\textrm{Normal}(A, B_p), & j = 0 + + \\textrm{Normal}(0, B_p), & j > 0 + \\end{cases} + + .. math:: A = \\ln{\\frac{1}{D}},\\ D = \\textrm{Number of features} + + .. math:: + + \\frac{1}{\\phi_j} \\sim \\textrm{Lognormal}(0, s),\\ s \\in + \\mathbb{R}_{>0} + + .. math:: + + u_j &\\sim \\textrm{Normal}(0, u_p),\\ u_p \\in \\mathbb{R}_{>0} + + + :param table: Feature table (features x samples) + :type table: biom.table.Table + + :param feature_id: ID of feature to fit + :type feature_id: str + + :param formula: Design formula to use in model + :type formula: str + + :param group_var: Variable in metadata to use as grouping + :type group_var: str + + :param metadata: Metadata for design matrix + :type metadata: pd.DataFrame + + :param beta_prior: Standard deviation for normally distributed prior values + of beta, defaults to 5.0 + :type beta_prior: float + + :param inv_disp_sd: Standard deviation for lognormally distributed prior + values of 1/phi, defaults to 0.5 + :type inv_disp_sd: float + """ + def __init__( + self, + table: biom.table.Table, + feature_id: str, + formula: str, + group_var: str, + metadata: pd.DataFrame, + beta_prior: float = 5.0, + inv_disp_sd: float = 0.5, + group_var_prior: float = 1.0 + ): + filepath = DEFAULT_MODEL_DICT["negative_binomial"]["single_lme"] + + super().__init__( + table=table, + feature_id=feature_id, + model_path=filepath, + ) + self.create_regression(formula=formula, metadata=metadata) + + D = table.shape[0] + A = np.log(1 / D) + + # Encode group IDs starting at 1 because Stan 1-indexes arrays + group_var_series = 
metadata[group_var].loc[self.sample_names] + samp_subj_map = group_var_series.astype("category").cat.codes + 1 + # Encoding as categories uses alphabetic sorting + self.groups = np.sort(group_var_series.unique()) + + param_dict = { + "depth": np.log(table.sum(axis="sample")), + "B_p": beta_prior, + "inv_disp_sd": inv_disp_sd, + "A": A, + "subj_ids": samp_subj_map, + "u_p": group_var_prior + } + self.add_parameters(param_dict) + + self.specify_model( + params=["beta_var", "inv_disp", "subj_int"], + dims={ + "beta_var": ["covariate"], + "log_lhood": ["tbl_sample"], + "y_predict": ["tbl_sample"], + "subj_int": ["group"] + }, + coords={ + "covariate": self.colnames, + "tbl_sample": self.sample_names, + "group": self.groups + }, + include_observed_data=True, + posterior_predictive="y_predict", + log_likelihood="log_lhood" + ) diff --git a/birdman/templates/negative_binomial_lme.stan b/birdman/templates/negative_binomial_lme.stan index 09e266d..adf5c90 100644 --- a/birdman/templates/negative_binomial_lme.stan +++ b/birdman/templates/negative_binomial_lme.stan @@ -48,7 +48,7 @@ model { // generating counts for (n in 1:N){ for (i in 1:D){ - target += neg_binomial_2_log_lpmf(y[n, i] | lam_clr[n, i], inv_disp[i]); + target += neg_binomial_2_log_lpmf(y[n, i] | lam_clr[n, i], inv(inv_disp[i])); } } } @@ -59,8 +59,8 @@ generated quantities { for (n in 1:N){ for (i in 1:D){ - y_predict[n, i] = neg_binomial_2_log_rng(lam_clr[n, i], inv_disp[i]); - log_lhood[n, i] = neg_binomial_2_log_lpmf(y[n, i] | lam_clr[n, i], inv_disp[i]); + y_predict[n, i] = neg_binomial_2_log_rng(lam_clr[n, i], inv(inv_disp[i])); + log_lhood[n, i] = neg_binomial_2_log_lpmf(y[n, i] | lam_clr[n, i], inv(inv_disp[i])); } } } diff --git a/tests/test_model.py b/tests/test_model.py index d8d6131..bc7f8ac 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -4,7 +4,8 @@ import numpy as np from birdman import (NegativeBinomial, NegativeBinomialLME, - NegativeBinomialSingle, ModelIterator) + 
NegativeBinomialSingle, NegativeBinomialLMESingle, + ModelIterator) TEMPLATES = resource_filename("birdman", "templates") @@ -70,6 +71,19 @@ def test_single_feat(self, table_biom, metadata): nb.compile_model() nb.fit_model(num_draws=100) + def test_lme_single_feat(self, table_biom, metadata): + md = metadata.copy() + for fid in table_biom.ids(axis="observation"): + nb = NegativeBinomialLMESingle( + table=table_biom, + feature_id=fid, + formula="host_common_name", + group_var="group", + metadata=md, + ) + nb.compile_model() + nb.fit_model(num_draws=100) + class TestToInference: def test_serial_to_inference(self, example_model): From a1df85915ec7f9494821f61f64e4ce43de24e57d Mon Sep 17 00:00:00 2001 From: Gibraan Rahman Date: Mon, 18 Sep 2023 15:01:52 -0700 Subject: [PATCH 3/3] Add SingleFeature NB LME --- .gitignore | 1 + birdman/default_models.py | 3 ++- tests/test_model.py | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 3d1a5c4..d8d94db 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ birdman/templates/negative_binomial birdman/templates/multinomial birdman/templates/negative_binomial_single birdman/templates/negative_binomial_lme +birdman/templates/negative_binomial_lme_single tests/custom_model *__pycache__/ diff --git a/birdman/default_models.py b/birdman/default_models.py index bd95d4f..19916e9 100644 --- a/birdman/default_models.py +++ b/birdman/default_models.py @@ -410,7 +410,8 @@ def __init__( "inv_disp_sd": inv_disp_sd, "A": A, "subj_ids": samp_subj_map, - "u_p": group_var_prior + "u_p": group_var_prior, + "S": len(self.groups) } self.add_parameters(param_dict) diff --git a/tests/test_model.py b/tests/test_model.py index 48a76d6..55829b9 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -73,6 +73,9 @@ def test_single_feat(self, table_biom, metadata): def test_lme_single_feat(self, table_biom, metadata): md = metadata.copy() + np.random.seed(42) + md["group"] = np.random.randint(low=0, 
high=3, size=md.shape[0]) + md["group"] = "G" + md["group"].astype(str) for fid in table_biom.ids(axis="observation"): nb = NegativeBinomialLMESingle( table=table_biom,