From 4010a0e845e3bf810c578536153e3ecc698fa367 Mon Sep 17 00:00:00 2001
From: Daniel Bojar
Date: Mon, 15 Apr 2024 19:08:09 +0200
Subject: [PATCH] add informed scale model

---
 build/lib/glycowork/motif/analysis.py |  8 +++++---
 glycowork/glycan_data/stats.py        | 15 +++++++++++----
 glycowork/motif/analysis.py           |  8 +++++---
 3 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/build/lib/glycowork/motif/analysis.py b/build/lib/glycowork/motif/analysis.py
index a5fc0f1..31d9fe9 100644
--- a/build/lib/glycowork/motif/analysis.py
+++ b/build/lib/glycowork/motif/analysis.py
@@ -509,7 +509,8 @@ def select_grouping(cohort_b, cohort_a, glycans, p_values, paired = False, group
 def get_differential_expression(df, group1, group2,
                                 motifs = False, feature_set = ['exhaustive', 'known'], paired = False,
                                 impute = True, sets = False, set_thresh = 0.9, effect_size_variance = False,
-                                min_samples = 0.1, grouped_BH = False, custom_motifs = [], transform = "CLR", gamma = 0.1):
+                                min_samples = 0.1, grouped_BH = False, custom_motifs = [], transform = "CLR",
+                                gamma = 0.1, custom_scale = 0):
   """Calculates differentially expressed glycans or motifs from glycomics data\n
   | Arguments:
   | :-
@@ -530,7 +531,8 @@ def get_differential_expression(df, group1, group2,
   | grouped_BH (bool): whether to perform two-stage adaptive Benjamini-Hochberg as a grouped multiple testing correction; will SIGNIFICANTLY increase runtime; default:False
   | custom_motifs (list): list of glycan motifs, used if feature_set includes 'custom'; default:empty
   | transform (str): transformation to escape Aitchison space; options are CLR and ALR (use ALR if you have many glycans (>100) with low values); default:CLR
-  | gamma (float): uncertainty parameter to estimate scale uncertainty for CLR transformation; default: 0.1\n
+  | gamma (float): uncertainty parameter to estimate scale uncertainty for CLR transformation; default: 0.1
+  | custom_scale (float): if you *know* the difference in scale between groups, provide the ratio of group2/group1 for an informed scale model\n
   | Returns:
   | :-
   | Returns a dataframe with:
@@ -560,7 +562,7 @@ def get_differential_expression(df, group1, group2,
   if transform == "ALR":
     df = get_additive_logratio_transformation(df, group1, group2, paired = paired)
   elif transform == "CLR":
-    df.iloc[:, 1:] = clr_transformation(df.iloc[:, 1:], group1, group2, gamma = gamma)
+    df.iloc[:, 1:] = clr_transformation(df.iloc[:, 1:], group1, group2, gamma = gamma, custom_scale = custom_scale)
   else:
     raise ValueError("Only ALR and CLR are valid transforms for now.")
   # Sample-size aware alpha via Bayesian-Adaptive Alpha Adjustment
diff --git a/glycowork/glycan_data/stats.py b/glycowork/glycan_data/stats.py
index 70bbcc9..ebbb6e7 100644
--- a/glycowork/glycan_data/stats.py
+++ b/glycowork/glycan_data/stats.py
@@ -772,14 +772,15 @@ def get_equivalence_test(row_a, row_b, paired = False):
   return ttost_paired(row_a, row_b, low, up)[0] if paired else ttost_ind(row_a, row_b, low, up)[0]


-def clr_transformation(df, group1, group2, gamma = 0.1):
+def clr_transformation(df, group1, group2, gamma = 0.1, custom_scale = 0):
   """performs the Center Log-Ratio (CLR) Transformation with scale model adjustment\n
   | Arguments:
   | :-
   | df (dataframe): dataframe containing features in rows and samples in columns
   | group1 (list): list of column indices or names for the first group of samples, usually the control
   | group2 (list): list of column indices or names for the second group of samples
-  | gamma (float): the degree of uncertainty that the CLR assumption holds; default: 0.1\n
+  | gamma (float): the degree of uncertainty that the CLR assumption holds; default: 0.1
+  | custom_scale (float): if you *know* the difference in scale between groups, provide the ratio of group2/group1 for an informed scale model\n
   | Returns:
   | :-
   | Returns a dataframe that is CLR-transformed with scale model adjustment
@@ -791,6 +792,12 @@ def clr_transformation(df, group1, group2, gamma = 0.1):
   group2i = [col_list.index(k) for k in group2]
   case_control = [0]*len(group1) + [1]*len(group2)
   clr_adjusted = np.zeros_like(df.values)
+  if custom_scale:
+    control = norm.rvs(loc = np.log2(1), scale = gamma, size = (df.shape[0], len(group1)))
+    clr_adjusted[:, group1i] = np.log2(df[group1]) + control
+    condition = norm.rvs(loc = np.log2(custom_scale), scale = gamma, size = (df.shape[0], len(group2)))
+    clr_adjusted[:, group2i] = np.log2(df[group2]) + condition
+    return pd.DataFrame(clr_adjusted, index = df.index, columns = df.columns)
   geometric_mean = -np.log2(geometric_mean)
   clr_adjusted[:, group1i] = np.log2(df[group1]) + geometric_mean[group1i]
   observed = norm.rvs(loc = geometric_mean[group2i], scale = gamma, size = (df.shape[0], len(group2)))
@@ -991,8 +998,8 @@ def correct_multiple_testing(pvals, alpha):
   corrpvals = [p if p >= pvals[i] else pvals[i] for i, p in enumerate(corrpvals)]
   significance = [p < alpha for p in corrpvals]
   if sum(significance) > 0.9*len(significance):
-    print("Significance inflation detected. The CLR/ALR transformation cannot seem to handle this dataset. \
-          Proceed with caution; for now switching to Bonferroni correction for being conservative about this.")
+    print("Significance inflation detected. The CLR/ALR transformation cannot seem to handle this dataset.\
+Proceed with caution; for now switching to Bonferroni correction to be conservative about this.")
     corrpvals = multipletests(pvals, method = 'bonferroni')[1]
     significance = [p < alpha for p in corrpvals]
   return corrpvals, significance
diff --git a/glycowork/motif/analysis.py b/glycowork/motif/analysis.py
index a5fc0f1..31d9fe9 100644
--- a/glycowork/motif/analysis.py
+++ b/glycowork/motif/analysis.py
@@ -509,7 +509,8 @@ def select_grouping(cohort_b, cohort_a, glycans, p_values, paired = False, group
 def get_differential_expression(df, group1, group2,
                                 motifs = False, feature_set = ['exhaustive', 'known'], paired = False,
                                 impute = True, sets = False, set_thresh = 0.9, effect_size_variance = False,
-                                min_samples = 0.1, grouped_BH = False, custom_motifs = [], transform = "CLR", gamma = 0.1):
+                                min_samples = 0.1, grouped_BH = False, custom_motifs = [], transform = "CLR",
+                                gamma = 0.1, custom_scale = 0):
   """Calculates differentially expressed glycans or motifs from glycomics data\n
   | Arguments:
   | :-
@@ -530,7 +531,8 @@ def get_differential_expression(df, group1, group2,
   | grouped_BH (bool): whether to perform two-stage adaptive Benjamini-Hochberg as a grouped multiple testing correction; will SIGNIFICANTLY increase runtime; default:False
   | custom_motifs (list): list of glycan motifs, used if feature_set includes 'custom'; default:empty
   | transform (str): transformation to escape Aitchison space; options are CLR and ALR (use ALR if you have many glycans (>100) with low values); default:CLR
-  | gamma (float): uncertainty parameter to estimate scale uncertainty for CLR transformation; default: 0.1\n
+  | gamma (float): uncertainty parameter to estimate scale uncertainty for CLR transformation; default: 0.1
+  | custom_scale (float): if you *know* the difference in scale between groups, provide the ratio of group2/group1 for an informed scale model\n
   | Returns:
   | :-
   | Returns a dataframe with:
@@ -560,7 +562,7 @@ def get_differential_expression(df, group1, group2,
   if transform == "ALR":
     df = get_additive_logratio_transformation(df, group1, group2, paired = paired)
   elif transform == "CLR":
-    df.iloc[:, 1:] = clr_transformation(df.iloc[:, 1:], group1, group2, gamma = gamma)
+    df.iloc[:, 1:] = clr_transformation(df.iloc[:, 1:], group1, group2, gamma = gamma, custom_scale = custom_scale)
   else:
     raise ValueError("Only ALR and CLR are valid transforms for now.")
   # Sample-size aware alpha via Bayesian-Adaptive Alpha Adjustment
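
Usage sketch (not part of the commit): the snippet below shows how the new custom_scale argument could be passed through the public API once this patch is applied. Only get_differential_expression, transform, gamma, and custom_scale come from the code above; the file name, sample column names, and the assumed 2-fold scale difference are hypothetical placeholders.

    import pandas as pd
    from glycowork.motif.analysis import get_differential_expression

    # Glycomics table: first column holds glycan identifiers, remaining columns hold
    # per-sample abundances (hypothetical file and column names).
    df = pd.read_csv("glycomics_abundances.csv")
    group1 = ["ctrl_1", "ctrl_2", "ctrl_3"]   # control samples
    group2 = ["case_1", "case_2", "case_3"]   # case samples

    # Default scale model: only gamma encodes uncertainty about the CLR assumption.
    res_default = get_differential_expression(df, group1, group2, transform = "CLR", gamma = 0.1)

    # Informed scale model: if the total glycan amount in group2 is known to be ~2x
    # that of group1, pass that ratio; clr_transformation then skips the geometric-mean
    # centering and instead shifts group2 by a draw centered at log2(custom_scale)
    # with standard deviation gamma.
    res_informed = get_differential_expression(df, group1, group2, transform = "CLR",
                                               gamma = 0.1, custom_scale = 2)

Because custom_scale defaults to 0 and the new branch only triggers on a truthy ratio, the existing data-driven scale model remains the fallback behaviour.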