
add informed scale model
Bribak committed Apr 15, 2024
1 parent a306f04 commit 4010a0e
Showing 3 changed files with 21 additions and 10 deletions.
8 changes: 5 additions & 3 deletions build/lib/glycowork/motif/analysis.py
@@ -509,7 +509,8 @@ def select_grouping(cohort_b, cohort_a, glycans, p_values, paired = False, group
 def get_differential_expression(df, group1, group2,
                                 motifs = False, feature_set = ['exhaustive', 'known'], paired = False,
                                 impute = True, sets = False, set_thresh = 0.9, effect_size_variance = False,
-                                min_samples = 0.1, grouped_BH = False, custom_motifs = [], transform = "CLR", gamma = 0.1):
+                                min_samples = 0.1, grouped_BH = False, custom_motifs = [], transform = "CLR",
+                                gamma = 0.1, custom_scale = 0):
   """Calculates differentially expressed glycans or motifs from glycomics data\n
   | Arguments:
   | :-
@@ -530,7 +531,8 @@ def get_differential_expression(df, group1, group2,
   | grouped_BH (bool): whether to perform two-stage adaptive Benjamini-Hochberg as a grouped multiple testing correction; will SIGNIFICANTLY increase runtime; default:False
   | custom_motifs (list): list of glycan motifs, used if feature_set includes 'custom'; default:empty
   | transform (str): transformation to escape Aitchison space; options are CLR and ALR (use ALR if you have many glycans (>100) with low values); default:CLR
-  | gamma (float): uncertainty parameter to estimate scale uncertainty for CLR transformation; default: 0.1\n
+  | gamma (float): uncertainty parameter to estimate scale uncertainty for CLR transformation; default: 0.1
+  | custom_scale (float): if you *know* the difference in scale between groups, provide the ratio of group2/group1 for an informed scale model\n
   | Returns:
   | :-
   | Returns a dataframe with:
@@ -560,7 +562,7 @@ def get_differential_expression(df, group1, group2,
   if transform == "ALR":
     df = get_additive_logratio_transformation(df, group1, group2, paired = paired)
   elif transform == "CLR":
-    df.iloc[:, 1:] = clr_transformation(df.iloc[:, 1:], group1, group2, gamma = gamma)
+    df.iloc[:, 1:] = clr_transformation(df.iloc[:, 1:], group1, group2, gamma = gamma, custom_scale = custom_scale)
   else:
     raise ValueError("Only ALR and CLR are valid transforms for now.")
   # Sample-size aware alpha via Bayesian-Adaptive Alpha Adjustment
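As a usage sketch of the new parameter (file and sample names below are hypothetical, not part of this commit): if an orthogonal measurement indicates that group2 carries roughly twice the total glycan amount of group1, that ratio can be passed directly.

import pandas as pd
from glycowork.motif.analysis import get_differential_expression

df = pd.read_csv("glycomics_abundances.csv")  # hypothetical file; first column glycans, remaining columns samples
group1 = ["ctrl_1", "ctrl_2", "ctrl_3"]  # hypothetical control sample columns
group2 = ["case_1", "case_2", "case_3"]  # hypothetical case sample columns
res = get_differential_expression(df, group1, group2, transform = "CLR",
                                  gamma = 0.1, custom_scale = 2)  # known group2/group1 scale ratio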
15 changes: 11 additions & 4 deletions glycowork/glycan_data/stats.py
@@ -772,14 +772,15 @@ def get_equivalence_test(row_a, row_b, paired = False):
   return ttost_paired(row_a, row_b, low, up)[0] if paired else ttost_ind(row_a, row_b, low, up)[0]


-def clr_transformation(df, group1, group2, gamma = 0.1):
+def clr_transformation(df, group1, group2, gamma = 0.1, custom_scale = 0):
   """performs the Center Log-Ratio (CLR) Transformation with scale model adjustment\n
   | Arguments:
   | :-
   | df (dataframe): dataframe containing features in rows and samples in columns
   | group1 (list): list of column indices or names for the first group of samples, usually the control
   | group2 (list): list of column indices or names for the second group of samples
-  | gamma (float): the degree of uncertainty that the CLR assumption holds; default: 0.1\n
+  | gamma (float): the degree of uncertainty that the CLR assumption holds; default: 0.1
+  | custom_scale (float): if you *know* the difference in scale between groups, provide the ratio of group2/group1 for an informed scale model\n
   | Returns:
   | :-
   | Returns a dataframe that is CLR-transformed with scale model adjustment
@@ -791,6 +792,12 @@ def clr_transformation(df, group1, group2, gamma = 0.1):
   group2i = [col_list.index(k) for k in group2]
   case_control = [0]*len(group1) + [1]*len(group2)
   clr_adjusted = np.zeros_like(df.values)
+  if custom_scale:
+    control = norm.rvs(loc = np.log2(1), scale = gamma, size = (df.shape[0], len(group1)))
+    clr_adjusted[:, group1i] = np.log2(df[group1]) + control
+    condition = norm.rvs(loc = np.log2(custom_scale), scale = gamma, size = (df.shape[0], len(group2)))
+    clr_adjusted[:, group2i] = np.log2(df[group2]) + condition
+    return pd.DataFrame(clr_adjusted, index = df.index, columns = df.columns)
   geometric_mean = -np.log2(geometric_mean)
   clr_adjusted[:, group1i] = np.log2(df[group1]) + geometric_mean[group1i]
   observed = norm.rvs(loc = geometric_mean[group2i], scale = gamma, size = (df.shape[0], len(group2)))
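Numerically, the added branch swaps the data-driven geometric-mean recentering for draws centered on log2(custom_scale); a toy sketch with illustrative values, mirroring the added lines:

import numpy as np
from scipy.stats import norm

gamma, custom_scale = 0.1, 2.0  # CLR uncertainty; known group2/group1 scale ratio
group2_vals = np.array([[4.0], [8.0]])  # toy abundances: 2 features, 1 group2 sample
shift = norm.rvs(loc = np.log2(custom_scale), scale = gamma, size = group2_vals.shape)
adjusted = np.log2(group2_vals) + shift  # approx. [[3.0], [4.0]]: log2 values shifted by ~log2(2) = 1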
@@ -991,8 +998,8 @@ def correct_multiple_testing(pvals, alpha):
   corrpvals = [p if p >= pvals[i] else pvals[i] for i, p in enumerate(corrpvals)]
   significance = [p < alpha for p in corrpvals]
   if sum(significance) > 0.9*len(significance):
-    print("Significance inflation detected. The CLR/ALR transformation cannot seem to handle this dataset. \
-Proceed with caution; for now switching to Bonferroni correction for being conservative about this.")
+    print("Significance inflation detected. The CLR/ALR transformation cannot seem to handle this dataset.\
+Proceed with caution; for now switching to Bonferroni correction to be conservative about this.")
     corrpvals = multipletests(pvals, method = 'bonferroni')[1]
     significance = [p < alpha for p in corrpvals]
   return corrpvals, significance
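Read in isolation, the guard above says: if more than 90% of corrected p-values come out significant, fall back to the conservative Bonferroni correction. A standalone sketch (correct_multiple_testing's default correction method is not visible in this diff, so 'fdr_tsbh' below is an assumption for illustration):

import numpy as np
from statsmodels.stats.multitest import multipletests

pvals = np.random.uniform(0, 0.005, 50)  # toy p-values, engineered to nearly all pass
alpha = 0.05
corrpvals = multipletests(pvals, method = 'fdr_tsbh')[1]  # assumed stand-in correction
significance = corrpvals < alpha
if significance.sum() > 0.9*len(significance):  # significance inflation detected
    corrpvals = multipletests(pvals, method = 'bonferroni')[1]
    significance = corrpvals < alpha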
8 changes: 5 additions & 3 deletions glycowork/motif/analysis.py
@@ -509,7 +509,8 @@ def select_grouping(cohort_b, cohort_a, glycans, p_values, paired = False, group
 def get_differential_expression(df, group1, group2,
                                 motifs = False, feature_set = ['exhaustive', 'known'], paired = False,
                                 impute = True, sets = False, set_thresh = 0.9, effect_size_variance = False,
-                                min_samples = 0.1, grouped_BH = False, custom_motifs = [], transform = "CLR", gamma = 0.1):
+                                min_samples = 0.1, grouped_BH = False, custom_motifs = [], transform = "CLR",
+                                gamma = 0.1, custom_scale = 0):
   """Calculates differentially expressed glycans or motifs from glycomics data\n
   | Arguments:
   | :-
@@ -530,7 +531,8 @@ def get_differential_expression(df, group1, group2,
   | grouped_BH (bool): whether to perform two-stage adaptive Benjamini-Hochberg as a grouped multiple testing correction; will SIGNIFICANTLY increase runtime; default:False
   | custom_motifs (list): list of glycan motifs, used if feature_set includes 'custom'; default:empty
   | transform (str): transformation to escape Aitchison space; options are CLR and ALR (use ALR if you have many glycans (>100) with low values); default:CLR
-  | gamma (float): uncertainty parameter to estimate scale uncertainty for CLR transformation; default: 0.1\n
+  | gamma (float): uncertainty parameter to estimate scale uncertainty for CLR transformation; default: 0.1
+  | custom_scale (float): if you *know* the difference in scale between groups, provide the ratio of group2/group1 for an informed scale model\n
   | Returns:
   | :-
   | Returns a dataframe with:
@@ -560,7 +562,7 @@ def get_differential_expression(df, group1, group2,
   if transform == "ALR":
     df = get_additive_logratio_transformation(df, group1, group2, paired = paired)
   elif transform == "CLR":
-    df.iloc[:, 1:] = clr_transformation(df.iloc[:, 1:], group1, group2, gamma = gamma)
+    df.iloc[:, 1:] = clr_transformation(df.iloc[:, 1:], group1, group2, gamma = gamma, custom_scale = custom_scale)
   else:
     raise ValueError("Only ALR and CLR are valid transforms for now.")
   # Sample-size aware alpha via Bayesian-Adaptive Alpha Adjustment
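Finally, a sketch of how one might derive a defensible custom_scale in the first place (numbers made up): the informed scale model only needs the group2/group1 ratio of total signal, for instance from spike-in-normalized totals.

import numpy as np

total_g1 = np.array([95.0, 105.0, 100.0])  # hypothetical per-sample total signal, group1
total_g2 = np.array([190.0, 210.0, 200.0])  # hypothetical per-sample total signal, group2
custom_scale = total_g2.mean() / total_g1.mean()  # 2.0, the group2/group1 ratio
# group2 is then recentered around log2(2.0) = 1 in log2 space instead of the geometric mean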
