From 222255684163afce821d3cfb3edfd812678b7795 Mon Sep 17 00:00:00 2001 From: Daniel Bojar Date: Wed, 17 Apr 2024 07:20:21 +0200 Subject: [PATCH] Nothing transform for comparison; make min_samples more robust --- build/lib/glycowork/motif/analysis.py | 2 ++ glycowork/glycan_data/stats.py | 6 +++--- glycowork/motif/analysis.py | 2 ++ 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/build/lib/glycowork/motif/analysis.py b/build/lib/glycowork/motif/analysis.py index 502fa05..350f716 100644 --- a/build/lib/glycowork/motif/analysis.py +++ b/build/lib/glycowork/motif/analysis.py @@ -566,6 +566,8 @@ def get_differential_expression(df, group1, group2, df = get_additive_logratio_transformation(df, group1, group2, paired = paired) elif transform == "CLR": df.iloc[:, 1:] = clr_transformation(df.iloc[:, 1:], group1, group2, gamma = gamma, custom_scale = custom_scale) + elif transform == "Nothing": + pass else: raise ValueError("Only ALR and CLR are valid transforms for now.") # Sample-size aware alpha via Bayesian-Adaptive Alpha Adjustment diff --git a/glycowork/glycan_data/stats.py b/glycowork/glycan_data/stats.py index 760d221..74542d7 100644 --- a/glycowork/glycan_data/stats.py +++ b/glycowork/glycan_data/stats.py @@ -234,9 +234,9 @@ def impute_and_normalize(df, groups, impute = True, min_samples = 0.1): | Returns a dataframe in the same style as the input """ if min_samples: - min_count = np.floor(df.shape[1] * min_samples) + min_count = max(np.floor(df.shape[1] * min_samples), 2) + 1 mask = (df != 0).sum(axis = 1) >= min_count - df = df[mask] + df = df[mask].reset_index(drop = True) colname = df.columns[0] glycans = df[colname] df = df.iloc[:, 1:] @@ -998,7 +998,7 @@ def correct_multiple_testing(pvals, alpha): corrpvals = [p if p >= pvals[i] else pvals[i] for i, p in enumerate(corrpvals)] significance = [p < alpha for p in corrpvals] if sum(significance) > 0.9*len(significance): - print("Significance inflation detected. The CLR/ALR transformation cannot seem to handle this dataset.\ + print("Significance inflation detected. The CLR/ALR transformation cannot seem to handle this dataset. Consider running again with a higher gamma value.\ Proceed with caution; for now switching to Bonferroni correction to be conservative about this.") res = multipletests(pvals, method = 'bonferroni') corrpvals, alpha = res[1], res[3] diff --git a/glycowork/motif/analysis.py b/glycowork/motif/analysis.py index 502fa05..350f716 100644 --- a/glycowork/motif/analysis.py +++ b/glycowork/motif/analysis.py @@ -566,6 +566,8 @@ def get_differential_expression(df, group1, group2, df = get_additive_logratio_transformation(df, group1, group2, paired = paired) elif transform == "CLR": df.iloc[:, 1:] = clr_transformation(df.iloc[:, 1:], group1, group2, gamma = gamma, custom_scale = custom_scale) + elif transform == "Nothing": + pass else: raise ValueError("Only ALR and CLR are valid transforms for now.") # Sample-size aware alpha via Bayesian-Adaptive Alpha Adjustment