Skip to content

Commit

Permalink
GUI progress bar + smoothing details
Browse files Browse the repository at this point in the history
- add progress bar to glycoworkGUI
- smarter handling of columns in get_SparCC
- lazy loading of biosynthesis files
  • Loading branch information
Bribak committed Apr 12, 2024
1 parent d0de4f6 commit 8b2ec1a
Show file tree
Hide file tree
Showing 7 changed files with 6,273 additions and 6,234 deletions.
70 changes: 35 additions & 35 deletions 04_network.ipynb

Large diffs are not rendered by default.

12,366 changes: 6,183 additions & 6,183 deletions 05_examples.ipynb

Large diffs are not rendered by default.

41 changes: 35 additions & 6 deletions bin/glycoworkGUI.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import os
import sys
import threading
import tkinter as tk
from tkinter import simpledialog, filedialog, messagebox
from tkinter import simpledialog, filedialog, messagebox, ttk
from glycowork.motif.draw import GlycoDraw, plot_glycans_excel
from glycowork.motif.analysis import get_differential_expression, get_heatmap

Expand All @@ -18,6 +19,26 @@ def resource_path(relative_path):
return os.path.join(base_path, relative_path)


class ProgressDialog(tk.Toplevel):
    """Top-level window that displays an indeterminate progress bar.

    The bar starts animating as soon as the dialog is constructed. Call
    ``end()`` to stop the animation and destroy the window. A close request
    from the window manager does not close the dialog; it only shows a
    warning box.
    """

    def __init__(self, parent):
        """Create the dialog as a child of *parent* and start the animation."""
        super().__init__(parent)
        self.title("Processing")
        self.progress = bar = ttk.Progressbar(
            self,
            orient = "horizontal",
            length = 300,
            mode = 'indeterminate',
        )
        bar.pack(pady = 20)
        # Route the window-manager close request to on_close instead of the default handler
        self.protocol("WM_DELETE_WINDOW", self.on_close)
        self.begin()

    def begin(self):
        """Start the bar's auto-increment animation with an interval of 10."""
        self.progress.start(10)

    def end(self):
        """Stop the animation and destroy the dialog window."""
        self.progress.stop()
        self.destroy()

    def on_close(self):
        """Handle a window close request by warning the user; the window stays open."""
        messagebox.showwarning("Warning", "Process is running. Please wait...")


class GlycoDrawDialog(simpledialog.Dialog):
def body(self, master):
self.title("GlycoDraw Input")
Expand Down Expand Up @@ -170,15 +191,23 @@ def browse_output_folder(self):
self.output_folder_var.set(folder_path)


def openDifferentialExpressionDialog():
dialog_result = DifferentialExpressionDialog(app)
if dialog_result.result:
csv_file_path, treatment_indices, control_indices, motifs, output_folder = dialog_result.result
def run_differential_expression(csv_file_path, treatment_indices, control_indices, motifs, output_folder, progress_dialog):
    """Run the differential expression analysis and export the result.

    Calls ``get_differential_expression`` with the control indices as
    ``group1`` and the treatment indices as ``group2``, then passes the
    result and *output_folder* to ``plot_glycans_excel``.

    ``progress_dialog.end()`` is always called afterwards, whether the
    analysis succeeded or raised; any exception still propagates to the
    caller.
    """
    try:
        analysis_kwargs = {
            "df": csv_file_path,
            "group1": control_indices,
            "group2": treatment_indices,
            "motifs": motifs,
        }
        results = get_differential_expression(**analysis_kwargs)
        plot_glycans_excel(results, output_folder)
    finally:
        # Dismiss the progress window even if the analysis failed
        progress_dialog.end()


def openDifferentialExpressionDialog():
    """Ask for differential expression inputs and run the analysis in the background.

    Opens ``DifferentialExpressionDialog`` on ``app``. If the dialog yields
    a result, a ``ProgressDialog`` is shown and
    ``run_differential_expression`` is started on a daemon thread with the
    collected inputs. Nothing happens if the dialog result is empty.
    """
    selection = DifferentialExpressionDialog(app).result
    if not selection:
        return
    csv_file_path, treatment_indices, control_indices, motifs, output_folder = selection
    progress_dialog = ProgressDialog(app)
    worker = threading.Thread(
        target = run_differential_expression,
        args = (csv_file_path, treatment_indices, control_indices, motifs, output_folder, progress_dialog),
        daemon = True,
    )
    worker.start()


class GetHeatmapDialog(simpledialog.Dialog):
Expand All @@ -198,7 +227,7 @@ def body(self, master):
self.motif_analysis_check.grid(row = 1, columnspan = 3, sticky = tk.W)

# Output PDF file selection
tk.Label(master, text="Select Output for PDF File:").grid(row = 2, sticky = tk.W)
tk.Label(master, text = "Select Output for PDF File:").grid(row = 2, sticky = tk.W)
self.output_file_entry = tk.Entry(master)
self.output_file_entry.grid(row = 2, column = 1)
self.output_file_browse = tk.Button(master, text = "Browse...", command = self.browse_output_file)
Expand Down
5 changes: 4 additions & 1 deletion build/lib/glycowork/motif/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ def get_pvals_motifs(df, glycan_col_name = 'glycan', label_col_name = 'target',
if multiple_samples:
df = df.drop('target', axis = 1, errors = 'ignore').T.reset_index()
df.columns = [glycan_col_name] + [label_col_name] * (len(df.columns) - 1)
#df = df.apply(replace_outliers_winsorization, axis = 1)
if not zscores:
means = df.iloc[:, 1:].mean()
std_devs = df.iloc[:, 1:].std()
Expand Down Expand Up @@ -1144,6 +1143,10 @@ def get_SparCC(df1, df2, motifs = False, feature_set = ["known", "exhaustive"],
df2 = pd.read_csv(df2) if df2.endswith(".csv") else pd.read_excel(df2)
df1.iloc[:, 0] = strip_suffixes(df1.iloc[:, 0])
df2.iloc[:, 0] = strip_suffixes(df2.iloc[:, 0])
if df1.columns.tolist()[0] != df2.columns.tolist()[0] and df1.columns.tolist()[0] in df2.columns.tolist():
common_columns = df1.columns.intersection(df2.columns)
df1 = df1[common_columns]
df2 = df2[common_columns]
# Drop rows with all zero, followed by outlier removal and imputation & normalization
df1 = df1.loc[~(df1.iloc[:, 1:] == 0).all(axis = 1)]
df1 = df1.apply(replace_outliers_winsorization, axis = 1)
Expand Down
3 changes: 1 addition & 2 deletions glycowork/glycan_data/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -945,8 +945,7 @@ def get_procrustes_scores(df, group1, group2, paired = False):
var_group1 = df[group1].var(axis = 1)
var_group2 = df[group2].var(axis = 1)
variances = abs(var_group1 - var_group2)
procrustes_disparities = [procrustes(ref_matrix.drop(ref_matrix.index[i]), alr_transformation(df, i))[2] for i in range(df.shape[0])]
procrustes_corr = [1 - a for a in procrustes_disparities]
procrustes_corr = [1 - procrustes(ref_matrix.drop(ref_matrix.index[i]), alr_transformation(df, i))[2] for i in range(df.shape[0])]
return [a * (1/b) for a, b in zip(procrustes_corr, variances)], procrustes_corr, variances


Expand Down
5 changes: 4 additions & 1 deletion glycowork/motif/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ def get_pvals_motifs(df, glycan_col_name = 'glycan', label_col_name = 'target',
if multiple_samples:
df = df.drop('target', axis = 1, errors = 'ignore').T.reset_index()
df.columns = [glycan_col_name] + [label_col_name] * (len(df.columns) - 1)
#df = df.apply(replace_outliers_winsorization, axis = 1)
if not zscores:
means = df.iloc[:, 1:].mean()
std_devs = df.iloc[:, 1:].std()
Expand Down Expand Up @@ -1144,6 +1143,10 @@ def get_SparCC(df1, df2, motifs = False, feature_set = ["known", "exhaustive"],
df2 = pd.read_csv(df2) if df2.endswith(".csv") else pd.read_excel(df2)
df1.iloc[:, 0] = strip_suffixes(df1.iloc[:, 0])
df2.iloc[:, 0] = strip_suffixes(df2.iloc[:, 0])
if df1.columns.tolist()[0] != df2.columns.tolist()[0] and df1.columns.tolist()[0] in df2.columns.tolist():
common_columns = df1.columns.intersection(df2.columns)
df1 = df1[common_columns]
df2 = df2[common_columns]
# Drop rows with all zero, followed by outlier removal and imputation & normalization
df1 = df1.loc[~(df1.iloc[:, 1:] == 0).all(axis = 1)]
df1 = df1.apply(replace_outliers_winsorization, axis = 1)
Expand Down
17 changes: 11 additions & 6 deletions glycowork/network/biosynthesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,15 @@
from glycowork.motif.tokenization import get_stem_lib
from glycowork.motif.regex import get_match

with resources.open_text("glycowork.network", "monolink_to_enzyme.csv") as f:
df_enzyme = pd.read_csv(f, sep = '\t')

this_dir, this_filename = os.path.split(__file__)
data_path = os.path.join(this_dir, 'milk_networks_exhaustive.pkl')
net_dic = pickle.load(open(data_path, 'rb'))

def __getattr__(name):
    """Lazily provide the module attribute ``net_dic`` on first access.

    This is a module-level ``__getattr__``, so it is only consulted when
    normal attribute lookup on the module fails. For ``net_dic`` it
    unpickles the file at ``data_path``, stores the object in the module
    globals so later lookups no longer reach this function, and returns it.

    Raises:
        AttributeError: for any other attribute name.
    """
    if name == "net_dic":
        # Use a context manager so the file handle is closed right after loading;
        # a bare open() passed to pickle.load is never explicitly closed.
        with open(data_path, 'rb') as handle:
            net_dic = pickle.load(handle)
        globals()[name] = net_dic  # Cache it to avoid reloading
        return net_dic
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

permitted_roots = {"Gal(b1-4)Glc-ol", "Gal(b1-4)GlcNAc-ol"}
allowed_ptms = {'OS', '3S', '6S', 'OP', '1P', '3P', '6P', 'OAc', '4Ac', '9Ac'}
Expand Down Expand Up @@ -715,6 +718,8 @@ def construct_network(glycans, allowed_ptms = allowed_ptms,
if edge_type == 'monosaccharide':
elem['diffs'] = edge.split('(')[0]
elif edge_type == 'enzyme':
with resources.open_text("glycowork.network", "monolink_to_enzyme.csv") as f:
df_enzyme = pd.read_csv(f, sep = '\t')
elem['diffs'] = monolink_to_glycoenzyme(edge, df_enzyme)
else:
pass
Expand Down Expand Up @@ -1119,7 +1124,7 @@ def evoprune_network(network, network_dic = None, species_list = None,
| Returns pruned network (with virtual node probability as a new node attribute)
"""
if network_dic is None:
network_dic = net_dic
network_dic = pickle.load(open(data_path, 'rb'))
if species_list is None:
species_list = list(network_dic.keys())
# Calculate path probabilities of diamonds
Expand Down Expand Up @@ -1153,7 +1158,7 @@ def highlight_network(network, highlight, motif = None,
| Returns a network with the additional 'origin' (motif/species) or 'abundance' (abundance/conservation) node attribute storing the highlight
"""
if network_dic is None:
network_dic = net_dic
network_dic = pickle.load(open(data_path, 'rb'))
# Determine highlight validity
if highlight not in ['motif', 'species', 'abundance', 'conservation']:
print(f"Invalid highlight argument: {highlight}")
Expand Down

0 comments on commit 8b2ec1a

Please sign in to comment.