GUI progress bar + smoothing details

- Add a progress bar to glycoworkGUI
- Smarter handling of columns in get_SparCC
- Lazy loading of biosynthesis files
BojarLab · Apr 12, 2024 · 8b2ec1a · 8b2ec1a
1 parent d0de4f6
commit 8b2ec1a
Show file tree

Hide file tree

Showing 7 changed files with 6,273 additions and 6,234 deletions.
diff --git a/04_network.ipynb b/04_network.ipynb
diff --git a/05_examples.ipynb b/05_examples.ipynb
diff --git a/bin/glycoworkGUI.py b/bin/glycoworkGUI.py
@@ -1,7 +1,8 @@
 import os
 import sys
+import threading
 import tkinter as tk
-from tkinter import simpledialog, filedialog, messagebox
+from tkinter import simpledialog, filedialog, messagebox, ttk
 from glycowork.motif.draw import GlycoDraw, plot_glycans_excel
 from glycowork.motif.analysis import get_differential_expression, get_heatmap
 
@@ -18,6 +19,26 @@ def resource_path(relative_path):
     return os.path.join(base_path, relative_path)
 
 
+class ProgressDialog(tk.Toplevel):
+    def __init__(self, parent):
+        super().__init__(parent)
+        self.title("Processing")
+        self.progress = ttk.Progressbar(self, orient = "horizontal", length = 300, mode = 'indeterminate')
+        self.progress.pack(pady = 20)
+        self.protocol("WM_DELETE_WINDOW", self.on_close)
+        self.begin()
+
+    def begin(self):
+        self.progress.start(10)
+
+    def end(self):
+        self.progress.stop()
+        self.destroy()
+
+    def on_close(self):
+        messagebox.showwarning("Warning", "Process is running. Please wait...")
+
+
 class GlycoDrawDialog(simpledialog.Dialog):
     def body(self, master):
         self.title("GlycoDraw Input")
@@ -170,15 +191,23 @@ def browse_output_folder(self):
             self.output_folder_var.set(folder_path)
 
 
-def openDifferentialExpressionDialog():
-    dialog_result = DifferentialExpressionDialog(app)
-    if dialog_result.result:
-        csv_file_path, treatment_indices, control_indices, motifs, output_folder = dialog_result.result
+def run_differential_expression(csv_file_path, treatment_indices, control_indices, motifs, output_folder, progress_dialog):
+    try:
         df_out = get_differential_expression(df = csv_file_path,
                                group1 = control_indices,
                                group2 = treatment_indices,
                                motifs = motifs)
         plot_glycans_excel(df_out, output_folder)
+    finally:
+        progress_dialog.end()
+
+
+def openDifferentialExpressionDialog():
+    dialog_result = DifferentialExpressionDialog(app)
+    if dialog_result.result:
+        csv_file_path, treatment_indices, control_indices, motifs, output_folder = dialog_result.result
+        progress_dialog = ProgressDialog(app)
+        threading.Thread(target = run_differential_expression, args = (csv_file_path, treatment_indices, control_indices, motifs, output_folder, progress_dialog), daemon = True).start()
 
 
 class GetHeatmapDialog(simpledialog.Dialog):
@@ -198,7 +227,7 @@ def body(self, master):
         self.motif_analysis_check.grid(row = 1, columnspan = 3, sticky = tk.W)
 
         # Output PDF file selection
-        tk.Label(master, text="Select Output for PDF File:").grid(row = 2, sticky = tk.W)
+        tk.Label(master, text = "Select Output for PDF File:").grid(row = 2, sticky = tk.W)
         self.output_file_entry = tk.Entry(master)
         self.output_file_entry.grid(row = 2, column = 1)
         self.output_file_browse = tk.Button(master, text = "Browse...", command = self.browse_output_file)

diff --git a/build/lib/glycowork/motif/analysis.py b/build/lib/glycowork/motif/analysis.py
@@ -57,7 +57,6 @@ def get_pvals_motifs(df, glycan_col_name = 'glycan', label_col_name = 'target',
     if multiple_samples:
         df = df.drop('target', axis = 1, errors = 'ignore').T.reset_index()
         df.columns = [glycan_col_name] + [label_col_name] * (len(df.columns) - 1)
-        #df = df.apply(replace_outliers_winsorization, axis = 1)
     if not zscores:
         means = df.iloc[:, 1:].mean()
         std_devs = df.iloc[:, 1:].std()
@@ -1144,6 +1143,10 @@ def get_SparCC(df1, df2, motifs = False, feature_set = ["known", "exhaustive"],
     df2 = pd.read_csv(df2) if df2.endswith(".csv") else pd.read_excel(df2)
   df1.iloc[:, 0] = strip_suffixes(df1.iloc[:, 0])
   df2.iloc[:, 0] = strip_suffixes(df2.iloc[:, 0])
+  if df1.columns.tolist()[0] != df2.columns.tolist()[0] and df1.columns.tolist()[0] in df2.columns.tolist():
+      common_columns = df1.columns.intersection(df2.columns)
+      df1 = df1[common_columns]
+      df2 = df2[common_columns]
   # Drop rows with all zero, followed by outlier removal and imputation & normalization
   df1 = df1.loc[~(df1.iloc[:, 1:] == 0).all(axis = 1)]
   df1 = df1.apply(replace_outliers_winsorization, axis = 1)

diff --git a/glycowork/glycan_data/stats.py b/glycowork/glycan_data/stats.py
@@ -945,8 +945,7 @@ def get_procrustes_scores(df, group1, group2, paired = False):
     var_group1 = df[group1].var(axis = 1)
     var_group2 = df[group2].var(axis = 1)
     variances = abs(var_group1 - var_group2)
-  procrustes_disparities = [procrustes(ref_matrix.drop(ref_matrix.index[i]), alr_transformation(df, i))[2] for i in range(df.shape[0])]
-  procrustes_corr = [1 - a for a in procrustes_disparities]
+  procrustes_corr = [1 - procrustes(ref_matrix.drop(ref_matrix.index[i]), alr_transformation(df, i))[2] for i in range(df.shape[0])]
   return [a * (1/b) for a, b in zip(procrustes_corr, variances)], procrustes_corr, variances
 
 

diff --git a/glycowork/motif/analysis.py b/glycowork/motif/analysis.py
@@ -57,7 +57,6 @@ def get_pvals_motifs(df, glycan_col_name = 'glycan', label_col_name = 'target',
     if multiple_samples:
         df = df.drop('target', axis = 1, errors = 'ignore').T.reset_index()
         df.columns = [glycan_col_name] + [label_col_name] * (len(df.columns) - 1)
-        #df = df.apply(replace_outliers_winsorization, axis = 1)
     if not zscores:
         means = df.iloc[:, 1:].mean()
         std_devs = df.iloc[:, 1:].std()
@@ -1144,6 +1143,10 @@ def get_SparCC(df1, df2, motifs = False, feature_set = ["known", "exhaustive"],
     df2 = pd.read_csv(df2) if df2.endswith(".csv") else pd.read_excel(df2)
   df1.iloc[:, 0] = strip_suffixes(df1.iloc[:, 0])
   df2.iloc[:, 0] = strip_suffixes(df2.iloc[:, 0])
+  if df1.columns.tolist()[0] != df2.columns.tolist()[0] and df1.columns.tolist()[0] in df2.columns.tolist():
+      common_columns = df1.columns.intersection(df2.columns)
+      df1 = df1[common_columns]
+      df2 = df2[common_columns]
   # Drop rows with all zero, followed by outlier removal and imputation & normalization
   df1 = df1.loc[~(df1.iloc[:, 1:] == 0).all(axis = 1)]
   df1 = df1.apply(replace_outliers_winsorization, axis = 1)

diff --git a/glycowork/network/biosynthesis.py b/glycowork/network/biosynthesis.py
@@ -19,12 +19,15 @@
 from glycowork.motif.tokenization import get_stem_lib
 from glycowork.motif.regex import get_match
 
-with resources.open_text("glycowork.network", "monolink_to_enzyme.csv") as f:
-  df_enzyme = pd.read_csv(f, sep = '\t')
-
 this_dir, this_filename = os.path.split(__file__) 
 data_path = os.path.join(this_dir, 'milk_networks_exhaustive.pkl')
-net_dic = pickle.load(open(data_path, 'rb'))
+
+def __getattr__(name):
+  if name == "net_dic":
+    net_dic = pickle.load(open(data_path, 'rb'))
+    globals()[name] = net_dic  # Cache it to avoid reloading
+    return net_dic
+  raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
 
 permitted_roots = {"Gal(b1-4)Glc-ol", "Gal(b1-4)GlcNAc-ol"}
 allowed_ptms = {'OS', '3S', '6S', 'OP', '1P', '3P', '6P', 'OAc', '4Ac', '9Ac'}
@@ -715,6 +718,8 @@ def construct_network(glycans, allowed_ptms = allowed_ptms,
           if edge_type == 'monosaccharide':
             elem['diffs'] = edge.split('(')[0]
           elif edge_type == 'enzyme':
+            with resources.open_text("glycowork.network", "monolink_to_enzyme.csv") as f:
+              df_enzyme = pd.read_csv(f, sep = '\t')
             elem['diffs'] = monolink_to_glycoenzyme(edge, df_enzyme)
           else:
             pass
@@ -1119,7 +1124,7 @@ def evoprune_network(network, network_dic = None, species_list = None,
   | Returns pruned network (with virtual node probability as a new node attribute)
   """
   if network_dic is None:
-    network_dic = net_dic
+    network_dic = pickle.load(open(data_path, 'rb'))
   if species_list is None:
     species_list = list(network_dic.keys())
   # Calculate path probabilities of diamonds
@@ -1153,7 +1158,7 @@ def highlight_network(network, highlight, motif = None,
   | Returns a network with the additional 'origin' (motif/species) or 'abundance' (abundance/conservation) node attribute storing the highlight
   """
   if network_dic is None:
-    network_dic = net_dic
+    network_dic = pickle.load(open(data_path, 'rb'))
   # Determine highlight validity
   if highlight not in ['motif', 'species', 'abundance', 'conservation']:
     print(f"Invalid highlight argument: {highlight}")