Skip to content

Commit

Permalink
Merge pull request #30 from BojarLab/dev
Browse files Browse the repository at this point in the history
merge into master for v1.0.1
  • Loading branch information
Bribak authored Dec 5, 2023
2 parents 044e18d + 501c88c commit 40b04d7
Show file tree
Hide file tree
Showing 15 changed files with 6,809 additions and 6,195 deletions.
12,324 changes: 6,163 additions & 6,161 deletions 05_examples.ipynb

Large diffs are not rendered by default.

47 changes: 47 additions & 0 deletions build/lib/glycowork/glycan_data/loader.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import pandas as pd
import re
import os
Expand Down Expand Up @@ -188,6 +189,52 @@ def multireplace(string, remove_dic):
return string


def fast_two_sum(a, b):
    """Two-term sum for inputs ordered so that abs(a) >= abs(b).

    The leading term is the sum of the integer-converted inputs; the
    residual is whatever part of ``b`` that leading term did not absorb.
    Returns ``[total]`` when the residual is zero, else ``[total, residual]``.
    """
    whole_a = int(a)
    total = whole_a + int(b)
    residual = b - (total - whole_a)
    if residual == 0:
        return [total]
    return [total, residual]


def two_sum(a, b):
    """Two-term sum that makes no assumption about the order of a and b.

    The leading term is the sum of the integer-converted inputs; the
    residual collects what is left over from both ``a`` and ``b``.
    Returns ``[total]`` when the residual is zero, else ``[total, residual]``.
    """
    whole_a, whole_b = int(a), int(b)
    total = whole_a + whole_b
    leftover_a = a - (total - whole_b)
    leftover_b = b - (total - whole_a)
    residual = leftover_a + leftover_b
    if residual == 0:
        return [total]
    return [total, residual]


def expansion_sum(*args):
    """Expansion sum of the given numbers.

    The arguments are sorted in descending order; the two largest are
    combined with fast_two_sum and every remaining value is folded in with
    two_sum. Returns the running total alone when no residual terms were
    produced, otherwise ``[residuals, total]``.
    """
    descending = sorted(args, reverse = True)
    head = fast_two_sum(np.array(descending[0]), np.array(descending[1]))
    running, residuals = head[0], list(head[1:])
    for term in descending[2:]:
        partial = two_sum(running, np.array(term))
        running = partial[0]
        # two_sum only returns a second element when the residual is non-zero
        residuals.extend(partial[1:])
    if residuals:
        return [residuals, running]
    return running


def hlm(z):
    """Hodges-Lehmann estimator of the median.

    Takes the median of all pairwise sums z[i] + z[j] with j <= i
    (diagonal included) and halves it.
    """
    values = np.array(z)
    lower_triangle = np.tril_indices(len(values))
    pairwise_sums = np.add.outer(values, values)[lower_triangle]
    return np.median(pairwise_sums) / 2


def update_cf_for_m_n(m, n, MM, cf):
    """Constructs cumulative frequency table for experimental parameters defined in the function 'jtkinit'.

    Updates ``cf`` in place and returns nothing. The first pass subtracts
    shifted entries while walking the index downwards; the second pass adds
    shifted entries while walking the index upwards. Both passes read entries
    that earlier iterations have already rewritten, so the loop order matters.
    """
    subtract_limit = min(m + n, MM)
    for shift in range(n + 1, subtract_limit + 1):
        # Descending index: cf[idx - shift] is still the pre-update value
        for idx in reversed(range(shift, MM + 1)):
            cf[idx] = expansion_sum(cf[idx], -cf[idx - shift])  # Shewchuk algorithm
    add_limit = min(m, MM)
    for shift in range(1, add_limit + 1):
        # Ascending index: cf[idx - shift] has already been updated in this pass
        for idx in range(shift, MM + 1):
            cf[idx] = expansion_sum(cf[idx], cf[idx - shift])  # Shewchuk algorithm


def build_custom_df(df, kind = 'df_species'):
"""creates custom df from df_glycan\n
| Arguments:
Expand Down
35 changes: 34 additions & 1 deletion build/lib/glycowork/motif/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from sklearn.decomposition import PCA

from glycowork.glycan_data.loader import lib, df_species, unwrap, motif_list
from glycowork.motif.processing import cohen_d, mahalanobis_distance, mahalanobis_variance, variance_stabilization, impute_and_normalize, variance_based_filtering
from glycowork.motif.processing import cohen_d, mahalanobis_distance, mahalanobis_variance, variance_stabilization, impute_and_normalize, variance_based_filtering, jtkdist, jtkinit, MissForest, jtkx
from glycowork.motif.annotate import annotate_dataset, quantify_motifs, link_find, create_correlation_network
from glycowork.motif.graph import subgraph_isomorphism

Expand Down Expand Up @@ -849,3 +849,36 @@ def get_time_series(df, impute = True, motifs = False, feature_set = ['known', '
res = pd.DataFrame(res, columns = ['Glycan', 'Change', 'p-val'])
res['corr p-val'] = multipletests(res['p-val'], method = 'fdr_bh')[1]
return res.sort_values(by = 'corr p-val')


def get_jtk(df, timepoints, replicates, periods, interval, motifs = False, feature_set = ['known', 'exhaustive', 'terminal']):
    """Wrapper function running the analysis \n
    | Arguments:
    | :-
    | df (pd.DataFrame): A dataframe containing data for analysis.
    |   (column 0 = molecule IDs, then arranged in groups and by ascending timepoints)
    | timepoints (int): number of timepoints in the experiment.
    | replicates (int): number of replicates per timepoint.
    | periods (int): number of timepoints per cycle.
    | interval (int): units of time (arbitrary units) between experimental timepoints.
    | motifs (bool): a flag for running structural or motif-based analysis (True = run motif analysis); default:False.
    | feature_set (list): which feature set to use for annotations, add more to list to expand; default is ['known','exhaustive','terminal']; options are: 'known' (hand-crafted glycan features), 'graph' (structural graph features of glycans), 'exhaustive' (all mono- and disaccharide features), 'terminal' (non-reducing end motifs), and 'chemical' (molecular properties of glycan)\n
    | Returns:
    | :-
    | Returns a pandas dataframe containing the adjusted p-values, and most important waveform parameters for each
    | molecule in the analysis.
    """
    param_dic = {"GRP_SIZE": [], "NUM_GRPS": [], "MAX": [], "DIMS": [], "EXACT": bool(True),
                 "VAR": [], "EXV": [], "SDV": [], "CGOOSV": []}
    param_dic = jtkdist(timepoints, param_dic, replicates)
    param_dic = jtkinit(periods, param_dic, interval, replicates)
    mf = MissForest()
    # Zeros are treated as missing values and imputed; replace on a copy so
    # the caller's dataframe is not modified as a side effect.
    df = df.replace(0, np.nan)
    df = mf.fit_transform(df)
    if motifs:
        df = quantify_motifs(df.iloc[:, 1:], df.iloc[:, 0].values.tolist(), feature_set).T
    res = df.apply(jtkx, param_dic = param_dic, axis = 1)
    # Benjamini-Hochberg correction; reuse the index of `res` so the
    # column-wise concat below aligns row by row even when the index is not
    # a default RangeIndex.
    JTK_BHQ = pd.DataFrame(multipletests(res[0], method = 'fdr_bh')[1], index = res.index)
    Results = pd.concat([res.iloc[:, 0], JTK_BHQ, res.iloc[:, 1:]], axis = 1)
    # NOTE(review): six labels imply jtkx yields five values per row — confirm against jtkx.
    Results.columns = ['Molecule_Name', 'BH_Q_Value', 'Adjusted_P_value', 'Period_Length', 'Lag_Phase', 'Amplitude']
    return Results
20 changes: 13 additions & 7 deletions build/lib/glycowork/motif/draw.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@
import copy

try:
import cairosvg
import drawsvg as draw
except ImportError:
raise ImportError("<draw dependencies missing; did you do 'pip install glycowork[draw]'?>")
import networkx as nx
import numpy as np
import sys
import re
Expand Down Expand Up @@ -2147,7 +2145,11 @@ def calculate_degree(y1, y2, x1, x2):
with open(filepath, 'w') as f:
f.write(data)
elif 'pdf' in filepath:
cairosvg.svg2pdf(bytestring = data, write_to = filepath)
try:
from cairosvg import svg2pdf
svg2pdf(bytestring = data, write_to = filepath)
except:
raise ImportError("You're missing some draw dependencies. Either use .svg or head to https://bojarlab.github.io/glycowork/examples.html#glycodraw-code-snippets to learn more.")
return d2


Expand Down Expand Up @@ -2244,11 +2246,15 @@ def annotate_figure(svg_input, scale_range = (25, 80), compact = False, glycan_s
svg_tmp += '</svg>'

if filepath:
try:
from cairosvg import svg2pdf, svg2svg, svg2png
if filepath.split('.')[-1] == 'pdf':
cairosvg.svg2pdf(bytestring = svg_tmp, write_to = filepath, dpi = 300)
svg2pdf(bytestring = svg_tmp, write_to = filepath, dpi = 300)
elif filepath.split('.')[-1] == 'svg':
cairosvg.svg2svg(bytestring = svg_tmp, write_to = filepath, dpi = 300)
svg2svg(bytestring = svg_tmp, write_to = filepath, dpi = 300)
elif filepath.split('.')[-1] == 'png':
cairosvg.svg2png(bytestring = svg_tmp, write_to = filepath, dpi = 300)
svg2png(bytestring = svg_tmp, write_to = filepath, dpi = 300)
except:
raise ImportError("You're missing some draw dependencies. Either don't use filepath or head to https://bojarlab.github.io/glycowork/examples.html#glycodraw-code-snippets to learn more.")
else:
return svg_tmp
return svg_tmp
Loading

0 comments on commit 40b04d7

Please sign in to comment.