Skip to content

Commit

Permalink
minor speed-ups and version release prep
Browse files Browse the repository at this point in the history
  • Loading branch information
Bribak committed Aug 3, 2023
1 parent 02666b2 commit 870af95
Show file tree
Hide file tree
Showing 22 changed files with 38,046 additions and 35,803 deletions.
5,034 changes: 2,741 additions & 2,293 deletions 00_core.ipynb

Large diffs are not rendered by default.

11,605 changes: 5,798 additions & 5,807 deletions 01_glycan_data.ipynb

Large diffs are not rendered by default.

25 changes: 7 additions & 18 deletions 02_ml.ipynb

Large diffs are not rendered by default.

7,087 changes: 3,763 additions & 3,324 deletions 03_motif.ipynb

Large diffs are not rendered by default.

87 changes: 48 additions & 39 deletions 04_network.ipynb

Large diffs are not rendered by default.

12,083 changes: 6,019 additions & 6,064 deletions 05_examples.ipynb

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@ alternative: <br>
Note that we have optional extra installs for specialized use (even
further instructions can be found in the `Examples` tab), such as: <br>
*deep learning* <br> `pip install glycowork[ml]` <br> *drawing glycan
images with GlycoDraw* <br> `pip install glycowork[draw]` <br>
*analyzing atomic/chemical properties of glycans* <br>
`pip install glycowork[chem]` <br> *everything* <br>
`pip install glycowork[all]` <br>
images with GlycoDraw (see install instructions in the `Examples` tab)*
<br> `pip install glycowork[draw]` <br> *analyzing atomic/chemical
properties of glycans* <br> `pip install glycowork[chem]` <br>
*everything* <br> `pip install glycowork[all]` <br>

## Data & Models

Expand Down
2 changes: 1 addition & 1 deletion _proc/.quarto/xref/2c7d6167
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"entries":[],"headings":["install","data-models","how-to-use"]}
{"headings":["install","data-models","how-to-use"],"entries":[]}
5,034 changes: 2,741 additions & 2,293 deletions _proc/00_core.ipynb

Large diffs are not rendered by default.

11,594 changes: 5,797 additions & 5,797 deletions _proc/01_glycan_data.ipynb

Large diffs are not rendered by default.

14 changes: 6 additions & 8 deletions _proc/02_ml.ipynb

Large diffs are not rendered by default.

8,107 changes: 4,558 additions & 3,549 deletions _proc/03_motif.ipynb

Large diffs are not rendered by default.

44 changes: 22 additions & 22 deletions _proc/04_network.ipynb

Large diffs are not rendered by default.

12,083 changes: 6,019 additions & 6,064 deletions _proc/05_examples.ipynb

Large diffs are not rendered by default.

246 changes: 123 additions & 123 deletions _proc/_docs/index_files/figure-commonmark/cell-3-output-1.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
248 changes: 124 additions & 124 deletions _proc/index.ipynb

Large diffs are not rendered by default.

16 changes: 7 additions & 9 deletions build/lib/glycowork/motif/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,13 +134,11 @@ def presence_to_matrix(df, glycan_col_name = 'target', label_col_name = 'Species
| :-
| Returns pandas dataframe with labels as rows and glycan occurrences as columns
"""
glycans = sorted(set(df[glycan_col_name].values.tolist()))
species = sorted(set(df[label_col_name].values.tolist()))
# Get a count matrix for each rank - glycan combination
mat_dic = {k: [df[df[label_col_name] == j][glycan_col_name].values.tolist().count(k) for j in species] for k in glycans}
mat = pd.DataFrame(mat_dic)
mat.index = species
return mat
# Create a grouped dataframe where we count the occurrences of each glycan in each species group
grouped_df = df.groupby([label_col_name, glycan_col_name]).size().unstack(fill_value = 0)
# Sort the index and columns
grouped_df = grouped_df.sort_index().sort_index(axis = 1)
return grouped_df


def find_matching_brackets_indices(s):
Expand All @@ -153,12 +151,12 @@ def find_matching_brackets_indices(s):
stack.append(i)
opening_indices[i] = len(stack) - 1
elif c == ']':
if len(stack) > 0:
if stack:
opening_index = stack.pop()
matching_indices.append((opening_index, i))
del opening_indices[opening_index]

if len(stack) > 0:
if stack:
print("Unmatched opening brackets:", [s[i] for i in stack])
return None
else:
Expand Down
11 changes: 7 additions & 4 deletions build/lib/glycowork/motif/tokenization.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,15 +327,15 @@ def mz_to_composition(mz_value, mode = 'negative', mass_value = 'monoisotopic',
if not filter_out.intersection(c.keys()):
out = [c]
break
if len(out) > 0:
if out:
return out
else:
for m, c in cache.items():
if abs(m+adduct - mz_value) < mass_tolerance:
if not filter_out.intersection(c.keys()):
out = [c]
break
if len(out) > 0:
if out:
return out
else:
mz_value = (mz_value+0.5*multiplier)*2+(reduced*1)
Expand Down Expand Up @@ -669,11 +669,14 @@ def composition_to_mass(dict_comp, mass_value = 'monoisotopic',
| :-
| Returns the theoretical mass of input composition
"""
mass_dict = dict(zip(mapping_file.composition, mapping_file[sample_prep + '_' + mass_value]))
if sample_prep + '_' + mass_value == "underivatized_monoisotopic":
mass_dict_in = mass_dict
else:
mass_dict_in = dict(zip(mapping_file.composition, mapping_file[sample_prep + '_' + mass_value]))
for old_key, new_key in {'S': 'Sulphate', 'P': 'Phosphate', 'Me': 'Methyl', 'Ac': 'Acetate'}.items():
if old_key in dict_comp:
dict_comp[new_key] = dict_comp.pop(old_key)
return sum(mass_dict.get(k, 0) * v for k, v in dict_comp.items()) + mass_dict['red_end']
return sum(mass_dict_in.get(k, 0) * v for k, v in dict_comp.items()) + mass_dict_in['red_end']


def glycan_to_mass(glycan, mass_value = 'monoisotopic', sample_prep = 'underivatized', stem_libr = None):
Expand Down
16 changes: 7 additions & 9 deletions glycowork/motif/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,13 +134,11 @@ def presence_to_matrix(df, glycan_col_name = 'target', label_col_name = 'Species
| :-
| Returns pandas dataframe with labels as rows and glycan occurrences as columns
"""
glycans = sorted(set(df[glycan_col_name].values.tolist()))
species = sorted(set(df[label_col_name].values.tolist()))
# Get a count matrix for each rank - glycan combination
mat_dic = {k: [df[df[label_col_name] == j][glycan_col_name].values.tolist().count(k) for j in species] for k in glycans}
mat = pd.DataFrame(mat_dic)
mat.index = species
return mat
# Create a grouped dataframe where we count the occurrences of each glycan in each species group
grouped_df = df.groupby([label_col_name, glycan_col_name]).size().unstack(fill_value = 0)
# Sort the index and columns
grouped_df = grouped_df.sort_index().sort_index(axis = 1)
return grouped_df


def find_matching_brackets_indices(s):
Expand All @@ -153,12 +151,12 @@ def find_matching_brackets_indices(s):
stack.append(i)
opening_indices[i] = len(stack) - 1
elif c == ']':
if len(stack) > 0:
if stack:
opening_index = stack.pop()
matching_indices.append((opening_index, i))
del opening_indices[opening_index]

if len(stack) > 0:
if stack:
print("Unmatched opening brackets:", [s[i] for i in stack])
return None
else:
Expand Down
11 changes: 7 additions & 4 deletions glycowork/motif/tokenization.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,15 +327,15 @@ def mz_to_composition(mz_value, mode = 'negative', mass_value = 'monoisotopic',
if not filter_out.intersection(c.keys()):
out = [c]
break
if len(out) > 0:
if out:
return out
else:
for m, c in cache.items():
if abs(m+adduct - mz_value) < mass_tolerance:
if not filter_out.intersection(c.keys()):
out = [c]
break
if len(out) > 0:
if out:
return out
else:
mz_value = (mz_value+0.5*multiplier)*2+(reduced*1)
Expand Down Expand Up @@ -669,11 +669,14 @@ def composition_to_mass(dict_comp, mass_value = 'monoisotopic',
| :-
| Returns the theoretical mass of input composition
"""
mass_dict = dict(zip(mapping_file.composition, mapping_file[sample_prep + '_' + mass_value]))
if sample_prep + '_' + mass_value == "underivatized_monoisotopic":
mass_dict_in = mass_dict
else:
mass_dict_in = dict(zip(mapping_file.composition, mapping_file[sample_prep + '_' + mass_value]))
for old_key, new_key in {'S': 'Sulphate', 'P': 'Phosphate', 'Me': 'Methyl', 'Ac': 'Acetate'}.items():
if old_key in dict_comp:
dict_comp[new_key] = dict_comp.pop(old_key)
return sum(mass_dict.get(k, 0) * v for k, v in dict_comp.items()) + mass_dict['red_end']
return sum(mass_dict_in.get(k, 0) * v for k, v in dict_comp.items()) + mass_dict_in['red_end']


def glycan_to_mass(glycan, mass_value = 'monoisotopic', sample_prep = 'underivatized', stem_libr = None):
Expand Down
Loading

0 comments on commit 870af95

Please sign in to comment.