Skip to content

Commit

Permalink
add a negation decorator to refine motif definitions
Browse files Browse the repository at this point in the history
  • Loading branch information
Bribak committed Jun 14, 2024
1 parent 965fe6f commit f60c6d4
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 10 deletions.
6 changes: 3 additions & 3 deletions build/lib/glycowork/glycan_data/glycan_motifs.csv
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,12 @@ Gala_series,Gal(a1-4)Gal1Cer,"['flexible', 'terminal']"
LPS_core,Kdo(a2-4)Kdo(a2-6)GlcN(b1-6)GlcN,"['flexible', 'flexible', 'flexible', 'terminal']"
Nglycan_complex,GlcNAc(b1-2)Man(a1-3)[GlcNAc(b1-2)Man(a1-6)]Man(b1-4)GlcNAc(b1-4)GlcNAc,"['flexible', 'flexible', 'flexible', 'flexible', 'flexible', 'flexible', 'terminal']"
Nglycan_complex2,GlcNAc(b1-4)Man(a1-3)[GlcNAc(b1-4)Man(a1-6)]Man(b1-4)GlcNAc(b1-4)GlcNAc,"['flexible', 'flexible', 'flexible', 'flexible', 'flexible', 'flexible', 'terminal']"
Oglycan_core1,Gal(b1-3)GalNAc,"['flexible', 'terminal']"
Oglycan_core1,Gal(b1-3)[!GlcNAc(b1-6)]GalNAc,"['flexible', 'flexible', 'terminal']"
Oglycan_core2,Gal(b1-3)[GlcNAc(b1-6)]GalNAc,"['flexible', 'flexible', 'flexible']"
Oglycan_core3,GlcNAc(b1-3)GalNAc,"['flexible', 'terminal']"
Oglycan_core3,GlcNAc(b1-3)[!GlcNAc(b1-6)]GalNAc,"['flexible', 'flexible', 'terminal']"
Oglycan_core4,GlcNAc(b1-3)[GlcNAc(b1-6)]GalNAc,"['flexible', 'flexible', 'flexible']"
Oglycan_core5,GalNAc(a1-3)GalNAc,"['flexible', 'terminal']"
Oglycan_core6,GlcNAc(b1-6)GalNAc,"['flexible', 'terminal']"
Oglycan_core6,GlcNAc(b1-6)[!GlcNAc(b1-3)]GalNAc,"['flexible', 'flexible', 'terminal']"
Oglycan_core7,GalNAc(a1-6)GalNAc,"['flexible', 'terminal']"
Xylogalacturonan,Xyl(a1-3)GalA(a1-4)[Xyl(a1-3)]GalA,"['flexible', 'flexible', 'flexible', 'flexible']"
Sialosylparagloboside,Sia(a2-3)Gal(b1-4)GlcNAc(b1-3)Gal(b1-4)Glc1Cer,"['terminal', 'flexible', 'flexible', 'flexible', 'terminal']"
Expand Down
54 changes: 54 additions & 0 deletions build/lib/glycowork/motif/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,18 @@ def expand_termini_list(motif, termini_list):
return t_list


def handle_negation(original_func):
    """decorator that sends motifs containing a negation marker ('!') to subgraph_isomorphism_with_negation\n
    | Arguments:
    | :-
    | original_func (function): function called as original_func(glycan, motif, *args, **kwargs)\n
    | Returns:
    | :-
    | Returns a wrapper with the same call signature (and name/docstring) as original_func
    """
    # Local import so this block does not depend on the file-level import list
    from functools import wraps

    def contains_negation(motif):
        # String motifs carry the '!' directly in the sequence
        if isinstance(motif, str):
            return '!' in motif
        # Graph-like motifs (anything exposing .nodes) carry it in a node's 'string_labels' attribute
        if hasattr(motif, 'nodes'):
            return any('!' in data.get('string_labels', '') for _, data in motif.nodes(data = True))
        return False

    @wraps(original_func)  # keep __name__ / __doc__ of the decorated function intact
    def wrapper(glycan, motif, *args, **kwargs):
        if contains_negation(motif):
            return subgraph_isomorphism_with_negation(glycan, motif, *args, **kwargs)
        return original_func(glycan, motif, *args, **kwargs)
    return wrapper


@handle_negation
def subgraph_isomorphism(glycan, motif, termini_list = [], count = False, return_matches = False):
"""returns True if motif is in glycan and False if not\n
| Arguments:
Expand All @@ -264,6 +276,8 @@ def subgraph_isomorphism(glycan, motif, termini_list = [], count = False, return
if isinstance(glycan, str) and isinstance(motif, str):
if motif.count('(') > glycan.count('('):
return (0, []) if return_matches else 0 if count else False
if not count and not return_matches and motif in glycan:
return True
motif_comp = min_process_glycans([motif, glycan])
if 'O' in glycan + motif:
glycan, motif = [re.sub(r"(?<=[a-zA-Z])\d+(?=[a-zA-Z])", 'O', g).replace('NeuOAc', 'Neu5Ac').replace('NeuOGc', 'Neu5Gc') for g in [glycan, motif]]
Expand Down Expand Up @@ -322,6 +336,46 @@ def subgraph_isomorphism(glycan, motif, termini_list = [], count = False, return
return False if not return_matches else (0, [])


def subgraph_isomorphism_with_negation(glycan, motif, termini_list = [], count = False, return_matches = False):
    """matches a motif containing a negated ('!') element: the motif without that element must be in glycan, while the motif including it must not\n
    | Arguments:
    | :-
    | glycan (string or networkx): glycan in IUPAC-condensed format or as graph in NetworkX format
    | motif (string or networkx): glycan motif with a '!'-prefixed element, in IUPAC-condensed format or as graph in NetworkX format
    | termini_list (list): list of monosaccharide positions (from 'terminal', 'internal', and 'flexible')
    | count (bool): whether to return the number or absence/presence of motifs; default:False
    | return_matches (bool): whether the matched subgraphs in input glycan should be returned as node lists as an additional output; default:False\n
    | Returns:
    | :-
    | Returns the subgraph_isomorphism result for the motif without its negated element if the motif including that element is absent from glycan; otherwise False (0 if count is True, (0, []) if return_matches is True)
    """
    # Step 1: build the motif with the negated element removed
    if isinstance(motif, str):
        # Cut from the first '!' through the next ')' and drop the branch brackets if that leaves them empty
        neg_start = motif.index('!')
        neg_end = motif.index(')', neg_start) + 1
        motif_stub = (motif[:neg_start] + motif[neg_end:]).replace('[]', '')
    else:
        motif_stub = motif.copy()
        nodes_to_remove = {node for node, data in motif_stub.nodes(data = True)
                           if '!' in data.get('string_labels', '')}
        # Also drop the node numbered directly after each negated node
        # (presumably the linkage attached to the negated monosaccharide - TODO confirm against the graph node numbering)
        nodes_to_remove.update({node + 1 for node in nodes_to_remove if node + 1 in motif_stub})
        motif_stub.remove_nodes_from(nodes_to_remove)
    res = subgraph_isomorphism(glycan, motif_stub, termini_list = termini_list, count = count, return_matches = return_matches)
    # With return_matches the result is a (count, matches) tuple, which is truthy even for zero matches
    if not res or (isinstance(res, tuple) and not res[0]):
        return res
    # Step 2: build the motif that includes the negated element (with the '!' marker stripped)
    if isinstance(motif, str):
        motif_too_large = motif.replace('!', '')
    else:
        motif_too_large = motif.copy()
        for node, data in motif_too_large.nodes(data = True):
            if '!' in data.get('string_labels', ''):
                motif_too_large.nodes[node]['string_labels'] = data['string_labels'].replace('!', '')
    res2 = subgraph_isomorphism(glycan, motif_too_large, termini_list = termini_list, count = count, return_matches = return_matches)
    # Unpack the tuple before testing; a bare `if res2:` would treat (0, []) as a hit and reject every glycan
    forbidden_found = res2[0] if isinstance(res2, tuple) else res2
    if forbidden_found:
        # Any occurrence of the motif including the negated element rejects the glycan as a whole
        return (0, []) if return_matches else 0 if count else False
    return res


def generate_graph_features(glycan, glycan_graph = True, label = 'network'):
"""compute graph features of glycan\n
| Arguments:
Expand Down
6 changes: 3 additions & 3 deletions glycowork/glycan_data/glycan_motifs.csv
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,12 @@ Gala_series,Gal(a1-4)Gal1Cer,"['flexible', 'terminal']"
LPS_core,Kdo(a2-4)Kdo(a2-6)GlcN(b1-6)GlcN,"['flexible', 'flexible', 'flexible', 'terminal']"
Nglycan_complex,GlcNAc(b1-2)Man(a1-3)[GlcNAc(b1-2)Man(a1-6)]Man(b1-4)GlcNAc(b1-4)GlcNAc,"['flexible', 'flexible', 'flexible', 'flexible', 'flexible', 'flexible', 'terminal']"
Nglycan_complex2,GlcNAc(b1-4)Man(a1-3)[GlcNAc(b1-4)Man(a1-6)]Man(b1-4)GlcNAc(b1-4)GlcNAc,"['flexible', 'flexible', 'flexible', 'flexible', 'flexible', 'flexible', 'terminal']"
Oglycan_core1,Gal(b1-3)GalNAc,"['flexible', 'terminal']"
Oglycan_core1,Gal(b1-3)[!GlcNAc(b1-6)]GalNAc,"['flexible', 'flexible', 'terminal']"
Oglycan_core2,Gal(b1-3)[GlcNAc(b1-6)]GalNAc,"['flexible', 'flexible', 'flexible']"
Oglycan_core3,GlcNAc(b1-3)GalNAc,"['flexible', 'terminal']"
Oglycan_core3,GlcNAc(b1-3)[!GlcNAc(b1-6)]GalNAc,"['flexible', 'flexible', 'terminal']"
Oglycan_core4,GlcNAc(b1-3)[GlcNAc(b1-6)]GalNAc,"['flexible', 'flexible', 'flexible']"
Oglycan_core5,GalNAc(a1-3)GalNAc,"['flexible', 'terminal']"
Oglycan_core6,GlcNAc(b1-6)GalNAc,"['flexible', 'terminal']"
Oglycan_core6,GlcNAc(b1-6)[!GlcNAc(b1-3)]GalNAc,"['flexible', 'flexible', 'terminal']"
Oglycan_core7,GalNAc(a1-6)GalNAc,"['flexible', 'terminal']"
Xylogalacturonan,Xyl(a1-3)GalA(a1-4)[Xyl(a1-3)]GalA,"['flexible', 'flexible', 'flexible', 'flexible']"
Sialosylparagloboside,Sia(a2-3)Gal(b1-4)GlcNAc(b1-3)Gal(b1-4)Glc1Cer,"['terminal', 'flexible', 'flexible', 'flexible', 'terminal']"
Expand Down
54 changes: 54 additions & 0 deletions glycowork/motif/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,18 @@ def expand_termini_list(motif, termini_list):
return t_list


def handle_negation(original_func):
    """decorator that sends motifs containing a negation marker ('!') to subgraph_isomorphism_with_negation\n
    | Arguments:
    | :-
    | original_func (function): function called as original_func(glycan, motif, *args, **kwargs)\n
    | Returns:
    | :-
    | Returns a wrapper with the same call signature (and name/docstring) as original_func
    """
    # Local import so this block does not depend on the file-level import list
    from functools import wraps

    def contains_negation(motif):
        # String motifs carry the '!' directly in the sequence
        if isinstance(motif, str):
            return '!' in motif
        # Graph-like motifs (anything exposing .nodes) carry it in a node's 'string_labels' attribute
        if hasattr(motif, 'nodes'):
            return any('!' in data.get('string_labels', '') for _, data in motif.nodes(data = True))
        return False

    @wraps(original_func)  # keep __name__ / __doc__ of the decorated function intact
    def wrapper(glycan, motif, *args, **kwargs):
        if contains_negation(motif):
            return subgraph_isomorphism_with_negation(glycan, motif, *args, **kwargs)
        return original_func(glycan, motif, *args, **kwargs)
    return wrapper


@handle_negation
def subgraph_isomorphism(glycan, motif, termini_list = [], count = False, return_matches = False):
"""returns True if motif is in glycan and False if not\n
| Arguments:
Expand All @@ -264,6 +276,8 @@ def subgraph_isomorphism(glycan, motif, termini_list = [], count = False, return
if isinstance(glycan, str) and isinstance(motif, str):
if motif.count('(') > glycan.count('('):
return (0, []) if return_matches else 0 if count else False
if not count and not return_matches and motif in glycan:
return True
motif_comp = min_process_glycans([motif, glycan])
if 'O' in glycan + motif:
glycan, motif = [re.sub(r"(?<=[a-zA-Z])\d+(?=[a-zA-Z])", 'O', g).replace('NeuOAc', 'Neu5Ac').replace('NeuOGc', 'Neu5Gc') for g in [glycan, motif]]
Expand Down Expand Up @@ -322,6 +336,46 @@ def subgraph_isomorphism(glycan, motif, termini_list = [], count = False, return
return False if not return_matches else (0, [])


def subgraph_isomorphism_with_negation(glycan, motif, termini_list = [], count = False, return_matches = False):
    """matches a motif containing a negated ('!') element: the motif without that element must be in glycan, while the motif including it must not\n
    | Arguments:
    | :-
    | glycan (string or networkx): glycan in IUPAC-condensed format or as graph in NetworkX format
    | motif (string or networkx): glycan motif with a '!'-prefixed element, in IUPAC-condensed format or as graph in NetworkX format
    | termini_list (list): list of monosaccharide positions (from 'terminal', 'internal', and 'flexible')
    | count (bool): whether to return the number or absence/presence of motifs; default:False
    | return_matches (bool): whether the matched subgraphs in input glycan should be returned as node lists as an additional output; default:False\n
    | Returns:
    | :-
    | Returns the subgraph_isomorphism result for the motif without its negated element if the motif including that element is absent from glycan; otherwise False (0 if count is True, (0, []) if return_matches is True)
    """
    # Step 1: build the motif with the negated element removed
    if isinstance(motif, str):
        # Cut from the first '!' through the next ')' and drop the branch brackets if that leaves them empty
        neg_start = motif.index('!')
        neg_end = motif.index(')', neg_start) + 1
        motif_stub = (motif[:neg_start] + motif[neg_end:]).replace('[]', '')
    else:
        motif_stub = motif.copy()
        nodes_to_remove = {node for node, data in motif_stub.nodes(data = True)
                           if '!' in data.get('string_labels', '')}
        # Also drop the node numbered directly after each negated node
        # (presumably the linkage attached to the negated monosaccharide - TODO confirm against the graph node numbering)
        nodes_to_remove.update({node + 1 for node in nodes_to_remove if node + 1 in motif_stub})
        motif_stub.remove_nodes_from(nodes_to_remove)
    res = subgraph_isomorphism(glycan, motif_stub, termini_list = termini_list, count = count, return_matches = return_matches)
    # With return_matches the result is a (count, matches) tuple, which is truthy even for zero matches
    if not res or (isinstance(res, tuple) and not res[0]):
        return res
    # Step 2: build the motif that includes the negated element (with the '!' marker stripped)
    if isinstance(motif, str):
        motif_too_large = motif.replace('!', '')
    else:
        motif_too_large = motif.copy()
        for node, data in motif_too_large.nodes(data = True):
            if '!' in data.get('string_labels', ''):
                motif_too_large.nodes[node]['string_labels'] = data['string_labels'].replace('!', '')
    res2 = subgraph_isomorphism(glycan, motif_too_large, termini_list = termini_list, count = count, return_matches = return_matches)
    # Unpack the tuple before testing; a bare `if res2:` would treat (0, []) as a hit and reject every glycan
    forbidden_found = res2[0] if isinstance(res2, tuple) else res2
    if forbidden_found:
        # Any occurrence of the motif including the negated element rejects the glycan as a whole
        return (0, []) if return_matches else 0 if count else False
    return res


def generate_graph_features(glycan, glycan_graph = True, label = 'network'):
"""compute graph features of glycan\n
| Arguments:
Expand Down
5 changes: 1 addition & 4 deletions glycowork/motif/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,10 +339,7 @@ def process_pattern(p, p2, ggraph, glycan, match_location):
| :-
| Returns list of matches as list of node indices
"""
if isinstance(p2, dict):
return process_complex_pattern(p, p2, ggraph, glycan, match_location)
else:
return process_simple_pattern(p2, ggraph, match_location)
return process_complex_pattern(p, p2, ggraph, glycan, match_location) if isinstance(p2, dict) else process_simple_pattern(p2, ggraph, match_location)


def match_it_up(pattern_components, glycan, ggraph):
Expand Down

0 comments on commit f60c6d4

Please sign in to comment.