Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Processor improvements #1450

Merged
merged 6 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions indra/assemblers/indranet/net.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,8 +327,8 @@ def _simple_scorer_update(G, edge):
# Catch underflow
except FloatingPointError as err:
# Numpy precision
NP_PRECISION = 10 ** -np.finfo(np.longfloat).precision
logger.warning('%s: Resetting ag_belief to 10*np.longfloat precision '
NP_PRECISION = 10 ** -np.finfo(np.longdouble).precision
logger.warning('%s: Resetting ag_belief to 10*np.longdouble precision '
'(%.0e)' % (err, Decimal(NP_PRECISION * 10)))
ag_belief = NP_PRECISION * 10
return ag_belief
Expand All @@ -337,14 +337,14 @@ def _simple_scorer_update(G, edge):
def _complementary_belief(G, edge):
# Aggregate belief score: 1-prod(1-belief_i)
np.seterr(all='raise')
NP_PRECISION = 10 ** -np.finfo(np.longfloat).precision # Numpy precision
NP_PRECISION = 10 ** -np.finfo(np.longdouble).precision # Numpy precision
belief_list = [s['belief'] for s in G.edges[edge]['statements']]
try:
ag_belief = np.longfloat(1.0) - np.prod(np.fromiter(
map(lambda belief: np.longfloat(1.0) - belief, belief_list),
dtype=np.longfloat))
ag_belief = np.longdouble(1.0) - np.prod(np.fromiter(
map(lambda belief: np.longdouble(1.0) - belief, belief_list),
dtype=np.longdouble))
except FloatingPointError as err:
logger.warning('%s: Resetting ag_belief to 10*np.longfloat precision '
logger.warning('%s: Resetting ag_belief to 10*np.longdouble precision '
'(%.0e)' % (err, Decimal(NP_PRECISION * 10)))
ag_belief = NP_PRECISION * 10
return ag_belief
13 changes: 12 additions & 1 deletion indra/databases/hgnc_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,10 @@ def get_hgnc_name_from_mgi_name(mgi_name: str) -> Union[str, None]:
def _read_hgnc_maps():
hgnc_file = get_resource_path("hgnc_entries.tsv")
csv_rows = read_unicode_csv(hgnc_file, delimiter='\t', encoding='utf-8')
hgnc_uniprot_preferred = get_resource_path("hgnc_uniprot_preferred.csv")
csv_rows_uniprot_preferred = \
read_unicode_csv(hgnc_uniprot_preferred, delimiter=',',
encoding='utf-8')
hgnc_names = {}
hgnc_ids = {}
hgnc_withdrawn = []
Expand Down Expand Up @@ -515,19 +519,26 @@ def _read_hgnc_maps():
for old_id, new_id in hgnc_withdrawn_new_ids.items():
hgnc_names[old_id] = hgnc_names[new_id]

uniprot_ids_preferred = {}
for row in csv_rows_uniprot_preferred:
hgnc_id = row[0]
uniprot_id = row[1]
uniprot_ids_preferred[hgnc_id] = uniprot_id

return (
hgnc_names, hgnc_ids, hgnc_withdrawn,
uniprot_ids, entrez_ids, entrez_ids_reverse, mouse_map, rat_map,
prev_sym_map, ensembl_ids, ensembl_ids_reverse, gene_types,
dict(hgnc_to_enzymes), dict(enzyme_to_hgncs),
uniprot_ids_preferred
)


(
hgnc_names, hgnc_ids, hgnc_withdrawn, uniprot_ids, entrez_ids,
entrez_ids_reverse, mouse_map, rat_map, prev_sym_map, ensembl_ids,
ensembl_ids_reverse, gene_type,
hgnc_to_enzymes, enzyme_to_hgncs,
hgnc_to_enzymes, enzyme_to_hgncs, uniprot_ids_preferred
) = _read_hgnc_maps()


Expand Down
12 changes: 8 additions & 4 deletions indra/ontology/bio/ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class BioOntology(IndraOntology):
# should be incremented to "force" rebuilding the ontology to be consistent
# with the underlying resource files.
name = 'bio'
version = '1.33'
version = '1.34'
ontology_namespaces = [
'go', 'efo', 'hp', 'doid', 'chebi', 'ido', 'mondo', 'eccode',
]
Expand Down Expand Up @@ -147,11 +147,15 @@ def add_hgnc_uniprot_entrez_xrefs(self):
from indra.databases import hgnc_client
from indra.databases import uniprot_client
edges = []
for hid, uid in hgnc_client.uniprot_ids.items():
uids = uid.split(', ')
for hid, upid in hgnc_client.uniprot_ids.items():
uids = upid.split(', ')
preferred = hgnc_client.uniprot_ids_preferred.get(hid)
if preferred:
uids = [preferred]
for uid in uids:
edge_data = {'type': 'xref', 'source': 'hgnc'}
edges.append((self.label('HGNC', hid), self.label('UP', uid),
{'type': 'xref', 'source': 'hgnc'}))
edge_data))
self.add_edges_from(edges)

edges = [(self.label('UP', uid), self.label('HGNC', hid),
Expand Down
3 changes: 3 additions & 0 deletions indra/resources/hgnc_uniprot_preferred.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
hgnc_id,uniprot_id
17868,Q9BXH1
30377,Q14160
21 changes: 5 additions & 16 deletions indra/sources/bel/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,28 +546,17 @@ def get_db_refs_by_name(ns, name, node_data):
if up_id:
db_refs = {'UP': up_id}
# Map Selventa families and complexes to FamPlex
elif ns == 'SFAM':
elif ns in {'SFAM', 'SCOMP'}:
try:
sfam_id, xrefs = selventa_lookup[('SFAM', name)]
db_refs = {"SFAM": sfam_id}
selventa_id, xrefs = selventa_lookup[(ns, name)]
db_refs = {ns: selventa_id}
indra_name = bel_to_indra.get(name)
except KeyError:
indra_name = None
db_refs = None

if indra_name is None:
logger.info('Could not find mapping for BEL/SFAM family: '
'%s (%s)' % (name, node_data))
else:
db_refs['FPLX'] = indra_name
name = indra_name
elif ns == 'SCOMP':
scomp_id, xrefs = selventa_lookup[('SCOMP', name)]
db_refs = {'SCOMP': scomp_id}
indra_name = bel_to_indra.get(name)
if indra_name is None:
logger.info('Could not find mapping for BEL/SCOMP complex: '
'%s (%s)' % (name, node_data))
logger.info('Could not find mapping for BEL/%s family: '
'%s (%s)' % (ns, name, node_data))
else:
db_refs['FPLX'] = indra_name
name = indra_name
Expand Down
8 changes: 4 additions & 4 deletions indra/tests/test_indranet_assembler.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,10 +175,10 @@ def test_to_digraph():
'Activation', 'Phosphorylation', 'Inhibition', 'IncreaseAmount'}
assert all(digraph.edges[e].get('belief', False) for e in digraph.edges)
assert all(isinstance(digraph.edges[e]['belief'],
(float, np.longfloat)) for e in digraph.edges)
(float, np.longdouble)) for e in digraph.edges)
assert all(digraph.edges[e].get('weight', False) for e in digraph.edges)
assert all(isinstance(digraph.edges[e]['weight'],
(float, np.longfloat)) for e in digraph.edges)
(float, np.longdouble)) for e in digraph.edges)
digraph_from_df = IndraNet.digraph_from_df(df)
assert nx.is_isomorphic(digraph, digraph_from_df)

Expand Down Expand Up @@ -206,11 +206,11 @@ def test_to_signed_graph():
assert all(signed_graph.edges[e].get('belief', False) for e in
signed_graph.edges)
assert all(isinstance(signed_graph.edges[e]['belief'],
(float, np.longfloat)) for e in signed_graph.edges)
(float, np.longdouble)) for e in signed_graph.edges)
assert all(signed_graph.edges[e].get('weight', False) for e in
signed_graph.edges)
assert all(isinstance(signed_graph.edges[e]['weight'],
(float, np.longfloat)) for e in signed_graph.edges)
(float, np.longdouble)) for e in signed_graph.edges)


def _weight_mapping(G):
Expand Down
2 changes: 1 addition & 1 deletion indra/tests/test_pathfinding.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def _setup_unsigned_graph():
# Add belief
for e in dg.edges:
dg.edges[e]['belief'] = edge_beliefs[e]
dg.edges[e]['weight'] = -np.log(edge_beliefs[e], dtype=np.longfloat)
dg.edges[e]['weight'] = -np.log(edge_beliefs[e], dtype=np.longdouble)

# Add edge_by_hash
dg.graph['hashes'] = hashes
Expand Down
Loading