-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: create generalized networkx constructor (#69)
House network/graph-related functions in their own modules. This effectively duplicates some of what we've previously done for the Dash app, but I think we can move/collapse that at a later date (i.e., I'm not making any breaking changes here yet, just adding redundant functionality for now).
- Loading branch information
1 parent
3e27fd1
commit aa5a450
Showing
10 changed files
with
7,553 additions
and
65 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
"""Provide tools for network-based operations. | ||
For our purposes, drugs and genes are the only nodes on the network, because they're the | ||
only real "entities". Groupings like categories and attributes are defined as properties | ||
of the nodes themselves, but different kinds of processing methods and frameworks might | ||
want to break them out as distinct nodes that relate logically to drugs and genes. | ||
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
"""Construct a NetworkX graph from DGIpy query results.""" | ||
|
||
import networkx as nx | ||
|
||
from dgipy.data_utils import make_tabular | ||
|
||
|
||
def _get_gene_nodes(result_table: list[dict]) -> list[tuple[str, dict]]: | ||
if result_table and "gene_concept_id" not in result_table[0]: | ||
return [] | ||
|
||
nodes = [] | ||
for row in result_table: | ||
node_attrs = {"type": "gene"} | ||
node_attrs.update( | ||
{ | ||
k[5:]: v | ||
for k, v in row.items() | ||
if k.startswith("gene_") and not k.startswith("gene_concept_id") | ||
} | ||
) | ||
nodes.append((row["gene_concept_id"], node_attrs)) | ||
return nodes | ||
|
||
|
||
def _get_drug_nodes(result_table: list[dict]) -> list[tuple[str, dict]]: | ||
if result_table and "drug_concept_id" not in result_table[0]: | ||
return [] | ||
|
||
nodes = [] | ||
for row in result_table: | ||
node_attrs = {"type": "drug"} | ||
node_attrs.update( | ||
{ | ||
k[5:]: v | ||
for k, v in row.items() | ||
if k.startswith("drug_") and not k.startswith("drug_concept_id") | ||
} | ||
) | ||
nodes.append((row["drug_concept_id"], node_attrs)) | ||
return nodes | ||
|
||
|
||
def _get_interaction_edges(result_table: list[dict]) -> list[tuple[str, str, dict]]: | ||
if result_table and ( | ||
"drug_concept_id" not in result_table[0] | ||
or "gene_concept_id" not in result_table[0] | ||
): | ||
return [] | ||
|
||
edges = [] | ||
for row in result_table: | ||
edge_attrs = {"type": "drug_gene_interaction"} | ||
edge_attrs.update( | ||
{k[12:]: v for k, v in row.items() if k.startswith("interaction_")} | ||
) | ||
edges.append((row["gene_concept_id"], row["drug_concept_id"], edge_attrs)) | ||
return edges | ||
|
||
|
||
def construct_graph(query_result: dict) -> nx.Graph:
    """Construct a NetworkX graph from a DGIpy query result (i.e., a columnar dict).

    >>> import dgipy
    >>> from dgipy.network.construct import construct_graph
    >>> genes = dgipy.get_genes(["BRAF", "ABL1"])
    >>> graph = construct_graph(genes)

    The columnar result is first converted to rows with ``make_tabular``. Gene nodes
    are added first, then drug nodes, then the gene-drug interaction edges.

    :param query_result: result object directly from DGIpy output. Columns with names
        starting with ``"gene_"`` become attributes of gene nodes, ``"drug_"`` become
        attributes of drug nodes, and ``"interaction_"`` become attributes of
        interaction edges (prefixes are stripped from the attribute names).
    :return: undirected graph whose nodes are the included genes and drugs, keyed by
        concept ID, with an edge between each interacting gene and drug.
    """
    rows = make_tabular(query_result)
    network = nx.Graph()

    for build_nodes in (_get_gene_nodes, _get_drug_nodes):
        network.add_nodes_from(build_nodes(rows))
    network.add_edges_from(_get_interaction_edges(rows))

    return network
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
"""Define methods for exporting to common knowledge graph frameworks.""" | ||
|
||
import networkx as nx | ||
|
||
|
||
def to_pykeen(graph: nx.Graph) -> list[tuple[str, str, str]]:
    """Export to PyKEEN triple set. Typically, you'd save this output to a TSV.

    PyKEEN likes very straightforward triples, so this export is deliberately basic:
    every edge yields two triples, one per direction, both using the
    ``"has_drug_target_interaction_with"`` relation and the ``"name"`` attribute of
    each endpoint as the entity label. Other node and edge attributes are not exported;
    characterizing the interaction more precisely is left as future work.

    :param graph: graph constructed from DGIpy results. Every node that takes part in
        an edge is looked up by its ``"name"`` attribute.
    :return: list of ``(head, relation, tail)`` triples (e.g. to save to TSV)
    """
    relation = "has_drug_target_interaction_with"
    triples = []
    for first_id, second_id in graph.edges:
        first_name = graph.nodes[first_id]["name"]
        second_name = graph.nodes[second_id]["name"]
        # Emit the relation in both directions for each edge.
        triples.extend(
            (
                (first_name, relation, second_name),
                (second_name, relation, first_name),
            )
        )

    return triples
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
{ | ||
"gene_name": ["EREG", "EREG", "EREG", "EREG", "EREG", "EREG"], | ||
"gene_concept_id": [ | ||
"hgnc:3443", | ||
"hgnc:3443", | ||
"hgnc:3443", | ||
"hgnc:3443", | ||
"hgnc:3443", | ||
"hgnc:3443" | ||
], | ||
"gene_long_name": [ | ||
"epiregulin", | ||
"epiregulin", | ||
"epiregulin", | ||
"epiregulin", | ||
"epiregulin", | ||
"epiregulin" | ||
], | ||
"drug_name": [ | ||
"CETUXIMAB", | ||
"HUMAN CHORIONIC GONADOTROPIN", | ||
"E6201", | ||
"FEPIXNEBART", | ||
"FEPIXNEBART", | ||
"PANITUMUMAB" | ||
], | ||
"drug_concept_id": [ | ||
"rxcui:318341", | ||
"rxcui:340705", | ||
"iuphar.ligand:7836", | ||
"ncit:C188574", | ||
"chembl:CHEMBL4594573", | ||
"rxcui:263034" | ||
], | ||
"drug_approved": [true, true, false, false, false, true], | ||
"interaction_score": [ | ||
0.2430910705356227, 0.921187214661307, 1.250182648468916, 4.375639269641208, | ||
4.375639269641208, 0.4375639269641209 | ||
], | ||
"interaction_attributes": [ | ||
{ | ||
"Mechanism of Action": null, | ||
"Direct Interaction": null, | ||
"Endogenous Drug": null | ||
}, | ||
{ | ||
"Mechanism of Action": null, | ||
"Direct Interaction": null, | ||
"Endogenous Drug": null | ||
}, | ||
{ | ||
"Mechanism of Action": ["Inhibition"], | ||
"Direct Interaction": ["true"], | ||
"Endogenous Drug": ["false"] | ||
}, | ||
{ | ||
"Mechanism of Action": ["Proepiregulin inhibitor"], | ||
"Direct Interaction": ["true"], | ||
"Endogenous Drug": null | ||
}, | ||
{ | ||
"Mechanism of Action": null, | ||
"Direct Interaction": null, | ||
"Endogenous Drug": null | ||
}, | ||
{ | ||
"Mechanism of Action": null, | ||
"Direct Interaction": null, | ||
"Endogenous Drug": null | ||
} | ||
], | ||
"interaction_sources": [ | ||
["CIViC"], | ||
["NCI"], | ||
["GuideToPharmacology"], | ||
["ChEMBL"], | ||
["TTD"], | ||
["CIViC"] | ||
], | ||
"interaction_pmids": [[], [16543407], [], [], [], []] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
"""Test `dgipy.network.construct`.""" | ||
|
||
import json | ||
from pathlib import Path | ||
|
||
from dgipy.network import construct | ||
|
||
|
||
def test_construct(fixtures_dir: Path):
    """Check that an interactions fixture is converted into the expected graph.

    :param fixtures_dir: directory holding the JSON test fixtures
    """
    fixture_path = fixtures_dir / "construct_network_input_interactions.json"
    # Open via a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with fixture_path.open() as fixture_file:
        results = json.load(fixture_file)

    graph = construct.construct_graph(results)

    assert len(graph.nodes) == 7
    assert len(graph.edges) == 6
    # Node/edge attribute names have their column prefix stripped.
    assert graph.nodes["hgnc:3443"]["long_name"] == "epiregulin"
    assert graph.nodes["ncit:C188574"]["approved"] is False
    assert graph.edges[("hgnc:3443", "iuphar.ligand:7836")]["attributes"][
        "Mechanism of Action"
    ] == ["Inhibition"]