Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: create generalized networkx constructor #69

Merged
merged 14 commits into from
Sep 16, 2024
2 changes: 1 addition & 1 deletion .github/workflows/checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
python3 -m pip install ".[tests]"

- name: Run tests
run: python3 -m pytest tests/test_dgidb.py
run: python3 -m pytest tests/test_dgidb.py tests/test_network_constructor.py
lint:
name: lint
runs-on: ubuntu-latest
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/format.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ jobs:
- name: Prettify code
uses: creyD/prettier_action@v4.3
with:
prettier_options: --write src/dgipy/queries/*.graphql
prettier_options: --write src/dgipy/queries/*.graphql tests/fixtures/*.json
7 changes: 7 additions & 0 deletions src/dgipy/network/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Provide tools for network-based operations.

For our purposes, drugs and genes are the only nodes on the network, because they're the
only real "entities". Groupings like categories and attributes are defined as properties
of the nodes themselves, but different kinds of processing methods and frameworks might
want to break them out as distinct nodes that relate logically to drugs and genes.
"""
84 changes: 84 additions & 0 deletions src/dgipy/network/construct.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""Construct a NetworkX graph from DGIpy query results."""

import networkx as nx

from dgipy.data_utils import make_tabular


def _get_gene_nodes(result_table: list[dict]) -> list[tuple[str, dict]]:
    """Build one ``(node_id, attributes)`` pair per row for the gene it describes.

    The node ID is the row's ``"gene_concept_id"`` value. Attributes always carry
    ``"type": "gene"``, plus every column whose name starts with ``"gene_"`` (other
    than the concept ID column), stored under the column name with that prefix
    stripped.

    :param result_table: query result rows, one dict per row
    :return: list of node tuples; empty if the table is empty or its first row has
        no ``"gene_concept_id"`` column
    """
    prefix = "gene_"
    id_column = "gene_concept_id"

    # Only the first row is inspected to decide whether gene columns are present.
    if result_table and id_column not in result_table[0]:
        return []

    def _attributes(row: dict) -> dict:
        attrs = {"type": "gene"}
        for column, value in row.items():
            if column.startswith(id_column) or not column.startswith(prefix):
                continue
            attrs[column[len(prefix):]] = value
        return attrs

    return [(row[id_column], _attributes(row)) for row in result_table]


def _get_drug_nodes(result_table: list[dict]) -> list[tuple[str, dict]]:
    """Build one ``(node_id, attributes)`` pair per row for the drug it describes.

    The node ID is the row's ``"drug_concept_id"`` value. Attributes always carry
    ``"type": "drug"``, plus every column whose name starts with ``"drug_"`` (other
    than the concept ID column), stored under the column name with that prefix
    stripped.

    :param result_table: query result rows, one dict per row
    :return: list of node tuples; empty if the table is empty or its first row has
        no ``"drug_concept_id"`` column
    """
    id_key = "drug_concept_id"

    # Only the first row is inspected to decide whether drug columns are present.
    if result_table and id_key not in result_table[0]:
        return []

    drug_nodes = []
    for record in result_table:
        extras = {
            column.removeprefix("drug_"): value
            for column, value in record.items()
            if column.startswith("drug_") and not column.startswith(id_key)
        }
        drug_nodes.append((record[id_key], {"type": "drug", **extras}))
    return drug_nodes


def _get_interaction_edges(result_table: list[dict]) -> list[tuple[str, str, dict]]:
    """Build one ``(gene_id, drug_id, attributes)`` edge tuple per row.

    Endpoints are the row's ``"gene_concept_id"`` and ``"drug_concept_id"`` values,
    in that order. Attributes always carry ``"type": "drug_gene_interaction"``, plus
    every column whose name starts with ``"interaction_"``, stored under the column
    name with that prefix stripped.

    :param result_table: query result rows, one dict per row
    :return: list of edge tuples; empty if the table is empty or its first row lacks
        either concept ID column
    """
    # Only the first row is inspected to decide whether both endpoints are present.
    if result_table:
        first_row = result_table[0]
        if not ("drug_concept_id" in first_row and "gene_concept_id" in first_row):
            return []

    prefix = "interaction_"
    interaction_edges = []
    for record in result_table:
        attributes = {"type": "drug_gene_interaction"}
        for column, value in record.items():
            if column.startswith(prefix):
                attributes[column[len(prefix):]] = value
        interaction_edges.append(
            (record["gene_concept_id"], record["drug_concept_id"], attributes)
        )
    return interaction_edges


def construct_graph(query_result: dict) -> nx.Graph:
    """Build a NetworkX graph from a DGIpy query result (i.e., a columnar dict).

    >>> import dgipy
    >>> from dgipy.network.construct import construct_graph
    >>> genes = dgipy.get_genes(["BRAF", "ABL1"])
    >>> graph = construct_graph(genes)

    :param query_result: result object directly from DGIpy output. Columns with names
        starting with ``"gene_"`` become attributes of gene nodes, ``"drug_"`` become
        attributes of drug nodes, and ``"interaction_"`` become attributes of
        interaction edges. The ``"gene_concept_id"`` and ``"drug_concept_id"``
        columns supply the node identifiers.
    :return: undirected graph whose nodes are the genes and drugs found in the
        result, with an edge between each gene and drug that share a result row
    """
    # Pivot the columnar result into rows before extracting nodes and edges.
    rows = make_tabular(query_result)

    network = nx.Graph()
    # Nodes go in first (genes, then drugs) so their attributes are set before the
    # edges referencing them are added.
    for node_batch in (_get_gene_nodes(rows), _get_drug_nodes(rows)):
        network.add_nodes_from(node_batch)
    network.add_edges_from(_get_interaction_edges(rows))
    return network
23 changes: 23 additions & 0 deletions src/dgipy/network/export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""Define methods for exporting to common knowledge graph frameworks."""

import networkx as nx


def to_pykeen(graph: nx.Graph) -> list[tuple[str, str, str]]:
    """Export to PyKEEN triple set. Typically, you'd save this output to a TSV.

    PyKEEN likes very straightforward triples. There's probably work we could do to
    better characterize the interaction, and also add attributes as additional
    triples. As it stands, this method is VERY basic: every edge produces two
    triples, one in each direction, both using the same relation label.

    Every node that sits on an edge must carry a ``"name"`` attribute, since that
    name (not the node ID) is what appears in the triples.

    :param graph: graph constructed from DGIpy results.
    :return: list of ``(subject, relation, object)`` triples (e.g. to save to TSV)
    """
    relation = "has_drug_target_interaction_with"
    triples = []
    for first_id, second_id in graph.edges:
        first_name = graph.nodes[first_id]["name"]
        second_name = graph.nodes[second_id]["name"]
        # Emit both directions so the relation is symmetric in the output.
        triples.extend(
            (
                (first_name, relation, second_name),
                (second_name, relation, first_name),
            )
        )
    return triples
81 changes: 81 additions & 0 deletions tests/fixtures/construct_network_input_interactions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
{
"gene_name": ["EREG", "EREG", "EREG", "EREG", "EREG", "EREG"],
"gene_concept_id": [
"hgnc:3443",
"hgnc:3443",
"hgnc:3443",
"hgnc:3443",
"hgnc:3443",
"hgnc:3443"
],
"gene_long_name": [
"epiregulin",
"epiregulin",
"epiregulin",
"epiregulin",
"epiregulin",
"epiregulin"
],
"drug_name": [
"CETUXIMAB",
"HUMAN CHORIONIC GONADOTROPIN",
"E6201",
"FEPIXNEBART",
"FEPIXNEBART",
"PANITUMUMAB"
],
"drug_concept_id": [
"rxcui:318341",
"rxcui:340705",
"iuphar.ligand:7836",
"ncit:C188574",
"chembl:CHEMBL4594573",
"rxcui:263034"
],
"drug_approved": [true, true, false, false, false, true],
"interaction_score": [
0.2430910705356227, 0.921187214661307, 1.250182648468916, 4.375639269641208,
4.375639269641208, 0.4375639269641209
],
"interaction_attributes": [
{
"Mechanism of Action": null,
"Direct Interaction": null,
"Endogenous Drug": null
},
{
"Mechanism of Action": null,
"Direct Interaction": null,
"Endogenous Drug": null
},
{
"Mechanism of Action": ["Inhibition"],
"Direct Interaction": ["true"],
"Endogenous Drug": ["false"]
},
{
"Mechanism of Action": ["Proepiregulin inhibitor"],
"Direct Interaction": ["true"],
"Endogenous Drug": null
},
{
"Mechanism of Action": null,
"Direct Interaction": null,
"Endogenous Drug": null
},
{
"Mechanism of Action": null,
"Direct Interaction": null,
"Endogenous Drug": null
}
],
"interaction_sources": [
["CIViC"],
["NCI"],
["GuideToPharmacology"],
["ChEMBL"],
["TTD"],
["CIViC"]
],
"interaction_pmids": [[], [16543407], [], [], [], []]
}
26 changes: 5 additions & 21 deletions tests/fixtures/get_categories_response.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,39 +20,23 @@
},
{
"name": "DRUG RESISTANCE",
"sourceNames": [
"CIViC",
"COSMIC"
]
"sourceNames": ["CIViC", "COSMIC"]
},
{
"name": "DRUGGABLE GENOME",
"sourceNames": [
"HingoraniCasas",
"HopkinsGroom",
"RussLampel"
]
"sourceNames": ["HingoraniCasas", "HopkinsGroom", "RussLampel"]
},
{
"name": "ENZYME",
"sourceNames": [
"GuideToPharmacology"
]
"sourceNames": ["GuideToPharmacology"]
},
{
"name": "KINASE",
"sourceNames": [
"HopkinsGroom",
"Pharos",
"dGene"
]
"sourceNames": ["HopkinsGroom", "Pharos", "dGene"]
},
{
"name": "SERINE THREONINE KINASE",
"sourceNames": [
"GO",
"dGene"
]
"sourceNames": ["GO", "dGene"]
}
]
}
Expand Down
54 changes: 13 additions & 41 deletions tests/fixtures/get_drug_applications_drugsatfda_response.json
Original file line number Diff line number Diff line change
Expand Up @@ -115,47 +115,19 @@
"application_number": "NDA212099",
"sponsor_name": "BAYER HEALTHCARE",
"openfda": {
"application_number": [
"NDA212099"
],
"brand_name": [
"NUBEQA"
],
"generic_name": [
"DAROLUTAMIDE"
],
"manufacturer_name": [
"Bayer HealthCare Pharmaceuticals Inc."
],
"product_ndc": [
"50419-395"
],
"product_type": [
"HUMAN PRESCRIPTION DRUG"
],
"route": [
"ORAL"
],
"substance_name": [
"DAROLUTAMIDE"
],
"rxcui": [
"2180330",
"2180336"
],
"spl_id": [
"78565601-df32-45a6-9d08-ac84738c8963"
],
"spl_set_id": [
"1a7cb212-56e4-4b9d-a73d-bfee7fe4735e"
],
"package_ndc": [
"50419-395-01",
"50419-395-72"
],
"unii": [
"X05U0N2RCO"
]
"application_number": ["NDA212099"],
"brand_name": ["NUBEQA"],
"generic_name": ["DAROLUTAMIDE"],
"manufacturer_name": ["Bayer HealthCare Pharmaceuticals Inc."],
"product_ndc": ["50419-395"],
"product_type": ["HUMAN PRESCRIPTION DRUG"],
"route": ["ORAL"],
"substance_name": ["DAROLUTAMIDE"],
"rxcui": ["2180330", "2180336"],
"spl_id": ["78565601-df32-45a6-9d08-ac84738c8963"],
"spl_set_id": ["1a7cb212-56e4-4b9d-a73d-bfee7fe4735e"],
"package_ndc": ["50419-395-01", "50419-395-72"],
"unii": ["X05U0N2RCO"]
},
"products": [
{
Expand Down
7,319 changes: 7,318 additions & 1 deletion tests/fixtures/introspection_response.json

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions tests/test_network_constructor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Test `dgipy.network.construct`."""

import json
from pathlib import Path

from dgipy.network import construct


def test_construct(fixtures_dir: Path) -> None:
    """Check graph construction against the interactions fixture.

    The fixture holds six interaction rows for a single gene and six distinct
    drugs, so the graph should contain seven nodes and six edges.

    :param fixtures_dir: directory containing the JSON test fixtures
    """
    fixture_path = fixtures_dir / "construct_network_input_interactions.json"
    # Open via a context manager so the file handle is closed deterministically
    # rather than being left for the garbage collector.
    with fixture_path.open(encoding="utf-8") as fixture_file:
        results = json.load(fixture_file)

    graph = construct.construct_graph(results)
    assert len(graph.nodes) == 7
    assert len(graph.edges) == 6
    assert graph.nodes["hgnc:3443"]["long_name"] == "epiregulin"
    assert graph.nodes["ncit:C188574"]["approved"] is False
    assert graph.edges[("hgnc:3443", "iuphar.ligand:7836")]["attributes"][
        "Mechanism of Action"
    ] == ["Inhibition"]