Skip to content

Commit

Permalink
feat: create generalized networkx constructor (#69)
Browse files Browse the repository at this point in the history
House network/graph-related functions in their own modules. This effectively duplicates some functionality that we previously implemented for the Dash app, but I think we can move or consolidate that at a later date (i.e., I'm not making any breaking changes here yet, just adding redundant functionality for now).
  • Loading branch information
jsstevenson authored Sep 16, 2024
1 parent 3e27fd1 commit aa5a450
Show file tree
Hide file tree
Showing 10 changed files with 7,553 additions and 65 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
python3 -m pip install ".[tests]"
- name: Run tests
run: python3 -m pytest tests/test_dgidb.py
run: python3 -m pytest tests/test_dgidb.py tests/test_network_constructor.py
lint:
name: lint
runs-on: ubuntu-latest
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/format.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ jobs:
- name: Prettify code
uses: creyD/prettier_action@v4.3
with:
prettier_options: --write src/dgipy/queries/*.graphql
prettier_options: --write src/dgipy/queries/*.graphql tests/fixtures/*.json
7 changes: 7 additions & 0 deletions src/dgipy/network/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Provide tools for network-based operations.
For our purposes, drugs and genes are the only nodes on the network, because they're the
only real "entities". Groupings like categories and attributes are defined as properties
of the nodes themselves, but different kinds of processing methods and frameworks might
want to break them out as distinct nodes that relate logically to drugs and genes.
"""
84 changes: 84 additions & 0 deletions src/dgipy/network/construct.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""Construct a NetworkX graph from DGIpy query results."""

import networkx as nx

from dgipy.data_utils import make_tabular


def _get_gene_nodes(result_table: list[dict]) -> list[tuple[str, dict]]:
    """Build one ``(node_id, attributes)`` pair per row for the gene in that row.

    The node ID is the row's ``"gene_concept_id"`` value. Attributes start with
    ``{"type": "gene"}`` and then gain every column whose name starts with
    ``"gene_"`` (other than those starting with ``"gene_concept_id"``), keyed by
    the column name with the ``"gene_"`` prefix removed.

    :param result_table: row-oriented query results (one dict per row)
    :return: list of node tuples, one per row (duplicates are not collapsed here);
        empty if the first row has no ``"gene_concept_id"`` column
    """
    id_column = "gene_concept_id"
    prefix = "gene_"

    # Only the first row is inspected to decide whether gene columns are present.
    if result_table and id_column not in result_table[0]:
        return []

    return [
        (
            row[id_column],
            {
                "type": "gene",
                # Unpacked after "type" so a "gene_type" column would override it,
                # exactly as a later dict.update() would.
                **{
                    column.removeprefix(prefix): value
                    for column, value in row.items()
                    if column.startswith(prefix) and not column.startswith(id_column)
                },
            },
        )
        for row in result_table
    ]


def _get_drug_nodes(result_table: list[dict]) -> list[tuple[str, dict]]:
    """Build one ``(node_id, attributes)`` pair per row for the drug in that row.

    The node ID is the row's ``"drug_concept_id"`` value. Attributes start with
    ``{"type": "drug"}`` and then gain every column whose name starts with
    ``"drug_"`` (other than those starting with ``"drug_concept_id"``), keyed by
    the column name with the ``"drug_"`` prefix removed.

    :param result_table: row-oriented query results (one dict per row)
    :return: list of node tuples, one per row (duplicates are not collapsed here);
        empty if the first row has no ``"drug_concept_id"`` column
    """
    # Only the first row is inspected to decide whether drug columns are present.
    has_rows = bool(result_table)
    if has_rows and "drug_concept_id" not in result_table[0]:
        return []

    drug_nodes = []
    for record in result_table:
        attributes = {"type": "drug"}
        for column, value in record.items():
            if not column.startswith("drug_"):
                continue
            if column.startswith("drug_concept_id"):
                # The concept ID is the node key, not an attribute.
                continue
            attributes[column.removeprefix("drug_")] = value
        drug_nodes.append((record["drug_concept_id"], attributes))
    return drug_nodes


def _get_interaction_edges(result_table: list[dict]) -> list[tuple[str, str, dict]]:
    """Build one ``(gene_id, drug_id, attributes)`` edge tuple per row.

    Attributes start with ``{"type": "drug_gene_interaction"}`` and then gain every
    column whose name starts with ``"interaction_"``, keyed by the column name with
    that prefix removed.

    :param result_table: row-oriented query results (one dict per row)
    :return: list of edge tuples, one per row; empty if the first row lacks either
        the ``"drug_concept_id"`` or the ``"gene_concept_id"`` column
    """
    if result_table:
        first_row = result_table[0]
        # An edge needs both endpoints; only the first row is inspected.
        if not ("drug_concept_id" in first_row and "gene_concept_id" in first_row):
            return []

    interaction_prefix = "interaction_"
    return [
        (
            row["gene_concept_id"],
            row["drug_concept_id"],
            {
                "type": "drug_gene_interaction",
                **{
                    column.removeprefix(interaction_prefix): value
                    for column, value in row.items()
                    if column.startswith(interaction_prefix)
                },
            },
        )
        for row in result_table
    ]


def construct_graph(query_result: dict) -> nx.Graph:
    """Construct a NetworkX graph from a DGIpy query result (i.e., a columnar dict).

    >>> import dgipy
    >>> from dgipy.network.construct import construct_graph
    >>> genes = dgipy.get_genes(["BRAF", "ABL1"])
    >>> graph = construct_graph(genes)

    :param query_result: result object directly from DGIpy output. Columns with names
        starting with ``"gene_"`` become attributes of gene nodes, columns starting
        with ``"drug_"`` become attributes of drug nodes, and columns starting with
        ``"interaction_"`` become attributes of interaction edges (in each case with
        the prefix stripped). The ``"gene_concept_id"`` and ``"drug_concept_id"``
        columns are used as node identifiers rather than attributes.
    :return: undirected ``nx.Graph`` whose nodes are the genes and drugs present in
        the result, with an edge between each gene and drug that share a row. Gene
        nodes, drug nodes, or edges are omitted when the corresponding concept ID
        column(s) are absent from the result.
    """
    graph = nx.Graph()
    # Convert the columnar result into a list of per-row dicts for the helpers below.
    result_table = make_tabular(query_result)

    # Helpers emit one tuple per row, so the same ID may appear repeatedly.
    graph.add_nodes_from(_get_gene_nodes(result_table))
    graph.add_nodes_from(_get_drug_nodes(result_table))
    graph.add_edges_from(_get_interaction_edges(result_table))

    return graph
23 changes: 23 additions & 0 deletions src/dgipy/network/export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""Define methods for exporting to common knowledge graph frameworks."""

import networkx as nx


def to_pykeen(graph: nx.Graph) -> list[tuple[str, str, str]]:
    """Export graph edges as a PyKEEN-style list of (head, relation, tail) triples.

    Each edge yields two triples, one per direction, both using the relation
    ``"has_drug_target_interaction_with"``. Entities are identified by the ``"name"``
    attribute of the edge's endpoint nodes, so every node that participates in an
    edge must carry a ``"name"`` attribute.

    This is intentionally minimal: edge attributes and other node attributes are
    not exported.

    :param graph: graph constructed from DGIpy results.
    :return: list of triples (e.g. to save to TSV)
    """
    relation = "has_drug_target_interaction_with"
    node_data = graph.nodes

    triples = []
    for gene_id, drug_id in graph.edges:
        gene_name = node_data[gene_id]["name"]
        drug_name = node_data[drug_id]["name"]
        # Emit both directions so the relation is represented symmetrically.
        triples.extend(
            [
                (gene_name, relation, drug_name),
                (drug_name, relation, gene_name),
            ]
        )
    return triples
81 changes: 81 additions & 0 deletions tests/fixtures/construct_network_input_interactions.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
{
"gene_name": ["EREG", "EREG", "EREG", "EREG", "EREG", "EREG"],
"gene_concept_id": [
"hgnc:3443",
"hgnc:3443",
"hgnc:3443",
"hgnc:3443",
"hgnc:3443",
"hgnc:3443"
],
"gene_long_name": [
"epiregulin",
"epiregulin",
"epiregulin",
"epiregulin",
"epiregulin",
"epiregulin"
],
"drug_name": [
"CETUXIMAB",
"HUMAN CHORIONIC GONADOTROPIN",
"E6201",
"FEPIXNEBART",
"FEPIXNEBART",
"PANITUMUMAB"
],
"drug_concept_id": [
"rxcui:318341",
"rxcui:340705",
"iuphar.ligand:7836",
"ncit:C188574",
"chembl:CHEMBL4594573",
"rxcui:263034"
],
"drug_approved": [true, true, false, false, false, true],
"interaction_score": [
0.2430910705356227, 0.921187214661307, 1.250182648468916, 4.375639269641208,
4.375639269641208, 0.4375639269641209
],
"interaction_attributes": [
{
"Mechanism of Action": null,
"Direct Interaction": null,
"Endogenous Drug": null
},
{
"Mechanism of Action": null,
"Direct Interaction": null,
"Endogenous Drug": null
},
{
"Mechanism of Action": ["Inhibition"],
"Direct Interaction": ["true"],
"Endogenous Drug": ["false"]
},
{
"Mechanism of Action": ["Proepiregulin inhibitor"],
"Direct Interaction": ["true"],
"Endogenous Drug": null
},
{
"Mechanism of Action": null,
"Direct Interaction": null,
"Endogenous Drug": null
},
{
"Mechanism of Action": null,
"Direct Interaction": null,
"Endogenous Drug": null
}
],
"interaction_sources": [
["CIViC"],
["NCI"],
["GuideToPharmacology"],
["ChEMBL"],
["TTD"],
["CIViC"]
],
"interaction_pmids": [[], [16543407], [], [], [], []]
}
26 changes: 5 additions & 21 deletions tests/fixtures/get_categories_response.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,39 +20,23 @@
},
{
"name": "DRUG RESISTANCE",
"sourceNames": [
"CIViC",
"COSMIC"
]
"sourceNames": ["CIViC", "COSMIC"]
},
{
"name": "DRUGGABLE GENOME",
"sourceNames": [
"HingoraniCasas",
"HopkinsGroom",
"RussLampel"
]
"sourceNames": ["HingoraniCasas", "HopkinsGroom", "RussLampel"]
},
{
"name": "ENZYME",
"sourceNames": [
"GuideToPharmacology"
]
"sourceNames": ["GuideToPharmacology"]
},
{
"name": "KINASE",
"sourceNames": [
"HopkinsGroom",
"Pharos",
"dGene"
]
"sourceNames": ["HopkinsGroom", "Pharos", "dGene"]
},
{
"name": "SERINE THREONINE KINASE",
"sourceNames": [
"GO",
"dGene"
]
"sourceNames": ["GO", "dGene"]
}
]
}
Expand Down
54 changes: 13 additions & 41 deletions tests/fixtures/get_drug_applications_drugsatfda_response.json
Original file line number Diff line number Diff line change
Expand Up @@ -115,47 +115,19 @@
"application_number": "NDA212099",
"sponsor_name": "BAYER HEALTHCARE",
"openfda": {
"application_number": [
"NDA212099"
],
"brand_name": [
"NUBEQA"
],
"generic_name": [
"DAROLUTAMIDE"
],
"manufacturer_name": [
"Bayer HealthCare Pharmaceuticals Inc."
],
"product_ndc": [
"50419-395"
],
"product_type": [
"HUMAN PRESCRIPTION DRUG"
],
"route": [
"ORAL"
],
"substance_name": [
"DAROLUTAMIDE"
],
"rxcui": [
"2180330",
"2180336"
],
"spl_id": [
"78565601-df32-45a6-9d08-ac84738c8963"
],
"spl_set_id": [
"1a7cb212-56e4-4b9d-a73d-bfee7fe4735e"
],
"package_ndc": [
"50419-395-01",
"50419-395-72"
],
"unii": [
"X05U0N2RCO"
]
"application_number": ["NDA212099"],
"brand_name": ["NUBEQA"],
"generic_name": ["DAROLUTAMIDE"],
"manufacturer_name": ["Bayer HealthCare Pharmaceuticals Inc."],
"product_ndc": ["50419-395"],
"product_type": ["HUMAN PRESCRIPTION DRUG"],
"route": ["ORAL"],
"substance_name": ["DAROLUTAMIDE"],
"rxcui": ["2180330", "2180336"],
"spl_id": ["78565601-df32-45a6-9d08-ac84738c8963"],
"spl_set_id": ["1a7cb212-56e4-4b9d-a73d-bfee7fe4735e"],
"package_ndc": ["50419-395-01", "50419-395-72"],
"unii": ["X05U0N2RCO"]
},
"products": [
{
Expand Down
7,319 changes: 7,318 additions & 1 deletion tests/fixtures/introspection_response.json

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions tests/test_network_constructor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Test `dgipy.network.construct`."""

import json
from pathlib import Path

from dgipy.network import construct


def test_construct(fixtures_dir: Path):
    """Check that an interactions result is converted into the expected graph.

    :param fixtures_dir: directory containing the JSON test fixtures
    """
    fixture_path = fixtures_dir / "construct_network_input_interactions.json"
    # Use a context manager so the fixture's file handle is closed deterministically
    # instead of being left for the garbage collector.
    with fixture_path.open(encoding="utf-8") as f:
        results = json.load(f)

    graph = construct.construct_graph(results)

    # The fixture has six rows: one gene concept ID and six distinct drug concept IDs.
    assert len(graph.nodes) == 7
    assert len(graph.edges) == 6
    # Column prefixes ("gene_", "drug_", "interaction_") are stripped from attributes.
    assert graph.nodes["hgnc:3443"]["long_name"] == "epiregulin"
    assert graph.nodes["ncit:C188574"]["approved"] is False
    assert graph.edges[("hgnc:3443", "iuphar.ligand:7836")]["attributes"][
        "Mechanism of Action"
    ] == ["Inhibition"]

0 comments on commit aa5a450

Please sign in to comment.