Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: use regbot for clinical trial fetching #89

Merged
merged 4 commits into from
Dec 31, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ repos:
rev: v4.6.0 # pre-commit-hooks version
hooks:
- id: check-added-large-files
exclude: tests/fixtures/integration_clinical_trials_zolgensma.json
- id: detect-private-key
- id: trailing-whitespace
- id: end-of-file-fixer
Expand Down
9 changes: 7 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ requires-python = ">=3.11"
description = "Python wrapper for accessing an instance of DGIdb v5 database"
license = {file = "LICENSE"}
dependencies = [
"regbot",
"regbot~=0.1.1",
"requests",
"gql[requests]",
"pandas~=2.0",
Expand All @@ -34,7 +34,12 @@ dependencies = [
dynamic = ["version"]

[project.optional-dependencies]
tests = ["pytest", "pytest-cov", "pytest-benchmark", "requests_mock"]
tests = [
"pytest",
"pytest-cov",
"pytest-benchmark",
"requests_mock",
]
dev = ["pre-commit>=3.7.1", "ruff==0.5.0"]
docs = [
"sphinx==6.1.3",
Expand Down
2 changes: 0 additions & 2 deletions src/dgipy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
SourceType,
get_all_genes,
get_categories,
get_clinical_trials,
get_drug_applications,
get_drugs,
get_genes,
Expand All @@ -23,5 +22,4 @@
"get_all_genes",
"get_drug_applications",
"generate_app",
"get_clinical_trials",
]
72 changes: 0 additions & 72 deletions src/dgipy/dgidb.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,75 +390,3 @@ def get_drug_applications(terms: list, api_url: str | None = None) -> dict:
product.active_ingredients[0].strength
)
return output


def get_clinical_trials(terms: list) -> dict:
    """Perform a look up for clinical trials data for drug or drugs of interest

    One request per term is sent to the ClinicalTrials.gov v2 ``studies``
    endpoint, with the term used as the intervention query (``query.intr``).
    Every study in the response becomes one entry in each output column, so
    all columns always have equal length. Fields a study does not provide
    are recorded as ``None`` rather than aborting the whole lookup.

    :param terms: drugs of interest
    :return: all clinical trials data for drugs of interest, as a dict of
        equal-length column lists that can be passed to a DataFrame constructor
    :raise requests.exceptions.RequestException: if a lookup returns an HTTP
        error status
    """
    base_url = "https://clinicaltrials.gov/api/v2/studies"

    output = {
        "search_term": [],
        "trial_id": [],
        "brief": [],
        "study_type": [],
        "min_age": [],
        "age_groups": [],
        "pediatric": [],
        "conditions": [],
        "interventions": [],
    }

    for drug in terms:
        # Let requests build the query string so that terms containing spaces,
        # "&", "#", "+" etc. are percent-encoded instead of corrupting the URL.
        # TODO: support condition and general term queries as well
        # NOTE(review): only the studies in this single response are read; if
        # the API pages its results, later pages are not followed -- confirm.
        response = requests.get(
            base_url,
            params={"format": "json", "query.intr": drug},
            timeout=20,
        )
        try:
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            _logger.error(
                "Clinical trials lookup to URL %s failed: %s", response.url, e
            )
            raise
        if response.status_code != 200:
            # raise_for_status() only rejects 4xx/5xx, so other non-200 codes
            # land here; skip this term but keep processing the rest
            _logger.error(
                "Received status code %s from request to %s -- returning empty dataframe",
                response.status_code,
                response.url,
            )
            continue

        for study in response.json().get("studies", []):
            protocol = study.get("protocolSection", {})
            identification = protocol.get("identificationModule", {})
            eligibility = protocol.get("eligibilityModule", {})
            age_groups = eligibility.get("stdAges")

            output["search_term"].append(drug)
            output["trial_id"].append(identification.get("nctId"))
            output["brief"].append(identification.get("briefTitle"))
            output["study_type"].append(
                protocol.get("designModule", {}).get("studyType")
            )
            output["min_age"].append(eligibility.get("minimumAge"))
            output["age_groups"].append(age_groups)
            # unknown (None) when the study lists no age groups at all
            output["pediatric"].append(
                "CHILD" in age_groups if age_groups is not None else None
            )
            output["conditions"].append(
                protocol.get("conditionsModule", {}).get("conditions")
            )
            output["interventions"].append(protocol.get("armsInterventionsModule"))
    return output
1 change: 1 addition & 0 deletions src/dgipy/integration/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Provide tools for integrating DGIdb data with related resources."""
79 changes: 79 additions & 0 deletions src/dgipy/integration/clinical_trials.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""Integrate data from FDA clinical trials API."""

from regbot.fetch.clinical_trials import StandardAge
from regbot.fetch.clinical_trials import get_clinical_trials as get_trials_from_fda


def get_clinical_trials(terms: list) -> dict:
    """Acquire associated clinical trials data for drug term

    >>> from dgipy.dgidb import get_drugs
    >>> from dgipy.integration.clinical_trials import get_clinical_trials
    >>> import polars as pl  # or another dataframe library of your choosing
    >>> drugs = ["imatinib", "sunitinib"]
    >>> df = pl.DataFrame(get_drugs(drugs))
    >>> trial_df = pl.DataFrame(get_clinical_trials(drugs))
    >>> annotated_df = df.join(trial_df, on="drug_name")

    Each study returned for a term contributes exactly one entry to every
    output column, so all columns have equal length. The ``drug_name`` column
    holds the upper-cased search term. Any part of a study record that is
    unset is recorded as ``None``.

    :param terms: drugs of interest
    :return: all clinical trials data for drugs of interest in a DataFrame-ready dict
    :raise ValueError: if ``terms`` is a string, or is empty
    """
    if isinstance(terms, str):
        # we shouldn't be too picky about input types, but this is an easy mistake
        # that's worth trying to catch
        msg = (
            "`get_clinical_trials()` takes a list of terms as a parameter, not a string"
        )
        raise ValueError(msg)
    if not terms:
        msg = "Must supply nonempty argument for `terms`"
        raise ValueError(msg)

    output = {
        "drug_name": [],
        "trial_id": [],
        "brief": [],
        "study_type": [],
        "min_age": [],
        "age_groups": [],
        "pediatric": [],
        "conditions": [],
        "interventions": [],
    }

    for drug in terms:
        for study in get_trials_from_fda(drug):
            # NOTE(review): the protocol and each of its sections are treated
            # as optional throughout, so a sparse record yields None values
            # instead of an AttributeError part-way through a row -- confirm
            # against the regbot data model.
            protocol = study.protocol
            identification = protocol.identification if protocol else None
            design = protocol.design if protocol else None
            eligibility = protocol.eligibility if protocol else None
            conditions = protocol.conditions if protocol else None
            arms_intervention = protocol.arms_intervention if protocol else None

            age_groups = eligibility.std_age if eligibility else None
            interventions = (
                arms_intervention.interventions if arms_intervention else None
            )

            output["drug_name"].append(drug.upper())
            output["trial_id"].append(
                identification.nct_id if identification else None
            )
            output["brief"].append(
                identification.brief_title if identification else None
            )
            output["study_type"].append(design.study_type if design else None)
            output["min_age"].append(eligibility.min_age if eligibility else None)
            output["age_groups"].append(age_groups)
            # unknown (None) rather than False when no age groups are listed
            output["pediatric"].append(
                StandardAge.CHILD in age_groups if age_groups else None
            )
            output["conditions"].append(conditions.conditions if conditions else None)
            # interventions are converted to plain dicts via ``_asdict()``
            output["interventions"].append(
                [i._asdict() for i in interventions] if interventions else None
            )
    return output
Loading
Loading