Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: use regbot for clinical trial fetching #89

Merged
merged 4 commits into from
Dec 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ repos:
rev: v4.6.0 # pre-commit-hooks version
hooks:
- id: check-added-large-files
exclude: tests/fixtures/integration_clinical_trials_zolgensma.json
- id: detect-private-key
- id: trailing-whitespace
- id: end-of-file-fixer
Expand Down
9 changes: 7 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ requires-python = ">=3.11"
description = "Python wrapper for accessing an instance of DGIdb v5 database"
license = {file = "LICENSE"}
dependencies = [
"regbot",
"regbot~=0.1.2",
"requests",
"gql[requests]",
"pandas~=2.0",
Expand All @@ -34,7 +34,12 @@ dependencies = [
dynamic = ["version"]

[project.optional-dependencies]
tests = ["pytest", "pytest-cov", "pytest-benchmark", "requests_mock"]
tests = [
"pytest",
"pytest-cov",
"pytest-benchmark",
"requests_mock",
]
dev = ["pre-commit>=3.7.1", "ruff==0.5.0"]
docs = [
"sphinx==6.1.3",
Expand Down
2 changes: 0 additions & 2 deletions src/dgipy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
SourceType,
get_all_genes,
get_categories,
get_clinical_trials,
get_drug_applications,
get_drugs,
get_genes,
Expand All @@ -23,5 +22,4 @@
"get_all_genes",
"get_drug_applications",
"generate_app",
"get_clinical_trials",
]
72 changes: 0 additions & 72 deletions src/dgipy/dgidb.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,75 +390,3 @@ def get_drug_applications(terms: list, api_url: str | None = None) -> dict:
product.active_ingredients[0].strength
)
return output


def get_clinical_trials(terms: list) -> dict:
    """Look up clinical trials data for one or more drugs of interest.

    One request is made to the ClinicalTrials.gov v2 ``studies`` endpoint per
    term. The JSON response is read once (no follow-up page requests are made)
    and every study in it is flattened into column-oriented lists.

    :param terms: drugs of interest
    :return: DataFrame-ready dict of equal-length columns, one entry per study.
        Optional fields that a study does not report are recorded as ``None``.
    :raise requests.exceptions.RequestException: if a request cannot be completed
        or the server answers with an error status
    """
    from urllib.parse import quote  # local import: only needed by this lookup

    base_url = "https://clinicaltrials.gov/api/v2/studies?format=json"

    output = {
        "search_term": [],
        "trial_id": [],
        "brief": [],
        "study_type": [],
        "min_age": [],
        "age_groups": [],
        "pediatric": [],
        "conditions": [],
        "interventions": [],
    }

    for drug in terms:
        # Percent-encode the term so spaces, "&", "#", "+" etc. cannot corrupt
        # or truncate the query string.
        full_uri = f"{base_url}&query.intr={quote(str(drug), safe='')}"  # TODO: + cond_url + term_url
        try:
            # The request itself lives inside the `try` so that timeouts and
            # connection failures are logged the same way as HTTP error statuses.
            response = requests.get(full_uri, timeout=20)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            _logger.error("Clinical trials lookup to URL %s failed: %s", full_uri, e)
            raise
        if response.status_code != 200:
            # Non-error but unexpected status: skip this term, keep the others.
            _logger.error(
                "Received status code %s from request to %s -- returning empty dataframe",
                response.status_code,
                full_uri,
            )
            continue

        for study in response.json()["studies"]:
            protocol = study["protocolSection"]
            identification = protocol["identificationModule"]
            # Eligibility and conditions are treated as optional; a study that
            # omits them yields `None` instead of aborting the whole lookup.
            eligibility = protocol.get("eligibilityModule", {})
            age_groups = eligibility.get("stdAges")

            output["search_term"].append(drug)
            output["trial_id"].append(identification["nctId"])
            output["brief"].append(identification["briefTitle"])
            output["study_type"].append(protocol["designModule"]["studyType"])
            output["min_age"].append(eligibility.get("minimumAge"))
            output["age_groups"].append(age_groups)
            output["pediatric"].append(
                None if age_groups is None else "CHILD" in age_groups
            )
            output["conditions"].append(
                protocol.get("conditionsModule", {}).get("conditions")
            )
            output["interventions"].append(protocol.get("armsInterventionsModule"))
    return output
1 change: 1 addition & 0 deletions src/dgipy/integrations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Provide tools for integrating DGIdb data with related resources."""
135 changes: 135 additions & 0 deletions src/dgipy/integrations/clinical_trials.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
"""Integrate data from FDA clinical trials API."""

import logging

from regbot.fetch.clinical_trials import StandardAge, Status, Study
from regbot.fetch.clinical_trials import get_clinical_trials as get_trials_from_fda

_logger = logging.getLogger(__name__)


def _add_study_to_output(output: dict[str, list], drug_name: str, study: Study) -> None:
    """Update `output` in-place with results from study

    Exactly one value is appended to every column this function touches, so the
    columns stay the same length whether or not the study reports eligibility,
    conditions, interventions or locations. Missing optional sections are
    recorded as ``None`` (or an empty list for ``potential_sites``).

    :param output: in-progress raw columnar data
    :param drug_name: name of drug that was searched
    :param study: clinical trial study data to add to output
    """
    # `study.protocol` is dereferenced unconditionally below (identification and
    # design are read without a guard), so it is bound once here.
    protocol = study.protocol
    eligibility = protocol.eligibility

    output["drug_name"].append(drug_name.upper())
    output["trial_id"].append(protocol.identification.nct_id)
    output["brief"].append(protocol.identification.brief_title)
    output["study_type"].append(protocol.design.study_type)

    if eligibility:
        min_age = eligibility.min_age
        max_age = eligibility.max_age
        age_groups = eligibility.std_age
        criteria = eligibility.description
        sex = eligibility.sex
        population = eligibility.population
    else:
        min_age = max_age = age_groups = None
        criteria = sex = population = None

    output["min_age"].append(min_age)
    output["max_age"].append(max_age)
    output["age_groups"].append(age_groups)
    # Unknown (None) rather than False when no age groups were reported.
    output["pediatric"].append(StandardAge.CHILD in age_groups if age_groups else None)
    output["incl_excl_criteria"].append(criteria)
    output["population_sex"].append(sex)
    # Always appended, including when eligibility is missing; skipping it would
    # leave this column shorter than the others.
    output["population_description"].append(population)

    conditions = protocol.conditions
    output["conditions"].append(conditions.conditions if conditions else None)

    arms = protocol.arms_intervention
    output["interventions"].append(
        [i._asdict() for i in arms.interventions]
        if arms and arms.interventions
        else None
    )

    contacts = protocol.contacts_locations
    all_locations = contacts.locations if contacts and contacts.locations else []
    # Only these location statuses are kept as potential sites.
    site_statuses = {
        Status.RECRUITING,
        Status.NOT_YET_RECRUITING,
        Status.AVAILABLE,
        Status.TEMPORARILY_NOT_AVAILABLE,
        Status.UNKNOWN,
    }
    potential_sites = [
        {
            "name": location.facility,
            "status": location.status,
            "city": location.city,
            "country": location.country,
            "coordinates": location.geo,
        }
        for location in all_locations
        if location.status in site_statuses
    ]
    output["potential_sites"].append(potential_sites)


def get_clinical_trials(terms: list[str]) -> dict:
    """Acquire associated clinical trials data for drug term

    >>> from dgipy.dgidb import get_drugs
    >>> from dgipy.integrations.clinical_trials import get_clinical_trials
    >>> import polars as pl  # or another dataframe library of your choosing
    >>> drugs = ["imatinib", "sunitinib"]
    >>> df = pl.DataFrame(get_drugs(drugs))
    >>> trial_df = pl.DataFrame(get_clinical_trials(drugs))
    >>> annotated_df = df.join(trial_df, on="drug_name")

    Each term is looked up separately and every returned study becomes one entry
    in each output column.

    :param terms: drugs of interest. A list is expected; other inputs trigger a
        warning, and a single bare string is treated as one drug name.
    :return: all clinical trials data for drugs of interest in a DataFrame-ready dict
    :raise ValueError: if `terms` is empty
    """
    if not isinstance(terms, list):
        _logger.warning(
            "Given `terms` arg doesn't appear to be a list. This argument should be a sequence of drug names (as strings)."
        )
    if not terms:
        msg = "Must supply nonempty argument for `terms`"
        raise ValueError(msg)
    if isinstance(terms, str):
        # Iterating a bare string would issue one lookup per character; treat it
        # as the single drug name the caller almost certainly meant.
        terms = [terms]

    output = {
        "drug_name": [],
        "trial_id": [],
        "brief": [],
        "study_type": [],
        "min_age": [],
        "max_age": [],
        "age_groups": [],
        "pediatric": [],
        "conditions": [],
        "interventions": [],
        "incl_excl_criteria": [],
        "population_sex": [],
        "population_description": [],
        "potential_sites": [],
    }

    for drug in terms:
        for study in get_trials_from_fda(drug):
            _add_study_to_output(output, drug, study)

    return output
Loading
Loading