From 060982ee03dd6bb311d09609404b0425bb3c2c56 Mon Sep 17 00:00:00 2001 From: Benjamin Wingfield Date: Thu, 15 Sep 2022 13:26:07 +0100 Subject: [PATCH 1/2] pin polars to 0.14.9 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b9899ab..b8262b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ pandas = "^1.4.3" pyliftover = "^0.4" requests = "^2.28.1" jq = "^1.2.2" -polars = "^0.14.9" +polars = "0.14.9" [tool.poetry.dev-dependencies] pytest = "^7.1.2" From 96896e36df7edd69068b1c57d5cb6e3381e7502d Mon Sep 17 00:00:00 2001 From: Laurent Gil Date: Fri, 16 Sep 2022 15:11:00 +0100 Subject: [PATCH 2/2] Handle PGS Catalog REST API errors and retries --- pgscatalog_utils/download/publication.py | 10 ++++---- pgscatalog_utils/download/score.py | 32 +++++++++++++++++++++--- pgscatalog_utils/download/trait.py | 10 ++++---- 3 files changed, 39 insertions(+), 13 deletions(-) diff --git a/pgscatalog_utils/download/publication.py b/pgscatalog_utils/download/publication.py index 843b8a2..56c7f7b 100644 --- a/pgscatalog_utils/download/publication.py +++ b/pgscatalog_utils/download/publication.py @@ -1,20 +1,20 @@ import logging from functools import reduce -import requests +from pgscatalog_utils.download.score import query_api logger = logging.getLogger(__name__) def query_publication(pgp: str) -> list[str]: - api: str = f'https://www.pgscatalog.org/rest/publication/{pgp}' logger.debug("Querying PGS Catalog with publication PGP ID") - r: requests.models.Response = requests.get(api) + api: str = f'/publication/{pgp}' + results_json = query_api(api) - if r.json() == {}: + if results_json == {} or results_json == None: logger.critical(f"Bad response from PGS Catalog for EFO term: {pgp}") raise Exception - pgs: dict[str, list[str]] = r.json().get('associated_pgs_ids') + pgs: dict[str, list[str]] = results_json.get('associated_pgs_ids') logger.debug(f"Valid response from PGS Catalog for PGP ID: {pgp}") return list(reduce(lambda x, y: set(x).union(set(y)), pgs.values())) diff --git a/pgscatalog_utils/download/score.py b/pgscatalog_utils/download/score.py index a38dc0c..edad470 100644 --- a/pgscatalog_utils/download/score.py +++ b/pgscatalog_utils/download/score.py @@ -3,6 +3,7 @@ import jq import requests +import time logger = logging.getLogger(__name__) @@ -28,11 +29,36 @@ def get_url(pgs: list[str], build: str) -> dict[str, str]: return dict(zip(pgs_result, url_result)) +def query_api(api: str, retry:int = 0) -> dict: + max_retries = 5 + wait = 60 + results_json = None + rest_url_root = 'https://www.pgscatalog.org/rest' + try: + r: requests.models.Response = requests.get(rest_url_root+api) + r.raise_for_status() + results_json = r.json() + except requests.exceptions.HTTPError as e: + print(f'HTTP Error: {e}') + if r.status_code in [421,429] and retry < 5: + retry +=1 + print(f'> Retry to query the PGS Catalog REST API in {wait}s ... attempt {retry} out of {max_retries}.') + time.sleep(wait) + results_json = query_api(api,retry) + except requests.exceptions.ConnectionError as e: + print(f'Error Connecting: {e}') + except requests.exceptions.Timeout as e: + print(f'Timeout Error: {e}') + except requests.exceptions.RequestException as e: + print(f'Request Error: {e}') + return results_json + + def query_score(pgs_id: list[str]) -> dict: pgs: str = ','.join(pgs_id) - api: str = f'https://www.pgscatalog.org/rest/score/search?pgs_ids={pgs}' - r: requests.models.Response = requests.get(api) - return r.json() + api: str = f'/score/search?pgs_ids={pgs}' + results_json = query_api(api) + return results_json def _chunker(pgs: list[str]): diff --git a/pgscatalog_utils/download/trait.py b/pgscatalog_utils/download/trait.py index c2db495..83af414 100644 --- a/pgscatalog_utils/download/trait.py +++ b/pgscatalog_utils/download/trait.py @@ -1,24 +1,24 @@ import logging from functools import reduce -import requests +from pgscatalog_utils.download.score import query_api logger = logging.getLogger(__name__) def query_trait(trait: str) -> list[str]: - api: str = f'https://www.pgscatalog.org/rest/trait/{trait}?include_children=1' logger.debug(f"Querying PGS Catalog with trait {trait}") - r: requests.models.Response = requests.get(api) + api: str = f'/trait/{trait}?include_children=1' + results_json = query_api(api) - if r.json() == {}: + if results_json == {} or results_json == None: logger.critical(f"Bad response from PGS Catalog for EFO term: {trait}") raise Exception keys: list[str] = ['associated_pgs_ids', 'child_associated_pgs_ids'] pgs: list[str] = [] for key in keys: - pgs.append(r.json().get(key)) + pgs.append(results_json.get(key)) logger.debug(f"Valid response from PGS Catalog for EFO term: {trait}") return list(reduce(lambda x, y: set(x).union(set(y)), pgs))