From eb683fffebd9b2d0972fb9d2ee86552f5fe762e4 Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Wed, 11 Dec 2024 14:40:19 -0500 Subject: [PATCH 01/15] Add pext resources --- gnomad/resources/grch38/gnomad.py | 39 +++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/gnomad/resources/grch38/gnomad.py b/gnomad/resources/grch38/gnomad.py index 62cb950bb..de3383cce 100644 --- a/gnomad/resources/grch38/gnomad.py +++ b/gnomad/resources/grch38/gnomad.py @@ -46,6 +46,7 @@ MAJOR_RELEASES = ["v3", "v4"] CURRENT_MAJOR_RELEASE = MAJOR_RELEASES[-1] +CURRENT_GTEX_RELEASE = "v10" GENOME_POPS = ["AFR", "AMI", "AMR", "ASJ", "EAS", "FIN", "NFE", "SAS", "OTH"] SUBSETS = { @@ -364,6 +365,17 @@ def _public_an_ht_path(data_type: str, version: str) -> str: """ return f"gs://gnomad-public-requester-pays/release/{version}/ht/{data_type}/gnomad.{data_type}.v{version}.allele_number_all_sites.ht" +def _public_pext_ht_path(type: str, version: str, gtex_versioin: str) -> str: + """ + Get public pext hail table. + + :param type: One of "base_level" or "annotation_level" + :param version: The gnomAD version used to generate pext. + :param gtex_version: The GTEx version used to generate pext + :return: Path to pext Table + """ + return (f"gs://gnomad-public-requester-pays/release/{version}/pext/gnomad.pext.gtex_{gtex_versioin}.{type}.ht") + def public_release(data_type: str) -> VersionedTableResource: """ @@ -701,3 +713,30 @@ def gnomad_gks( outputs.append(out) return outputs + +def pext(type: str, gtex_version: str = CURRENT_GTEX_RELEASE) -> VersionedTableResource: + """ + Retrieve pext table by type. + + :param type: One of "base_level" or "annotation_level". + :param gtex_version: The GTEx version used to generate P + :return: Pext Table. + """ + if type not in ["base_level", "annotation_level"]: + raise DataException( + f"{type} not in ['base_level', 'annotation_level'], please select a type from" + f" ['base_level', 'annotation_level']" + ) + + current_release = CURRENT_EXOME_RELEASE + releases = EXOME_RELEASES + + return VersionedTableResource( + current_release, + { + release: GnomadPublicTableResource( + path=_public_pext_ht_path(type, release, gtex_version) + ) + for release in releases + }, + ) From 66a934fdef507962c99f087b4adcc73a2294e580 Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Wed, 11 Dec 2024 14:44:12 -0500 Subject: [PATCH 02/15] Black formatting --- gnomad/resources/grch38/gnomad.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gnomad/resources/grch38/gnomad.py b/gnomad/resources/grch38/gnomad.py index de3383cce..eb71d6461 100644 --- a/gnomad/resources/grch38/gnomad.py +++ b/gnomad/resources/grch38/gnomad.py @@ -365,6 +365,7 @@ def _public_an_ht_path(data_type: str, version: str) -> str: """ return f"gs://gnomad-public-requester-pays/release/{version}/ht/{data_type}/gnomad.{data_type}.v{version}.allele_number_all_sites.ht" + def _public_pext_ht_path(type: str, version: str, gtex_versioin: str) -> str: """ Get public pext hail table. @@ -374,7 +375,7 @@ def _public_pext_ht_path(type: str, version: str, gtex_versioin: str) -> str: :param gtex_version: The GTEx version used to generate pext :return: Path to pext Table """ - return (f"gs://gnomad-public-requester-pays/release/{version}/pext/gnomad.pext.gtex_{gtex_versioin}.{type}.ht") + return f"gs://gnomad-public-requester-pays/release/{version}/pext/gnomad.pext.gtex_{gtex_versioin}.{type}.ht" def public_release(data_type: str) -> VersionedTableResource: @@ -714,6 +715,7 @@ def gnomad_gks( return outputs + def pext(type: str, gtex_version: str = CURRENT_GTEX_RELEASE) -> VersionedTableResource: """ Retrieve pext table by type. From a2485a05b082fb3fe0abc3d10c1161e8150a0ee0 Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Thu, 12 Dec 2024 13:48:21 -0500 Subject: [PATCH 03/15] Add constraint resources --- gnomad/resources/grch37/gnomad.py | 28 ++++++++++++++++++++++++++++ gnomad/resources/grch38/gnomad.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/gnomad/resources/grch37/gnomad.py b/gnomad/resources/grch37/gnomad.py index cf933ed68..2d73799da 100644 --- a/gnomad/resources/grch37/gnomad.py +++ b/gnomad/resources/grch37/gnomad.py @@ -103,6 +103,14 @@ def _liftover_data_path(data_type: str, version: str) -> str: """ return f"gs://gnomad-public-requester-pays/release/{version}/liftover_grch38/ht/{data_type}/gnomad.{data_type}.r{version}.sites.liftover_grch38.ht" +def _public_constraint_ht_path(version: str) -> str: + """ + Get public constraint table path. + + :param version: One of the release versions of gnomAD on GRCh38 + :return: Path to constraint Table + """ + return (f"gs://gnomad-public-requester-pays/release/{version}/constraint/gnomad.v{version}.lof_metrics.by_transcript.ht") def public_release(data_type: str) -> VersionedTableResource: """ @@ -219,3 +227,23 @@ def release_vcf_path(data_type: str, version: str, contig: str) -> str: contig = f".{contig}" if contig else "" return f"gs://gcp-public-data--gnomad/release/{version}/vcf/{data_type}/gnomad.{data_type}.r{version}.sites{contig}.vcf.bgz" + +def constraint(version: str = CURRENT_EXOME_RELEASE) -> VersionedTableResource: + """ + Retrieve constraint table. + + :param version: One of the release versions of gnomAD on GRCh38 + :return: Constraint Table + """ + current_release = CURRENT_EXOME_RELEASE + releases = EXOME_RELEASES + + return VersionedTableResource( + current_release, + { + release: GnomadPublicTableResource( + path=_public_constraint_ht_path(release) + ) + for release in releases + }, + ) diff --git a/gnomad/resources/grch38/gnomad.py b/gnomad/resources/grch38/gnomad.py index eb71d6461..ec7999c0c 100644 --- a/gnomad/resources/grch38/gnomad.py +++ b/gnomad/resources/grch38/gnomad.py @@ -377,6 +377,15 @@ def _public_pext_ht_path(type: str, version: str, gtex_versioin: str) -> str: """ return f"gs://gnomad-public-requester-pays/release/{version}/pext/gnomad.pext.gtex_{gtex_versioin}.{type}.ht" +def _public_constraint_ht_path(version: str) -> str: + """ + Get public constraint table path. + + :param version: One of the release versions of gnomAD on GRCh38 + :return: Path to constraint Table + """ + return (f"gs://gnomad-public-requester-pays/release/{version}/constraint/gnomad.v{version}.constraint_metrics.ht") + def public_release(data_type: str) -> VersionedTableResource: """ @@ -742,3 +751,23 @@ def pext(type: str, gtex_version: str = CURRENT_GTEX_RELEASE) -> VersionedTableR for release in releases }, ) + +def constraint(version: str = CURRENT_EXOME_RELEASE) -> VersionedTableResource: + """ + Retrieve constraint table. + + :param version: One of the release versions of gnomAD on GRCh38 + :return: Constraint Table + """ + current_release = CURRENT_EXOME_RELEASE + releases = EXOME_RELEASES + + return VersionedTableResource( + current_release, + { + release: GnomadPublicTableResource( + path=_public_constraint_ht_path(release) + ) + for release in releases + }, + ) From 0e7f3e73cb1063b99452de6b3c3fbed251fecefb Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Thu, 12 Dec 2024 13:48:39 -0500 Subject: [PATCH 04/15] Add constraint resources --- gnomad/resources/grch37/gnomad.py | 9 +++++---- gnomad/resources/grch38/gnomad.py | 8 ++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/gnomad/resources/grch37/gnomad.py b/gnomad/resources/grch37/gnomad.py index 2d73799da..46435968c 100644 --- a/gnomad/resources/grch37/gnomad.py +++ b/gnomad/resources/grch37/gnomad.py @@ -103,6 +103,7 @@ def _liftover_data_path(data_type: str, version: str) -> str: """ return f"gs://gnomad-public-requester-pays/release/{version}/liftover_grch38/ht/{data_type}/gnomad.{data_type}.r{version}.sites.liftover_grch38.ht" + def _public_constraint_ht_path(version: str) -> str: """ Get public constraint table path. @@ -110,7 +111,8 @@ def _public_constraint_ht_path(version: str) -> str: :param version: One of the release versions of gnomAD on GRCh38 :return: Path to constraint Table """ - return (f"gs://gnomad-public-requester-pays/release/{version}/constraint/gnomad.v{version}.lof_metrics.by_transcript.ht") + return f"gs://gnomad-public-requester-pays/release/{version}/constraint/gnomad.v{version}.lof_metrics.by_transcript.ht" + def public_release(data_type: str) -> VersionedTableResource: """ @@ -228,6 +230,7 @@ def release_vcf_path(data_type: str, version: str, contig: str) -> str: contig = f".{contig}" if contig else "" return f"gs://gcp-public-data--gnomad/release/{version}/vcf/{data_type}/gnomad.{data_type}.r{version}.sites{contig}.vcf.bgz" + def constraint(version: str = CURRENT_EXOME_RELEASE) -> VersionedTableResource: """ Retrieve constraint table. @@ -241,9 +244,7 @@ def constraint(version: str = CURRENT_EXOME_RELEASE) -> VersionedTableResource: return VersionedTableResource( current_release, { - release: GnomadPublicTableResource( - path=_public_constraint_ht_path(release) - ) + release: GnomadPublicTableResource(path=_public_constraint_ht_path(release)) for release in releases }, ) diff --git a/gnomad/resources/grch38/gnomad.py b/gnomad/resources/grch38/gnomad.py index ec7999c0c..58b1f7a3b 100644 --- a/gnomad/resources/grch38/gnomad.py +++ b/gnomad/resources/grch38/gnomad.py @@ -377,6 +377,7 @@ def _public_pext_ht_path(type: str, version: str, gtex_versioin: str) -> str: """ return f"gs://gnomad-public-requester-pays/release/{version}/pext/gnomad.pext.gtex_{gtex_versioin}.{type}.ht" + def _public_constraint_ht_path(version: str) -> str: """ Get public constraint table path. @@ -384,7 +385,7 @@ def _public_constraint_ht_path(version: str) -> str: :param version: One of the release versions of gnomAD on GRCh38 :return: Path to constraint Table """ - return (f"gs://gnomad-public-requester-pays/release/{version}/constraint/gnomad.v{version}.constraint_metrics.ht") + return f"gs://gnomad-public-requester-pays/release/{version}/constraint/gnomad.v{version}.constraint_metrics.ht" def public_release(data_type: str) -> VersionedTableResource: @@ -752,6 +753,7 @@ def pext(type: str, gtex_version: str = CURRENT_GTEX_RELEASE) -> VersionedTableR }, ) + def constraint(version: str = CURRENT_EXOME_RELEASE) -> VersionedTableResource: """ Retrieve constraint table. @@ -765,9 +767,7 @@ def constraint(version: str = CURRENT_EXOME_RELEASE) -> VersionedTableResource: return VersionedTableResource( current_release, { - release: GnomadPublicTableResource( - path=_public_constraint_ht_path(release) - ) + release: GnomadPublicTableResource(path=_public_constraint_ht_path(release)) for release in releases }, ) From f69f6484ae2bd749ca17a69ea57d869f10337425 Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Thu, 12 Dec 2024 14:05:59 -0500 Subject: [PATCH 05/15] Add pext for GRCh37 --- gnomad/resources/grch37/gnomad.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/gnomad/resources/grch37/gnomad.py b/gnomad/resources/grch37/gnomad.py index 46435968c..11aafea96 100644 --- a/gnomad/resources/grch37/gnomad.py +++ b/gnomad/resources/grch37/gnomad.py @@ -113,6 +113,18 @@ def _public_constraint_ht_path(version: str) -> str: """ return f"gs://gnomad-public-requester-pays/release/{version}/constraint/gnomad.v{version}.lof_metrics.by_transcript.ht" +def _public_pext_path(data_format: str) -> str: + """ + Get public pext data. + + :param data_format: Either "tsv" or "ht". + :return: Path to pext data. + """ + # TODO: Could you confirm if this is correct path? + if data_format not in ["tsv.gz", "ht"]: + raise DataException(f"{data_format} not in ['tsv.gz', 'ht']") + return (f"gs://gnomad-public-requester-pays/papers/2019-tx-annotation/gnomad_browser/all.baselevel.021620.{data_format}") + def public_release(data_type: str) -> VersionedTableResource: """ @@ -230,6 +242,13 @@ def release_vcf_path(data_type: str, version: str, contig: str) -> str: contig = f".{contig}" if contig else "" return f"gs://gcp-public-data--gnomad/release/{version}/vcf/{data_type}/gnomad.{data_type}.r{version}.sites{contig}.vcf.bgz" +def pext() -> GnomadPublicTableResource: + """ + Retrieve pext data. + + :return: Pext Table + """ + return GnomadPublicTableResource(path=_public_pext_path('ht')) def constraint(version: str = CURRENT_EXOME_RELEASE) -> VersionedTableResource: """ From 4a41c7eb6abfc0d55ea72dd91f2d4535d7aff919 Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Thu, 12 Dec 2024 14:06:18 -0500 Subject: [PATCH 06/15] Add pext for GRCh37 --- gnomad/resources/grch37/gnomad.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/gnomad/resources/grch37/gnomad.py b/gnomad/resources/grch37/gnomad.py index 11aafea96..2c4ca332a 100644 --- a/gnomad/resources/grch37/gnomad.py +++ b/gnomad/resources/grch37/gnomad.py @@ -113,6 +113,7 @@ def _public_constraint_ht_path(version: str) -> str: """ return f"gs://gnomad-public-requester-pays/release/{version}/constraint/gnomad.v{version}.lof_metrics.by_transcript.ht" + def _public_pext_path(data_format: str) -> str: """ Get public pext data. @@ -123,7 +124,7 @@ def _public_pext_path(data_format: str) -> str: # TODO: Could you confirm if this is correct path? if data_format not in ["tsv.gz", "ht"]: raise DataException(f"{data_format} not in ['tsv.gz', 'ht']") - return (f"gs://gnomad-public-requester-pays/papers/2019-tx-annotation/gnomad_browser/all.baselevel.021620.{data_format}") + return f"gs://gnomad-public-requester-pays/papers/2019-tx-annotation/gnomad_browser/all.baselevel.021620.{data_format}" def public_release(data_type: str) -> VersionedTableResource: @@ -242,13 +243,15 @@ def release_vcf_path(data_type: str, version: str, contig: str) -> str: contig = f".{contig}" if contig else "" return f"gs://gcp-public-data--gnomad/release/{version}/vcf/{data_type}/gnomad.{data_type}.r{version}.sites{contig}.vcf.bgz" + def pext() -> GnomadPublicTableResource: """ Retrieve pext data. :return: Pext Table """ - return GnomadPublicTableResource(path=_public_pext_path('ht')) + return GnomadPublicTableResource(path=_public_pext_path("ht")) + def constraint(version: str = CURRENT_EXOME_RELEASE) -> VersionedTableResource: """ From ddcab90770baeed933c5ebc679cf5417c5ad304f Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Thu, 12 Dec 2024 14:14:24 -0500 Subject: [PATCH 07/15] fix unused argument --- gnomad/resources/grch37/gnomad.py | 2 +- gnomad/resources/grch38/gnomad.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gnomad/resources/grch37/gnomad.py b/gnomad/resources/grch37/gnomad.py index 2c4ca332a..d39645aac 100644 --- a/gnomad/resources/grch37/gnomad.py +++ b/gnomad/resources/grch37/gnomad.py @@ -266,7 +266,7 @@ def constraint(version: str = CURRENT_EXOME_RELEASE) -> VersionedTableResource: return VersionedTableResource( current_release, { - release: GnomadPublicTableResource(path=_public_constraint_ht_path(release)) + release: GnomadPublicTableResource(path=_public_constraint_ht_path(version)) for release in releases }, ) diff --git a/gnomad/resources/grch38/gnomad.py b/gnomad/resources/grch38/gnomad.py index 58b1f7a3b..d9ff6657c 100644 --- a/gnomad/resources/grch38/gnomad.py +++ b/gnomad/resources/grch38/gnomad.py @@ -767,7 +767,7 @@ def constraint(version: str = CURRENT_EXOME_RELEASE) -> VersionedTableResource: return VersionedTableResource( current_release, { - release: GnomadPublicTableResource(path=_public_constraint_ht_path(release)) + release: GnomadPublicTableResource(path=_public_constraint_ht_path(version)) for release in releases }, ) From 149b9d9502092ff90f4191b4981b47dcb2b84a4a Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Thu, 12 Dec 2024 14:29:52 -0500 Subject: [PATCH 08/15] Make it default to current release --- gnomad/resources/grch37/gnomad.py | 4 ++-- gnomad/resources/grch38/gnomad.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gnomad/resources/grch37/gnomad.py b/gnomad/resources/grch37/gnomad.py index d39645aac..a514023c6 100644 --- a/gnomad/resources/grch37/gnomad.py +++ b/gnomad/resources/grch37/gnomad.py @@ -253,7 +253,7 @@ def pext() -> GnomadPublicTableResource: return GnomadPublicTableResource(path=_public_pext_path("ht")) -def constraint(version: str = CURRENT_EXOME_RELEASE) -> VersionedTableResource: +def constraint() -> VersionedTableResource: """ Retrieve constraint table. @@ -266,7 +266,7 @@ def constraint(version: str = CURRENT_EXOME_RELEASE) -> VersionedTableResource: return VersionedTableResource( current_release, { - release: GnomadPublicTableResource(path=_public_constraint_ht_path(version)) + release: GnomadPublicTableResource(path=_public_constraint_ht_path(release)) for release in releases }, ) diff --git a/gnomad/resources/grch38/gnomad.py b/gnomad/resources/grch38/gnomad.py index d9ff6657c..0ea7932e0 100644 --- a/gnomad/resources/grch38/gnomad.py +++ b/gnomad/resources/grch38/gnomad.py @@ -754,7 +754,7 @@ def pext(type: str, gtex_version: str = CURRENT_GTEX_RELEASE) -> VersionedTableR ) -def constraint(version: str = CURRENT_EXOME_RELEASE) -> VersionedTableResource: +def constraint() -> VersionedTableResource: """ Retrieve constraint table. @@ -767,7 +767,7 @@ def constraint(version: str = CURRENT_EXOME_RELEASE) -> VersionedTableResource: return VersionedTableResource( current_release, { - release: GnomadPublicTableResource(path=_public_constraint_ht_path(version)) + release: GnomadPublicTableResource(path=_public_constraint_ht_path(release)) for release in releases }, ) From 78bd2f541ac3ab4e6b8e88959e8e86de763d4af2 Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Wed, 18 Dec 2024 11:56:53 -0500 Subject: [PATCH 09/15] Address review suggestions --- gnomad/resources/grch37/gnomad.py | 44 +++++++++++-------------------- 1 file changed, 15 insertions(+), 29 deletions(-) diff --git a/gnomad/resources/grch37/gnomad.py b/gnomad/resources/grch37/gnomad.py index a514023c6..c959319ce 100644 --- a/gnomad/resources/grch37/gnomad.py +++ b/gnomad/resources/grch37/gnomad.py @@ -104,27 +104,22 @@ def _liftover_data_path(data_type: str, version: str) -> str: return f"gs://gnomad-public-requester-pays/release/{version}/liftover_grch38/ht/{data_type}/gnomad.{data_type}.r{version}.sites.liftover_grch38.ht" -def _public_constraint_ht_path(version: str) -> str: +def _public_constraint_ht_path() -> str: """ - Get public constraint table path. + Get public gene constraint Table path. - :param version: One of the release versions of gnomAD on GRCh38 - :return: Path to constraint Table + :return: Path to constraint Table. """ - return f"gs://gnomad-public-requester-pays/release/{version}/constraint/gnomad.v{version}.lof_metrics.by_transcript.ht" + return f"gs://gnomad-public-requester-pays/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_transcript.ht" -def _public_pext_path(data_format: str) -> str: +def _public_pext_path() -> str: """ - Get public pext data. + Get public proportion expressed across transcripts (pext) data. - :param data_format: Either "tsv" or "ht". :return: Path to pext data. """ - # TODO: Could you confirm if this is correct path? - if data_format not in ["tsv.gz", "ht"]: - raise DataException(f"{data_format} not in ['tsv.gz', 'ht']") - return f"gs://gnomad-public-requester-pays/papers/2019-tx-annotation/gnomad_browser/all.baselevel.021620.{data_format}" + return (f"gs://gnomad-public-requester-pays/papers/2019-tx-annotation/gnomad_browser/all.baselevel.021620.ht") def public_release(data_type: str) -> VersionedTableResource: @@ -246,27 +241,18 @@ def release_vcf_path(data_type: str, version: str, contig: str) -> str: def pext() -> GnomadPublicTableResource: """ - Retrieve pext data. + Retrieve proportion expressed across transcripts (pext) data. - :return: Pext Table + :return: Pext Table. """ - return GnomadPublicTableResource(path=_public_pext_path("ht")) + return GnomadPublicTableResource(path=_public_pext_path()) -def constraint() -> VersionedTableResource: +def constraint() -> GnomadPublicTableResource: """ - Retrieve constraint table. + Retrieve gene constraint table. - :param version: One of the release versions of gnomAD on GRCh38 - :return: Constraint Table + :param version: One of the release versions of gnomAD on GRCh37. + :return: Gene constraint Table. """ - current_release = CURRENT_EXOME_RELEASE - releases = EXOME_RELEASES - - return VersionedTableResource( - current_release, - { - release: GnomadPublicTableResource(path=_public_constraint_ht_path(release)) - for release in releases - }, - ) + return GnomadPublicTableResource(path=_public_constraint_ht_path()) From 0c021f476a62a82c71e33ab238d4d18ecdad4309 Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Wed, 18 Dec 2024 12:13:32 -0500 Subject: [PATCH 10/15] Black formatting --- gnomad/resources/grch37/gnomad.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnomad/resources/grch37/gnomad.py b/gnomad/resources/grch37/gnomad.py index c959319ce..07d6e72eb 100644 --- a/gnomad/resources/grch37/gnomad.py +++ b/gnomad/resources/grch37/gnomad.py @@ -119,7 +119,7 @@ def _public_pext_path() -> str: :return: Path to pext data. """ - return (f"gs://gnomad-public-requester-pays/papers/2019-tx-annotation/gnomad_browser/all.baselevel.021620.ht") + return f"gs://gnomad-public-requester-pays/papers/2019-tx-annotation/gnomad_browser/all.baselevel.021620.ht" def public_release(data_type: str) -> VersionedTableResource: From 384920d2c8d9a6f1f606b827a4d058e584a6aa89 Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Wed, 18 Dec 2024 14:21:56 -0500 Subject: [PATCH 11/15] Address review comments --- gnomad/resources/grch37/gnomad.py | 23 +++++++--- gnomad/resources/grch38/gnomad.py | 70 ++++++++++++++----------------- 2 files changed, 49 insertions(+), 44 deletions(-) diff --git a/gnomad/resources/grch37/gnomad.py b/gnomad/resources/grch37/gnomad.py index 07d6e72eb..c1af4ca4e 100644 --- a/gnomad/resources/grch37/gnomad.py +++ b/gnomad/resources/grch37/gnomad.py @@ -113,13 +113,26 @@ def _public_constraint_ht_path() -> str: return f"gs://gnomad-public-requester-pays/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_transcript.ht" -def _public_pext_path() -> str: +def _public_pext_path(pext_type: str = "base_level") -> str: """ Get public proportion expressed across transcripts (pext) data. + :param pext_type: One of "annotation_level" or "base_level". Default is "base_level". :return: Path to pext data. + :raises DataException: If the provided pext_type is invalid. """ - return f"gs://gnomad-public-requester-pays/papers/2019-tx-annotation/gnomad_browser/all.baselevel.021620.ht" + pext_paths = { + "annotation_level": "gs://gnomad-public-requester-pays/papers/2019-tx-annotation/pre_computed/all.possible.snvs.tx_annotated.021520.ht", + "base_level": "gs://gnomad-public-requester-pays/papers/2019-tx-annotation/gnomad_browser/all.baselevel.021620.ht", + } + + if pext_type not in pext_paths: + valid_types = list(pext_paths.keys()) + raise DataException( + f"Invalid pext_type: '{pext_type}'. Valid options are {valid_types}." + ) + + return pext_paths[pext_type] def public_release(data_type: str) -> VersionedTableResource: @@ -239,20 +252,20 @@ def release_vcf_path(data_type: str, version: str, contig: str) -> str: return f"gs://gcp-public-data--gnomad/release/{version}/vcf/{data_type}/gnomad.{data_type}.r{version}.sites{contig}.vcf.bgz" -def pext() -> GnomadPublicTableResource: +def pext(pext_type: str='base_level') -> GnomadPublicTableResource: """ Retrieve proportion expressed across transcripts (pext) data. + :param pext_type: One of "annotation_level" or "base_level". Default is "base_level". :return: Pext Table. """ - return GnomadPublicTableResource(path=_public_pext_path()) + return GnomadPublicTableResource(path=_public_pext_path(pext_type)) def constraint() -> GnomadPublicTableResource: """ Retrieve gene constraint table. - :param version: One of the release versions of gnomAD on GRCh37. :return: Gene constraint Table. """ return GnomadPublicTableResource(path=_public_constraint_ht_path()) diff --git a/gnomad/resources/grch38/gnomad.py b/gnomad/resources/grch38/gnomad.py index 0ea7932e0..fbd2ecc50 100644 --- a/gnomad/resources/grch38/gnomad.py +++ b/gnomad/resources/grch38/gnomad.py @@ -366,24 +366,29 @@ def _public_an_ht_path(data_type: str, version: str) -> str: return f"gs://gnomad-public-requester-pays/release/{version}/ht/{data_type}/gnomad.{data_type}.v{version}.allele_number_all_sites.ht" -def _public_pext_ht_path(type: str, version: str, gtex_versioin: str) -> str: +def _public_pext_ht_path(pext_type: str='base_level') -> str: """ Get public pext hail table. - :param type: One of "base_level" or "annotation_level" - :param version: The gnomAD version used to generate pext. - :param gtex_version: The GTEx version used to generate pext - :return: Path to pext Table + :param pext_type: One of "base_level" or "annotation_level". Default is "base_level". + :return: Path to pext Table. """ - return f"gs://gnomad-public-requester-pays/release/{version}/pext/gnomad.pext.gtex_{gtex_versioin}.{type}.ht" + valid_types = ["base_level", "annotation_level"] + + if pext_type not in valid_types: + raise DataException( + f"Invalid pext_type: '{pext_type}'. Valid options are {valid_types}." + ) + + return f"gs://gnomad-public-requester-pays/release/4.1/pext/gnomad.pext.gtex_v10.{pext_type}.ht" def _public_constraint_ht_path(version: str) -> str: """ Get public constraint table path. - :param version: One of the release versions of gnomAD on GRCh38 - :return: Path to constraint Table + :param version: One of the release versions of gnomAD on GRCh38. + :return: Path to gene constraint Table. """ return f"gs://gnomad-public-requester-pays/release/{version}/constraint/gnomad.v{version}.constraint_metrics.ht" @@ -726,48 +731,35 @@ def gnomad_gks( return outputs -def pext(type: str, gtex_version: str = CURRENT_GTEX_RELEASE) -> VersionedTableResource: +def pext(pext_type: str='base_level') -> GnomadPublicTableResource: """ Retrieve pext table by type. - :param type: One of "base_level" or "annotation_level". - :param gtex_version: The GTEx version used to generate P + :param pext_type: One of "base_level" or "annotation_level". Default is "base_level". :return: Pext Table. """ - if type not in ["base_level", "annotation_level"]: - raise DataException( - f"{type} not in ['base_level', 'annotation_level'], please select a type from" - f" ['base_level', 'annotation_level']" - ) - - current_release = CURRENT_EXOME_RELEASE - releases = EXOME_RELEASES - - return VersionedTableResource( - current_release, - { - release: GnomadPublicTableResource( - path=_public_pext_ht_path(type, release, gtex_version) - ) - for release in releases - }, - ) + return GnomadPublicTableResource(path=_public_pext_ht_path(pext_type)) -def constraint() -> VersionedTableResource: +def constraint(version: str = CURRENT_EXOME_RELEASE) -> VersionedTableResource: """ - Retrieve constraint table. + Retrieve gene constraint Table. - :param version: One of the release versions of gnomAD on GRCh38 - :return: Constraint Table + :param version: One of the release versions of gnomAD on GRCh38. Default is the current exome release. + :return: constraint Table. + :raises ValueError: If the version is not a valid release. """ + # Validate the version + if version not in EXOME_RELEASES: + raise ValueError(f"Invalid version: {version}. Must be one of {EXOME_RELEASES}.") + current_release = CURRENT_EXOME_RELEASE releases = EXOME_RELEASES return VersionedTableResource( - current_release, - { - release: GnomadPublicTableResource(path=_public_constraint_ht_path(release)) - for release in releases - }, - ) + current_release, + { + release: GnomadPublicTableResource(path=_public_constraint_ht_path(release)) + for release in releases + }, + ) From 7f97ab00350a2c244b2e90337df46944f454d402 Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Wed, 18 Dec 2024 14:22:10 -0500 Subject: [PATCH 12/15] Address review comments --- gnomad/resources/grch37/gnomad.py | 2 +- gnomad/resources/grch38/gnomad.py | 20 +++++++++++--------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/gnomad/resources/grch37/gnomad.py b/gnomad/resources/grch37/gnomad.py index c1af4ca4e..4a1c23364 100644 --- a/gnomad/resources/grch37/gnomad.py +++ b/gnomad/resources/grch37/gnomad.py @@ -252,7 +252,7 @@ def release_vcf_path(data_type: str, version: str, contig: str) -> str: return f"gs://gcp-public-data--gnomad/release/{version}/vcf/{data_type}/gnomad.{data_type}.r{version}.sites{contig}.vcf.bgz" -def pext(pext_type: str='base_level') -> GnomadPublicTableResource: +def pext(pext_type: str = "base_level") -> GnomadPublicTableResource: """ Retrieve proportion expressed across transcripts (pext) data. diff --git a/gnomad/resources/grch38/gnomad.py b/gnomad/resources/grch38/gnomad.py index fbd2ecc50..2d6f6c2f5 100644 --- a/gnomad/resources/grch38/gnomad.py +++ b/gnomad/resources/grch38/gnomad.py @@ -366,7 +366,7 @@ def _public_an_ht_path(data_type: str, version: str) -> str: return f"gs://gnomad-public-requester-pays/release/{version}/ht/{data_type}/gnomad.{data_type}.v{version}.allele_number_all_sites.ht" -def _public_pext_ht_path(pext_type: str='base_level') -> str: +def _public_pext_ht_path(pext_type: str = "base_level") -> str: """ Get public pext hail table. @@ -731,7 +731,7 @@ def gnomad_gks( return outputs -def pext(pext_type: str='base_level') -> GnomadPublicTableResource: +def pext(pext_type: str = "base_level") -> GnomadPublicTableResource: """ Retrieve pext table by type. @@ -751,15 +751,17 @@ def constraint(version: str = CURRENT_EXOME_RELEASE) -> VersionedTableResource: """ # Validate the version if version not in EXOME_RELEASES: - raise ValueError(f"Invalid version: {version}. Must be one of {EXOME_RELEASES}.") + raise ValueError( + f"Invalid version: {version}. Must be one of {EXOME_RELEASES}." + ) current_release = CURRENT_EXOME_RELEASE releases = EXOME_RELEASES return VersionedTableResource( - current_release, - { - release: GnomadPublicTableResource(path=_public_constraint_ht_path(release)) - for release in releases - }, - ) + current_release, + { + release: GnomadPublicTableResource(path=_public_constraint_ht_path(release)) + for release in releases + }, + ) From f386dea1d79496c5ca29114ff5ec3bbdb77fe666 Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Wed, 18 Dec 2024 14:26:49 -0500 Subject: [PATCH 13/15] minor edit --- gnomad/resources/grch38/gnomad.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gnomad/resources/grch38/gnomad.py b/gnomad/resources/grch38/gnomad.py index 2d6f6c2f5..6267be414 100644 --- a/gnomad/resources/grch38/gnomad.py +++ b/gnomad/resources/grch38/gnomad.py @@ -368,7 +368,7 @@ def _public_an_ht_path(data_type: str, version: str) -> str: def _public_pext_ht_path(pext_type: str = "base_level") -> str: """ - Get public pext hail table. + Get public proportion expressed across transcripts (pext) data. :param pext_type: One of "base_level" or "annotation_level". Default is "base_level". :return: Path to pext Table. @@ -749,7 +749,6 @@ def constraint(version: str = CURRENT_EXOME_RELEASE) -> VersionedTableResource: :return: constraint Table. :raises ValueError: If the version is not a valid release. """ - # Validate the version if version not in EXOME_RELEASES: raise ValueError( f"Invalid version: {version}. Must be one of {EXOME_RELEASES}." From 235af442b154bec59e768cb1f94730acb6e42770 Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Wed, 18 Dec 2024 14:31:18 -0500 Subject: [PATCH 14/15] Remove unused f-string --- gnomad/resources/grch37/gnomad.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gnomad/resources/grch37/gnomad.py b/gnomad/resources/grch37/gnomad.py index 4a1c23364..9c9ab1a99 100644 --- a/gnomad/resources/grch37/gnomad.py +++ b/gnomad/resources/grch37/gnomad.py @@ -110,7 +110,7 @@ def _public_constraint_ht_path() -> str: :return: Path to constraint Table. """ - return f"gs://gnomad-public-requester-pays/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_transcript.ht" + return "gs://gnomad-public-requester-pays/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_transcript.ht" def _public_pext_path(pext_type: str = "base_level") -> str: From 3f62849597392fbefbc9417a9c64244ff3bc2355 Mon Sep 17 00:00:00 2001 From: Qin He <44242118+KoalaQin@users.noreply.github.com> Date: Wed, 18 Dec 2024 15:25:46 -0500 Subject: [PATCH 15/15] Apply review suggestions --- gnomad/resources/grch38/gnomad.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/gnomad/resources/grch38/gnomad.py b/gnomad/resources/grch38/gnomad.py index 6267be414..3403a1d14 100644 --- a/gnomad/resources/grch38/gnomad.py +++ b/gnomad/resources/grch38/gnomad.py @@ -46,8 +46,6 @@ MAJOR_RELEASES = ["v3", "v4"] CURRENT_MAJOR_RELEASE = MAJOR_RELEASES[-1] -CURRENT_GTEX_RELEASE = "v10" - GENOME_POPS = ["AFR", "AMI", "AMR", "ASJ", "EAS", "FIN", "NFE", "SAS", "OTH"] SUBSETS = { "v3": [ @@ -746,7 +744,7 @@ def constraint(version: str = CURRENT_EXOME_RELEASE) -> VersionedTableResource: Retrieve gene constraint Table. :param version: One of the release versions of gnomAD on GRCh38. Default is the current exome release. - :return: constraint Table. + :return: Gene constraint Table. :raises ValueError: If the version is not a valid release. """ if version not in EXOME_RELEASES: @@ -754,13 +752,10 @@ def constraint(version: str = CURRENT_EXOME_RELEASE) -> VersionedTableResource: f"Invalid version: {version}. Must be one of {EXOME_RELEASES}." ) - current_release = CURRENT_EXOME_RELEASE - releases = EXOME_RELEASES - return VersionedTableResource( - current_release, + CURRENT_EXOME_RELEASE, { release: GnomadPublicTableResource(path=_public_constraint_ht_path(release)) - for release in releases + for release in EXOME_RELEASES }, )