Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pext and constraint resources #743

Merged
merged 15 commits into from
Dec 18, 2024
51 changes: 51 additions & 0 deletions gnomad/resources/grch37/gnomad.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,29 @@ def _liftover_data_path(data_type: str, version: str) -> str:
return f"gs://gnomad-public-requester-pays/release/{version}/liftover_grch38/ht/{data_type}/gnomad.{data_type}.r{version}.sites.liftover_grch38.ht"


def _public_constraint_ht_path(version: str) -> str:
"""
Get public constraint table path.

:param version: One of the release versions of gnomAD on GRCh38
:return: Path to constraint Table
"""
return f"gs://gnomad-public-requester-pays/release/{version}/constraint/gnomad.v{version}.lof_metrics.by_transcript.ht"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The path looks good for 2.1.1, but not for 2.1. I'm fine with only supporting 2.1.1 for GRCh37 though, so we could just remove the `version` parameter.



def _public_pext_path(data_format: str) -> str:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add an option so you can load either base level or annotation level

"""
Get public pext data.

:param data_format: Either "tsv" or "ht".
:return: Path to pext data.
"""
# TODO: Could you confirm if this is correct path?
if data_format not in ["tsv.gz", "ht"]:
raise DataException(f"{data_format} not in ['tsv.gz', 'ht']")
return f"gs://gnomad-public-requester-pays/papers/2019-tx-annotation/gnomad_browser/all.baselevel.021620.{data_format}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is correct for the base level

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't find the annotation level, do you have a clue?



def public_release(data_type: str) -> VersionedTableResource:
"""
Retrieve publicly released versioned table resource.
Expand Down Expand Up @@ -219,3 +242,31 @@ def release_vcf_path(data_type: str, version: str, contig: str) -> str:

contig = f".{contig}" if contig else ""
return f"gs://gcp-public-data--gnomad/release/{version}/vcf/{data_type}/gnomad.{data_type}.r{version}.sites{contig}.vcf.bgz"


def pext() -> GnomadPublicTableResource:
    """
    Retrieve the pext Hail Table resource.

    :return: Pext Table resource pointing at the "ht" pext path.
    """
    # NOTE(review): an option to choose between base level and annotation
    # level was requested in review; this function takes no such option.
    pext_ht_path = _public_pext_path("ht")
    return GnomadPublicTableResource(path=pext_ht_path)


def constraint() -> VersionedTableResource:
    """
    Retrieve the versioned constraint table resource.

    One `GnomadPublicTableResource` is registered for every release in
    `EXOME_RELEASES`, and `CURRENT_EXOME_RELEASE` is passed as the current
    release.

    :return: Constraint Table.
    """
    return VersionedTableResource(
        CURRENT_EXOME_RELEASE,
        {
            release: GnomadPublicTableResource(
                path=_public_constraint_ht_path(release)
            )
            for release in EXOME_RELEASES
        },
    )
70 changes: 70 additions & 0 deletions gnomad/resources/grch38/gnomad.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
MAJOR_RELEASES = ["v3", "v4"]
# The current major release is the last entry of MAJOR_RELEASES.
CURRENT_MAJOR_RELEASE = MAJOR_RELEASES[-1]

# Default GTEx release used by `pext` when no `gtex_version` is given.
CURRENT_GTEX_RELEASE = "v10"

GENOME_POPS = ["AFR", "AMI", "AMR", "ASJ", "EAS", "FIN", "NFE", "SAS", "OTH"]
SUBSETS = {
Expand Down Expand Up @@ -365,6 +366,28 @@ def _public_an_ht_path(data_type: str, version: str) -> str:
return f"gs://gnomad-public-requester-pays/release/{version}/ht/{data_type}/gnomad.{data_type}.v{version}.allele_number_all_sites.ht"


def _public_pext_ht_path(type: str, version: str, gtex_versioin: str) -> str:
"""
Get public pext hail table.

:param type: One of "base_level" or "annotation_level"
:param version: The gnomAD version used to generate pext.
:param gtex_version: The GTEx version used to generate pext
:return: Path to pext Table
"""
return f"gs://gnomad-public-requester-pays/release/{version}/pext/gnomad.pext.gtex_{gtex_versioin}.{type}.ht"


def _public_constraint_ht_path(version: str) -> str:
"""
Get public constraint table path.

:param version: One of the release versions of gnomAD on GRCh38
:return: Path to constraint Table
"""
return f"gs://gnomad-public-requester-pays/release/{version}/constraint/gnomad.v{version}.constraint_metrics.ht"


def public_release(data_type: str) -> VersionedTableResource:
"""
Retrieve publicly released versioned table resource.
Expand Down Expand Up @@ -701,3 +724,50 @@ def gnomad_gks(
outputs.append(out)

return outputs


def pext(type: str, gtex_version: str = CURRENT_GTEX_RELEASE) -> VersionedTableResource:
    """
    Retrieve pext table by type.

    One `GnomadPublicTableResource` is registered for every release in
    `EXOME_RELEASES`, and `CURRENT_EXOME_RELEASE` is passed as the current
    release.

    :param type: One of "base_level" or "annotation_level".
    :param gtex_version: The GTEx version used to generate pext. Default is
        `CURRENT_GTEX_RELEASE`.
    :return: Pext Table.
    :raises DataException: If `type` is not "base_level" or "annotation_level".
    """
    # Single source of truth for the accepted types; the list repr keeps the
    # error message text unchanged.
    valid_types = ["base_level", "annotation_level"]
    if type not in valid_types:
        raise DataException(
            f"{type} not in {valid_types}, please select a type from {valid_types}"
        )

    return VersionedTableResource(
        CURRENT_EXOME_RELEASE,
        {
            release: GnomadPublicTableResource(
                path=_public_pext_ht_path(type, release, gtex_version)
            )
            for release in EXOME_RELEASES
        },
    )


def constraint() -> VersionedTableResource:
    """
    Retrieve the versioned constraint table resource.

    One `GnomadPublicTableResource` is registered for every release in
    `EXOME_RELEASES`, and `CURRENT_EXOME_RELEASE` is passed as the current
    release.

    :return: Constraint Table.
    """
    return VersionedTableResource(
        CURRENT_EXOME_RELEASE,
        {
            release: GnomadPublicTableResource(
                path=_public_constraint_ht_path(release)
            )
            for release in EXOME_RELEASES
        },
    )
Loading