Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: drop version_engine #940

Merged
merged 4 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions docs/python_api/common/version_engine.md

This file was deleted.

158 changes: 0 additions & 158 deletions src/gentropy/common/version_engine.py

This file was deleted.

30 changes: 18 additions & 12 deletions src/gentropy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ class SessionConfig:
write_mode: str = "errorifexists"
spark_uri: str = "local[*]"
hail_home: str = os.path.dirname(hail_location)
extended_spark_conf: dict[str, str] | None = field(default_factory=dict[str, str])
extended_spark_conf: dict[str, str] | None = field(
default_factory=dict[str, str])
output_partitions: int = 200
_target_: str = "gentropy.common.session.Session"

Expand All @@ -39,7 +40,8 @@ class ColocalisationConfig(StepConfig):
credible_set_path: str = MISSING
coloc_path: str = MISSING
colocalisation_method: str = MISSING
colocalisation_method_params: dict[str, Any] = field(default_factory=dict[str, Any])
colocalisation_method_params: dict[str, Any] = field(
default_factory=dict[str, Any])
_target_: str = "gentropy.colocalisation.ColocalisationStep"


Expand Down Expand Up @@ -124,7 +126,8 @@ class EqtlCatalogueConfig(StepConfig):
eqtl_catalogue_paths_imported: str = MISSING
eqtl_catalogue_study_index_out: str = MISSING
eqtl_catalogue_credible_sets_out: str = MISSING
mqtl_quantification_methods_blacklist: list[str] = field(default_factory=lambda: [])
mqtl_quantification_methods_blacklist: list[str] = field(
default_factory=lambda: [])
eqtl_lead_pvalue_threshold: float = 1e-3
_target_: str = "gentropy.eqtl_catalogue.EqtlCatalogueStep"

Expand All @@ -146,7 +149,8 @@ class FinngenStudiesConfig(StepConfig):
)
finngen_summary_stats_url_suffix: str = ".gz"
efo_curation_mapping_url: str = "https://raw.githubusercontent.com/opentargets/curation/24.09.1/mappings/disease/manual_string.tsv"
sample_size: int = 453733 # https://www.finngen.fi/en/access_results#:~:text=Total%20sample%20size%3A%C2%A0453%2C733%C2%A0(254%2C618%C2%A0females%20and%C2%A0199%2C115%20males)
# https://www.finngen.fi/en/access_results#:~:text=Total%20sample%20size%3A%C2%A0453%2C733%C2%A0(254%2C618%C2%A0females%20and%C2%A0199%2C115%20males)
sample_size: int = 453733
_target_: str = "gentropy.finngen_studies.FinnGenStudiesStep"


Expand Down Expand Up @@ -199,7 +203,6 @@ class LDIndexConfig(StepConfig):
"nfe", # Non-Finnish European
]
)
use_version_from_input: bool = False
_target_: str = "gentropy.gnomad_ingestion.LDIndexStep"


Expand Down Expand Up @@ -409,7 +412,6 @@ class GnomadVariantConfig(StepConfig):
"remaining", # Other
]
)
use_version_from_input: bool = False
_target_: str = "gentropy.gnomad_ingestion.GnomadVariantIndexStep"


Expand All @@ -432,7 +434,6 @@ class PanUKBBConfig(StepConfig):
"EUR", # European
]
)
use_version_from_input: bool = False
_target_: str = "gentropy.pan_ukb_ingestion.PanUKBBVariantIndexStep"


Expand Down Expand Up @@ -680,7 +681,8 @@ class Config:
"""Application configuration."""

# this is unfortunately verbose due to @dataclass limitations
defaults: List[Any] = field(default_factory=lambda: ["_self_", {"step": MISSING}])
defaults: List[Any] = field(default_factory=lambda: [
"_self_", {"step": MISSING}])
step: StepConfig = MISSING
datasets: dict[str, str] = field(default_factory=dict)

Expand Down Expand Up @@ -714,7 +716,8 @@ def register_config() -> None:
name="gwas_catalog_top_hit_ingestion",
node=GWASCatalogTopHitIngestionConfig,
)
cs.store(group="step", name="ld_based_clumping", node=LDBasedClumpingConfig)
cs.store(group="step", name="ld_based_clumping",
node=LDBasedClumpingConfig)
cs.store(group="step", name="ld_index", node=LDIndexConfig)
cs.store(group="step", name="locus_to_gene", node=LocusToGeneConfig)
cs.store(
Expand All @@ -732,7 +735,8 @@ def register_config() -> None:

cs.store(group="step", name="pics", node=PICSConfig)
cs.store(group="step", name="gnomad_variants", node=GnomadVariantConfig)
cs.store(group="step", name="ukb_ppp_eur_sumstat_preprocess", node=UkbPppEurConfig)
cs.store(group="step", name="ukb_ppp_eur_sumstat_preprocess",
node=UkbPppEurConfig)
cs.store(group="step", name="variant_index", node=VariantIndexConfig)
cs.store(group="step", name="variant_to_vcf", node=ConvertToVcfStepConfig)
cs.store(
Expand Down Expand Up @@ -765,5 +769,7 @@ def register_config() -> None:
name="locus_to_gene_associations",
node=LocusToGeneAssociationsStepConfig,
)
cs.store(group="step", name="finngen_ukb_meta_ingestion", node=FinngenUkbMetaConfig)
cs.store(group="step", name="credible_set_qc", node=CredibleSetQCStepConfig)
cs.store(group="step", name="finngen_ukb_meta_ingestion",
node=FinngenUkbMetaConfig)
cs.store(group="step", name="credible_set_qc",
node=CredibleSetQCStepConfig)
22 changes: 2 additions & 20 deletions src/gentropy/gnomad_ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from gentropy.common.session import Session
from gentropy.common.types import LD_Population, VariantPopulation
from gentropy.common.version_engine import VersionEngine
from gentropy.config import GnomadVariantConfig, LDIndexConfig
from gentropy.datasource.gnomad.ld import GnomADLDMatrix
from gentropy.datasource.gnomad.variants import GnomADVariants
Expand All @@ -26,10 +25,10 @@ def __init__(
min_r2: float = LDIndexConfig().min_r2,
ld_matrix_template: str = LDIndexConfig().ld_matrix_template,
ld_index_raw_template: str = LDIndexConfig().ld_index_raw_template,
ld_populations: list[LD_Population | str] = LDIndexConfig().ld_populations,
ld_populations: list[LD_Population |
str] = LDIndexConfig().ld_populations,
liftover_ht_path: str = LDIndexConfig().liftover_ht_path,
grch37_to_grch38_chain_path: str = LDIndexConfig().grch37_to_grch38_chain_path,
use_version_from_input: bool = LDIndexConfig().use_version_from_input,
) -> None:
"""Run step.

Expand All @@ -42,17 +41,9 @@ def __init__(
ld_populations (list[LD_Population | str]): Population names derived from the ld file paths
liftover_ht_path (str): Path to the liftover ht file
grch37_to_grch38_chain_path (str): Path to the chain file used to lift over the coordinates.
use_version_from_input (bool): Append version derived from input ld_matrix_template to the output ld_index_out. Defaults to False.

In case use_version_from_input is set to True,
data source version inferred from ld_matrix_template is appended as the last path segment to the output path.
Default values are provided in LDIndexConfig.
"""
if use_version_from_input:
# amend data source version to output path
ld_index_out = VersionEngine("gnomad").amend_version(
ld_matrix_template, ld_index_out
)
(
GnomADLDMatrix(
ld_matrix_template=ld_matrix_template,
Expand Down Expand Up @@ -84,7 +75,6 @@ def __init__(
gnomad_variant_populations: list[
VariantPopulation | str
] = GnomadVariantConfig().gnomad_variant_populations,
use_version_from_input: bool = GnomadVariantConfig().use_version_from_input,
) -> None:
"""Run Variant Annotation step.

Expand All @@ -93,18 +83,10 @@ def __init__(
variant_annotation_path (str): Path to resulting dataset.
gnomad_genomes_path (str): Path to gnomAD genomes hail table, e.g. `gs://gcp-public-data--gnomad/release/4.0/ht/genomes/gnomad.genomes.v4.0.sites.ht/`.
gnomad_variant_populations (list[VariantPopulation | str]): List of populations to include.
use_version_from_input (bool): Append version derived from input gnomad_genomes_path to the output variant_annotation_path. Defaults to False.

In case use_version_from_input is set to True,
data source version inferred from gnomad_genomes_path is appended as the last path segment to the output path.
All defaults are stored in the GnomadVariantConfig.
"""
# amend data source version to output path
if use_version_from_input:
variant_annotation_path = VersionEngine("gnomad").amend_version(
gnomad_genomes_path, variant_annotation_path
)

session.logger.info("Gnomad variant annotation path:")
session.logger.info(variant_annotation_path)
# Parse variant info from source.
Expand Down
Loading
Loading