Skip to content

Commit

Permalink
feat: extract config for finngen ingestion to yaml file
Browse files Browse the repository at this point in the history
  • Loading branch information
Szymon Szyszkowski committed Sep 9, 2024
1 parent 9247f05 commit c2b7cb1
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 22 deletions.
17 changes: 17 additions & 0 deletions src/ot_orchestration/dags/config/finngen_ingestion.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Configuration for the FinnGen ingestion DAG. The DAG module loads this file
# with read_yaml_config and forwards the values below as `step.*` arguments
# and as cluster settings.

# Forwarded to the finngen_studies step as step.finngen_release_prefix.
finngen_release_prefix: FINNGEN_R11_

# inputs
# NOTE(review): efo_mappings_path is not referenced by the DAG lines visible in
# this commit -- confirm it is consumed elsewhere before relying on it.
efo_mappings_path: https://raw.githubusercontent.com/opentargets/curation/24.09.1/mappings/disease/manual_string.tsv
# Forwarded to finngen_finemapping_ingestion as
# step.finngen_susie_finemapping_cs_summary_files.
finngen_credible_set_input_glob: gs://finngen-public-data-r11/finemap/summary/*.cred.summary.tsv
# Forwarded to finngen_finemapping_ingestion as
# step.finngen_susie_finemapping_snp_files.
finngen_snp_input_glob: gs://finngen-public-data-r11/finemap/full/susie/*.snp.bgz
# Forwarded to finngen_studies as step.finngen_phenotype_table_url.
phenotype_table_url: https://r11.finngen.fi/api/phenos
# Forwarded to finngen_studies as step.finngen_summary_stats_url_prefix and
# step.finngen_summary_stats_url_suffix respectively.
finngen_summary_stats_url_prefix: gs://finngen-public-data-r11/summary_stats/finngen_R11_
finngen_summary_stats_url_suffix: ".gz"

# outputs
# Forwarded to finngen_finemapping_ingestion as step.finngen_finemapping_out.
credible_set_output_path: gs://finngen_data/r11/credible_set_datasets/
# Forwarded to finngen_studies as step.finngen_study_index_out.
study_index_output_path: gs://finngen_data/r11/study_index

# cluster setup
# Cluster name shared by create_cluster, install_dependencies, both submit_step
# calls and delete_cluster in the DAG.
cluster_name: otg-ingestion-finngen
# Passed to create_cluster as autoscaling_policy.
autoscaling_policy: finngen-preprocess
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,10 @@
install_dependencies,
submit_step,
)
from ot_orchestration.utils.utils import read_yaml_config

EFO_MAPPINGS_PATH = "https://raw.githubusercontent.com/opentargets/curation/24.09.1/mappings/disease/manual_string.tsv"
STUDY_INDEX_OUT = "gs://finngen_data/r11/study_index"
CREDIBLE_SETS_SUMMARY_IN = (
"gs://finngen-public-data-r11/finemap/summary/*.cred.summary.tsv"
)
SNP_IN = "gs://finngen-public-data-r11/finemap/full/susie/*.snp.bgz"
FINNGEN_PREFIX = "FINNGEN_R11_"
FINEMAPPING_OUT = "gs://finngen_data/r11/finemapping"

CLUSTER_NAME = "otg-finemapping-ingestion-finngen"
AUTOSCALING = "finngen-preprocess"
SOURCE_CONFIG_FILE_PATH = Path(__file__).parent / "config" / "finngen_ingestion.yaml"
config = read_yaml_config(SOURCE_CONFIG_FILE_PATH)

with DAG(
dag_id=Path(__file__).stem,
Expand All @@ -35,39 +27,42 @@
**common.shared_dag_kwargs,
):
finngen_finemapping_ingestion = submit_step(
cluster_name=CLUSTER_NAME,
cluster_name=config["cluster_name"],
step_id="finngen_finemapping_ingestion",
task_id="finngen_finemapping_ingestion",
other_args=[
f"step.finngen_finemapping_out={FINEMAPPING_OUT}",
f"step.finngen_release_prefix={FINNGEN_PREFIX}",
f"step.finngen_susie_finemapping_snp_files={SNP_IN}",
f"step.finngen_susie_finemapping_cs_summary_files={CREDIBLE_SETS_SUMMARY_IN}",
f"step.finngen_finemapping_out={config['credible_set_output_path']}",
f"step.finngen_susie_finemapping_snp_files={config['finngen_snp_input_glob']}",
f"step.finngen_susie_finemapping_cs_summary_files={config['finngen_credible_set_input_glob']}",
"step.session.start_hail=true",
"step.session.write_mode=overwrite",
],
trigger_rule=TriggerRule.ALL_DONE,
)

finngen_study_index = submit_step(
cluster_name=CLUSTER_NAME,
cluster_name=config["cluster_name"],
step_id="finngen_studies",
task_id="finngen_studies",
other_args=[
f"step.finngen_study_index_out={STUDY_INDEX_OUT}",
f"step.finngen_study_index_out={config['study_index_output_path']}",
f"step.finngen_phenotype_table_url={config['phenotype_table_url']}",
f"step.finngen_release_prefix={config['finngen_release_prefix']}",
f"step.finngen_summary_stats_url_prefix={config['finngen_summary_stats_url_prefix']}",
f"step.finngen_summary_stats_url_suffix={config['finngen_summary_stats_url_suffix']}",
"step.session.write_mode=overwrite",
],
trigger_rule=TriggerRule.ALL_DONE,
)
chain(
create_cluster(
CLUSTER_NAME,
autoscaling_policy=AUTOSCALING,
config["cluster_name"],
autoscaling_policy=config["autoscaling_policy"],
master_disk_size=2000,
num_workers=6,
),
install_dependencies(CLUSTER_NAME),
install_dependencies(config["cluster_name"]),
finngen_study_index,
finngen_finemapping_ingestion,
delete_cluster(CLUSTER_NAME),
delete_cluster(config["cluster_name"]),
)

0 comments on commit c2b7cb1

Please sign in to comment.