Skip to content

Commit

Permalink
Merge pull request #455 from broadinstitute/dp-nextstrain
Browse files (browse the repository at this point in the history)
parameterize and update defaults for VM shapes in nextstrain tasks
  • Loading branch information
dpark01 authored Mar 7, 2023
2 parents 8fe6562 + 3048552 commit 202d952
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 46 deletions.
89 changes: 44 additions & 45 deletions pipes/WDL/tasks/tasks_nextstrain.wdl
Original file line number | Diff line number | Diff line change
Expand Up @@ -13,10 +13,10 @@ task nextclade_one_sample {
File? pcr_primers_csv
File? virus_properties
String? dataset_name
Int disk_size = 50
String docker = "nextstrain/nextclade:2.12.0"
}
String basename = basename(genome_fasta, ".fasta")
Int disk_size = 50
command {
set -e
apt-get update
Expand Down Expand Up @@ -100,9 +100,9 @@ task nextclade_many_samples {
String? dataset_name
String basename
File? genome_ids_setdefault_blank
Int disk_size = 150
String docker = "nextstrain/nextclade:2.12.0"
}
Int disk_size = 100
command <<<
set -e
apt-get update
Expand Down Expand Up @@ -281,8 +281,8 @@ task derived_cols {
Array[File] table_map = []
String docker = "quay.io/broadinstitute/viral-core:2.1.33"
Int disk_size = 50
}
Int disk_size = 50
parameter_meta {
lab_highlight_loc: {
description: "This option copies the 'originating_lab' and 'submitting_lab' columns to new ones including a prefix, but only if they match certain criteria. The value of this string must be of the form prefix;col_header=value:col_header=value. For example, 'MA;country=USA:division=Massachusetts' will copy the originating_lab and submitting_lab columns to MA_originating_lab and MA_submitting_lab, but only for those rows where country=USA and division=Massachusetts."
Expand Down Expand Up @@ -391,12 +391,12 @@ task derived_cols {
task filter_segments {
input {
File all_samples_fasta
Int? segment = 1
Int segment = 1
File? pre_assembled_samples_fasta
Int? machine_mem_gb
Int machine_mem_gb = 3
Int disk_size = 375
}
Int disk_size = 375
command <<<
python3 <<CODE
Expand Down Expand Up @@ -424,7 +424,7 @@ task filter_segments {
>>>
runtime {
docker: "python:slim"
memory: select_first([machine_mem_gb, 3]) + " GB"
memory: machine_mem_gb + " GB"
cpu: 1
disks: "local-disk " + disk_size + " LOCAL"
disk: disk_size + " GB" # TES
Expand All @@ -449,9 +449,10 @@ task nextstrain_build_subsample {
File? keep_list
File? drop_list
Int? machine_mem_gb
Int machine_mem_gb = 50
String docker = "nextstrain/base:build-20211012T204409Z"
String nextstrain_ncov_repo_commit = "30435fb9ec8de2f045167fb90adfec12f123e80a"
Int disk_size = 750
}
parameter_meta {
alignment_msa_fasta: {
Expand All @@ -474,7 +475,6 @@ task nextstrain_build_subsample {
patterns: ["*.yaml"]
}
}
Int disk_size = 375
command <<<
set -e -o pipefail
augur version > VERSION
Expand Down Expand Up @@ -570,7 +570,7 @@ task nextstrain_build_subsample {
>>>
runtime {
docker: docker
memory: select_first([machine_mem_gb, 50]) + " GB"
memory: machine_mem_gb + " GB"
cpu : 4
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB" # TES
Expand All @@ -595,8 +595,8 @@ task nextstrain_ncov_defaults {
input {
String nextstrain_ncov_repo_commit = "30435fb9ec8de2f045167fb90adfec12f123e80a"
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 50
}
Int disk_size = 50
command {
set -e
wget -q "https://github.com/nextstrain/ncov/archive/~{nextstrain_ncov_repo_commit}.tar.gz"
Expand Down Expand Up @@ -633,10 +633,9 @@ task nextstrain_deduplicate_sequences {
String nextstrain_ncov_repo_commit = "30435fb9ec8de2f045167fb90adfec12f123e80a"
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 750
}
Int disk_size = 375
parameter_meta {
sequences_fasta: {
description: "FASTA file with multiple sequences",
Expand Down Expand Up @@ -688,10 +687,9 @@ task nextstrain_ncov_sanitize_gisaid_data {
String nextstrain_ncov_repo_commit = "30435fb9ec8de2f045167fb90adfec12f123e80a"
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 750
}
Int disk_size = 375
parameter_meta {
sequences_gisaid_fasta: {
description: "Multiple sequences downloaded from GISAID",
Expand Down Expand Up @@ -765,8 +763,8 @@ task filter_subsample_sequences {
Array[String]? include_where
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 750
}
Int disk_size = 100
parameter_meta {
sequences_fasta: {
description: "Set of sequences (unaligned fasta or aligned fasta -- one sequence per genome) or variants (vcf format) to subsample using augur filter.",
Expand Down Expand Up @@ -850,8 +848,8 @@ task filter_sequences_to_list {
String out_fname = sub(sub(basename(sequences), ".vcf", ".filtered.vcf"), ".fasta$", ".filtered.fasta")
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 750
}
Int disk_size = 200
parameter_meta {
sequences: {
description: "Set of sequences (unaligned fasta or aligned fasta -- one sequence per genome) or variants (vcf format) to subsample using augur filter.",
Expand Down Expand Up @@ -952,8 +950,8 @@ task mafft_one_chr {
String docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2"
Int mem_size = 500
Int cpus = 64
Int disk_size = 750
}
Int disk_size = 750
command <<<
set -e
Expand Down Expand Up @@ -1039,9 +1037,9 @@ task mafft_one_chr_chunked {
String docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2"
Int mem_size = 32
Int cpus = 96
Int cpus = 64
Int disk_size = 750
}
Int disk_size = 750
command <<<
set -e
Expand Down Expand Up @@ -1146,8 +1144,8 @@ task augur_mafft_align {
Boolean remove_reference = true
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 750
}
Int disk_size = 750
command <<<
set -e
augur version > VERSION
Expand Down Expand Up @@ -1187,9 +1185,9 @@ task snp_sites {
File msa_fasta
Boolean allow_wildcard_bases = true
String docker = "quay.io/biocontainers/snp-sites:2.5.1--hed695b0_0"
Int disk_size = 750
}
String out_basename = basename(msa_fasta, ".fasta")
Int disk_size = 100
command {
snp-sites -V > VERSION
snp-sites -v ~{true="" false="-c" allow_wildcard_bases} -o "~{out_basename}.vcf" "~{msa_fasta}"
Expand Down Expand Up @@ -1219,8 +1217,8 @@ task augur_mask_sites {
File? mask_bed
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 750
}
Int disk_size = 100
parameter_meta {
sequences: {
description: "Set of alignments (fasta format) or variants (vcf format) to mask.",
Expand Down Expand Up @@ -1275,10 +1273,11 @@ task draft_augur_tree {
File? vcf_reference
String? tree_builder_args
Int? cpus
Int cpus = 64
Int machine_mem_gb = 32
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 1250
}
Int disk_size = 750
parameter_meta {
msa_or_vcf: {
description: "Set of alignments (fasta format) or variants (vcf format) to construct a tree from using augur tree (iqTree).",
Expand All @@ -1303,8 +1302,8 @@ task draft_augur_tree {
>>>
runtime {
docker: docker
memory: "32 GB"
cpu: select_first([cpus, 64])
memory: machine_mem_gb + " GB"
cpu: cpus
disks: "local-disk " + disk_size + " LOCAL"
disk: disk_size + " GB" # TES
dx_instance_type: "mem1_ssd1_v2_x36"
Expand Down Expand Up @@ -1346,8 +1345,9 @@ task refine_augur_tree {
File? vcf_reference
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 750
Int machine_mem_gb = 75
}
Int disk_size = 100
parameter_meta {
msa_or_vcf: {
description: "Set of alignments (fasta format) or variants (vcf format) to use to guide Treetime.",
Expand Down Expand Up @@ -1386,7 +1386,7 @@ task refine_augur_tree {
>>>
runtime {
docker: docker
memory: "50 GB"
memory: machine_mem_gb + " GB"
cpu : 2
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB" # TES
Expand Down Expand Up @@ -1417,11 +1417,11 @@ task ancestral_traits {
File? weights
Float? sampling_bias_correction
Int? machine_mem_gb
Int machine_mem_gb = 32
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 750
}
String out_basename = basename(tree, '.nwk')
Int disk_size = 100
command <<<
set -e
augur version > VERSION
Expand All @@ -1439,7 +1439,7 @@ task ancestral_traits {
>>>
runtime {
docker: docker
memory: select_first([machine_mem_gb, 32]) + " GB"
memory: machine_mem_gb + " GB"
cpu : 4
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB" # TES
Expand Down Expand Up @@ -1472,8 +1472,8 @@ task ancestral_tree {
File? output_vcf
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 300
}
Int disk_size = 50
parameter_meta {
msa_or_vcf: {
description: "Set of alignments (fasta format) or variants (vcf format) to use to guide Treetime.",
Expand Down Expand Up @@ -1533,9 +1533,9 @@ task translate_augur_tree {
File? vcf_reference
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 300
}
String out_basename = basename(tree, '.nwk')
Int disk_size = 50
command <<<
set -e
augur version > VERSION
Expand Down Expand Up @@ -1588,11 +1588,11 @@ task tip_frequencies {
Boolean censored = false
Boolean include_internal_nodes = false
Int? machine_mem_gb
Int machine_mem_gb = 64
String docker = "nextstrain/base:build-20211012T204409Z"
String out_basename = basename(tree, '.nwk')
Int disk_size = 200
}
Int disk_size = 100
command <<<
set -e
augur version > VERSION
Expand Down Expand Up @@ -1621,7 +1621,7 @@ task tip_frequencies {
>>>
runtime {
docker: docker
memory: select_first([machine_mem_gb, 30]) + " GB"
memory: machine_mem_gb + " GB"
cpu : 4
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB" # TES
Expand Down Expand Up @@ -1650,9 +1650,9 @@ task assign_clades_to_nodes {
File clades_tsv
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 300
}
String out_basename = basename(basename(tree_nwk, ".nwk"), "_timetree")
Int disk_size = 50
command <<<
set -e
augur version > VERSION
Expand Down Expand Up @@ -1693,11 +1693,11 @@ task augur_import_beast {
String? tip_date_format
String? tip_date_delimiter
Int? machine_mem_gb
Int machine_mem_gb = 3
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 150
}
String tree_basename = basename(beast_mcc_tree, ".tree")
Int disk_size = 50
command <<<
set -e
augur version > VERSION
Expand All @@ -1715,7 +1715,7 @@ task augur_import_beast {
>>>
runtime {
docker: docker
memory: select_first([machine_mem_gb, 3]) + " GB"
memory: machine_mem_gb + " GB"
cpu : 2
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB" # TES
Expand Down Expand Up @@ -1754,11 +1754,10 @@ task export_auspice_json {
String out_basename = basename(basename(tree, ".nwk"), "_timetree")
Int? machine_mem_gb
Int machine_mem_gb = 64
String docker = "nextstrain/base:build-20211012T204409Z"
Int disk_size = 300
}
Int disk_size = 100
command <<<
set -e -o pipefail
Expand Down Expand Up @@ -1819,7 +1818,7 @@ task export_auspice_json {
>>>
runtime {
docker: docker
memory: select_first([machine_mem_gb, 64]) + " GB"
memory: machine_mem_gb + " GB"
cpu : 4
disks: "local-disk " + disk_size + " HDD"
disk: disk_size + " GB" # TES
Expand Down
2 changes: 1 addition & 1 deletion pipes/WDL/tasks/tasks_utils.wdl
Original file line number | Diff line number | Diff line change
Expand Up @@ -721,8 +721,8 @@ task filter_sequences_by_length {
Int min_non_N = 1
String docker = "quay.io/broadinstitute/viral-core:2.1.33"
Int disk_size = 750
}
Int disk_size = 300
parameter_meta {
sequences_fasta: {
description: "Set of sequences in fasta format",
Expand Down

0 comments on commit 202d952

Please sign in to comment.