diff --git a/pipes/WDL/tasks/tasks_assembly.wdl b/pipes/WDL/tasks/tasks_assembly.wdl index 6b2f75e74..a641136ed 100644 --- a/pipes/WDL/tasks/tasks_assembly.wdl +++ b/pipes/WDL/tasks/tasks_assembly.wdl @@ -18,6 +18,8 @@ task assemble { String docker = "quay.io/broadinstitute/viral-assemble:2.1.16.1" } + Int disk_size = 375 + command { set -ex -o pipefail @@ -57,7 +59,8 @@ task assemble { docker: docker memory: select_first([machine_mem_gb, 15]) + " GB" cpu: 4 - disks: "local-disk 375 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x8" maxRetries: 2 } @@ -87,6 +90,8 @@ task scaffold { String sample_name = basename(basename(contigs_fasta, ".fasta"), ".assembly1-spades") } + Int disk_size = 375 + command { set -ex -o pipefail @@ -150,7 +155,8 @@ task scaffold { docker: docker memory: select_first([machine_mem_gb, 31]) + " GB" cpu: 4 - disks: "local-disk 375 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x8" maxRetries: 2 } @@ -174,6 +180,7 @@ task ivar_trim { } String bam_basename=basename(aligned_bam, ".bam") + Int disk_size = 375 parameter_meta { aligned_bam: { description: "aligned reads in BAM format", patterns: ["*.bam"] } @@ -215,7 +222,8 @@ task ivar_trim { docker: docker memory: select_first([machine_mem_gb, 7]) + " GB" cpu: 4 - disks: "local-disk 375 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x4" maxRetries: 2 } @@ -231,6 +239,8 @@ task ivar_trim_stats { String docker = "quay.io/broadinstitute/py3-bio:0.1.2" } + Int disk_size = 50 + command <<< set -e python3<.tar.zst, krona-.tar.zst, and taxdump-.tar.gz" } taxonomy_db_tgz: { @@ -457,7 +467,8 @@ task build_kraken2_db { runtime { docker: "${docker}" memory: select_first([machine_mem_gb, 100]) + " GB" - disks: "local-disk 750 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES cpu: 16 dx_instance_type: "mem3_ssd1_v2_x16" preemptible: 0 @@ -494,6 +505,7 @@ task blastx { } String out_basename=basename(contigs_fasta, '.fasta') + Int disk_size = 375 command { set -ex -o pipefail @@ -544,7 +556,8 @@ task blastx { docker: "${docker}" memory: select_first([machine_mem_gb, 8]) + " GB" cpu: 32 - disks: "local-disk 375 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x36" preemptible: 1 maxRetries: 2 @@ -567,6 +580,8 @@ task krona { String docker = "quay.io/broadinstitute/viral-classify:2.1.16.0" } + Int disk_size = 50 + command { set -ex -o pipefail if [ -z "$TMPDIR" ]; then @@ -616,7 +631,8 @@ task krona { docker: "${docker}" memory: "3 GB" cpu: 1 - disks: "local-disk 50 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd2_v2_x2" maxRetries: 2 } @@ -631,6 +647,8 @@ task krona_merge { String docker = "biocontainers/krona:v2.7.1_cv1" } + Int disk_size = 50 + command { set -ex -o pipefail ktImportKrona | head -2 | tail -1 | cut -f 2-3 -d ' ' | tee VERSION @@ -646,7 +664,8 @@ task krona_merge { docker: "${docker}" memory: select_first([machine_mem_gb, 3]) + " GB" cpu: 1 - disks: "local-disk 50 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd2_v2_x2" maxRetries: 2 } @@ -669,6 +688,7 @@ task filter_bam_to_taxa { } String out_basename = basename(classified_bam, ".bam") + "." + out_filename_suffix + Int disk_size = 375 command { set -ex -o pipefail @@ -735,7 +755,8 @@ task filter_bam_to_taxa { runtime { docker: "${docker}" memory: select_first([machine_mem_gb, 26]) + " GB" - disks: "local-disk 375 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES cpu: 4 dx_instance_type: "mem3_ssd1_v2_x4" maxRetries: 2 @@ -754,6 +775,7 @@ task kaiju { } String input_basename = basename(reads_unmapped_bam, ".bam") + Int disk_size = 375 command { set -ex -o pipefail @@ -810,7 +832,8 @@ task kaiju { docker: "${docker}" memory: select_first([machine_mem_gb, 100]) + " GB" cpu: 16 - disks: "local-disk 375 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES dx_instance_type: "mem3_ssd1_v2_x16" maxRetries: 2 } diff --git a/pipes/WDL/tasks/tasks_ncbi.wdl b/pipes/WDL/tasks/tasks_ncbi.wdl index 47c3109c5..3d58d0432 100644 --- a/pipes/WDL/tasks/tasks_ncbi.wdl +++ b/pipes/WDL/tasks/tasks_ncbi.wdl @@ -437,6 +437,7 @@ task sra_meta_prep { String out_name = "sra_metadata.tsv" String docker="quay.io/broadinstitute/viral-core:2.1.33" } + Int disk_size = 100 parameter_meta { cleaned_bam_filepaths: { description: "Unaligned bam files containing cleaned (submittable) reads.", @@ -544,7 +545,8 @@ task sra_meta_prep { docker: docker memory: "1 GB" cpu: 1 - disks: "local-disk 100 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } diff --git a/pipes/WDL/tasks/tasks_ncbi_tools.wdl b/pipes/WDL/tasks/tasks_ncbi_tools.wdl index 50286a436..4bbcea347 100644 --- a/pipes/WDL/tasks/tasks_ncbi_tools.wdl +++ b/pipes/WDL/tasks/tasks_ncbi_tools.wdl @@ -8,12 +8,11 @@ task Fetch_SRA_to_BAM { Int? machine_mem_gb String docker = "quay.io/broadinstitute/ncbi-tools:2.10.7.10" } - + Int disk_size = 750 meta { description: "This searches NCBI SRA for accessions using the Entrez interface, collects associated metadata, and returns read sets as unaligned BAM files with metadata loaded in. Useful metadata from BioSample is also output from this task directly. This has been tested with both SRA and ENA accessions. This queries the NCBI production database, and as such, the output of this task is non-deterministic given the same input." volatile: true } - command <<< set -e # fetch SRA metadata on this record @@ -138,7 +137,8 @@ task Fetch_SRA_to_BAM { runtime { cpu: 2 memory: select_first([machine_mem_gb, 6]) + " GB" - disks: "local-disk 750 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES dx_instance_type: "mem2_ssd1_v2_x2" docker: docker maxRetries: 2 @@ -152,10 +152,11 @@ task biosample_tsv_filter_preexisting { String out_basename = "biosample_attributes" String docker = "quay.io/broadinstitute/ncbi-tools:2.10.7.10" } + Int disk_size = 50 meta { description: "This task takes a metadata TSV for submission to NCBI BioSample and filters out entries that have already been submitted to NCBI. This queries the NCBI production database, and as such, the output of this task is non-deterministic given the same input." volatile: true - } + } command <<< set -e @@ -198,7 +199,8 @@ task biosample_tsv_filter_preexisting { runtime { cpu: 2 memory: "3 GB" - disks: "local-disk 50 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem2_ssd1_v2_x2" docker: docker maxRetries: 2 @@ -212,6 +214,7 @@ task fetch_biosamples { String out_basename = "biosample_attributes" String docker = "quay.io/broadinstitute/ncbi-tools:2.10.7.10" } + Int disk_size = 50 meta { description: "This searches NCBI BioSample for accessions or keywords using the Entrez interface and returns any hits in the form of a BioSample attributes TSV. This queries the NCBI production database, and as such, the output of this task is non-deterministic given the same input." volatile: true @@ -228,7 +231,8 @@ task fetch_biosamples { runtime { cpu: 2 memory: "3 GB" - disks: "local-disk 50 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem2_ssd1_v2_x2" docker: docker maxRetries: 2 @@ -246,7 +250,7 @@ task ncbi_sftp_upload { String docker = "quay.io/broadinstitute/ncbi-tools:2.10.7.10" } - + Int disk_size = 100 command <<< set -e cd /opt/converter @@ -275,7 +279,8 @@ task ncbi_sftp_upload { runtime { cpu: 2 memory: "2 GB" - disks: "local-disk 100 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem2_ssd1_v2_x2" docker: docker maxRetries: 0 @@ -291,6 +296,7 @@ task sra_tsv_to_xml { String docker = "quay.io/broadinstitute/ncbi-tools:2.10.7.10" } + Int disk_size = 50 command <<< set -e cd /opt/converter @@ -312,7 +318,8 @@ task sra_tsv_to_xml { runtime { cpu: 1 memory: "2 GB" - disks: "local-disk 50 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem2_ssd1_v2_x2" docker: docker maxRetries: 2 @@ -326,6 +333,7 @@ task biosample_submit_tsv_to_xml { String docker = "quay.io/broadinstitute/ncbi-tools:2.10.7.10" } + Int disk_size = 50 meta { description: "This converts a web portal submission TSV for NCBI BioSample into an ftp-appropriate XML submission for NCBI BioSample. It does not connect to NCBI, and does not submit or fetch any data." } @@ -347,7 +355,8 @@ task biosample_submit_tsv_to_xml { runtime { cpu: 1 memory: "2 GB" - disks: "local-disk 50 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem2_ssd1_v2_x2" docker: docker maxRetries: 2 @@ -363,6 +372,7 @@ task biosample_submit_tsv_ftp_upload { String docker = "quay.io/broadinstitute/ncbi-tools:2.10.7.10" } String base=basename(meta_submit_tsv, '.tsv') + Int disk_size = 100 meta { description: "This registers a table of metadata with NCBI BioSample. It accepts a TSV similar to the web UI input at submit.ncbi.nlm.nih.gov, but converts to an XML, submits via their FTP/XML API, awaits a response, and retrieves a resulting attributes table and returns that as a TSV. This task registers live data with the production NCBI database." } @@ -386,7 +396,8 @@ task biosample_submit_tsv_ftp_upload { runtime { cpu: 2 memory: "2 GB" - disks: "local-disk 100 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem2_ssd1_v2_x2" docker: docker maxRetries: 0 @@ -401,6 +412,7 @@ task biosample_xml_response_to_tsv { String docker = "quay.io/broadinstitute/ncbi-tools:2.10.7.10" } String out_name = "~{basename(meta_submit_tsv, '.tsv')}-attributes.tsv" + Int disk_size = 100 meta { description: "This converts an FTP-based XML response from BioSample into a web-portal-style attributes.tsv file with metadata and accessions. This task does not communicate with NCBI, it only parses pre-retrieved responses." } @@ -422,7 +434,8 @@ task biosample_xml_response_to_tsv { runtime { cpu: 2 memory: "2 GB" - disks: "local-disk 100 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem2_ssd1_v2_x2" docker: docker maxRetries: 2 @@ -438,6 +451,7 @@ task group_sra_bams_by_biosample { Array[String] library_strategies Array[String] seq_platforms } + Int disk_size = 100 parameter_meta { bam_filepaths: { description: "all bam files", @@ -512,7 +526,8 @@ task group_sra_bams_by_biosample { docker: "python:slim" memory: "1 GB" cpu: 1 - disks: "local-disk 100 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index a71165cb2..4c6e30a96 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -16,6 +16,7 @@ task nextclade_one_sample { String docker = "nextstrain/nextclade:2.5.0" } String basename = basename(genome_fasta, ".fasta") + Int disk_size = 50 command { set -e apt-get update @@ -68,7 +69,8 @@ task nextclade_one_sample { docker: docker memory: "3 GB" cpu: 2 - disks: "local-disk 50 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } @@ -100,6 +102,7 @@ task nextclade_many_samples { File? genome_ids_setdefault_blank String docker = "nextstrain/nextclade:2.5.0" } + Int disk_size = 100 command <<< set -e apt-get update @@ -175,7 +178,8 @@ task nextclade_many_samples { docker: docker memory: "3 GB" cpu: 4 - disks: "local-disk 100 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x4" maxRetries: 2 } @@ -278,6 +282,7 @@ task derived_cols { String docker = "quay.io/broadinstitute/viral-core:2.1.33" } + Int disk_size = 50 parameter_meta { lab_highlight_loc: { description: "This option copies the 'originating_lab' and 'submitting_lab' columns to new ones including a prefix, but only if they match certain criteria. The value of this string must be of the form prefix;col_header=value:col_header=value. For example, 'MA;country=USA:division=Massachusetts' will copy the originating_lab and submitting_lab columns to MA_originating_lab and MA_submitting_lab, but only for those rows where country=USA and division=Massachusetts." @@ -373,7 +378,8 @@ task derived_cols { docker: docker memory: "1 GB" cpu: 1 - disks: "local-disk 50 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } @@ -390,6 +396,7 @@ task filter_segments { Int? machine_mem_gb } + Int disk_size = 375 command <<< python3 < VERSION @@ -563,7 +572,8 @@ task nextstrain_build_subsample { docker: docker memory: select_first([machine_mem_gb, 50]) + " GB" cpu : 4 - disks: "local-disk 375 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem3_ssd1_v2_x8" maxRetries: 2 } @@ -586,6 +596,7 @@ task nextstrain_ncov_defaults { String nextstrain_ncov_repo_commit = "30435fb9ec8de2f045167fb90adfec12f123e80a" String docker = "nextstrain/base:build-20211012T204409Z" } + Int disk_size = 50 command { set -e wget -q "https://github.com/nextstrain/ncov/archive/~{nextstrain_ncov_repo_commit}.tar.gz" @@ -595,7 +606,8 @@ task nextstrain_ncov_defaults { docker: docker memory: "1 GB" cpu: 1 - disks: "local-disk 50 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } @@ -623,6 +635,8 @@ task nextstrain_deduplicate_sequences { String docker = "nextstrain/base:build-20211012T204409Z" } + Int disk_size = 375 + parameter_meta { sequences_fasta: { description: "FASTA file with multiple sequences", @@ -651,7 +665,8 @@ task nextstrain_deduplicate_sequences { docker: docker memory: "7 GB" cpu: 1 - disks: "local-disk 375 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES dx_instance_type: "mem2_ssd1_v2_x2" maxRetries: 2 } @@ -675,6 +690,8 @@ task nextstrain_ncov_sanitize_gisaid_data { String docker = "nextstrain/base:build-20211012T204409Z" } + Int disk_size = 375 + parameter_meta { sequences_gisaid_fasta: { description: "Multiple sequences downloaded from GISAID", @@ -713,7 +730,8 @@ task nextstrain_ncov_sanitize_gisaid_data { docker: docker memory: "7 GB" cpu: 1 - disks: "local-disk 375 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES dx_instance_type: "mem2_ssd1_v2_x2" maxRetries: 2 } @@ -748,6 +766,7 @@ task filter_subsample_sequences { String docker = "nextstrain/base:build-20211012T204409Z" } + Int disk_size = 100 parameter_meta { sequences_fasta: { description: "Set of sequences (unaligned fasta or aligned fasta -- one sequence per genome) or variants (vcf format) to subsample using augur filter.", @@ -804,7 +823,8 @@ task filter_subsample_sequences { docker: docker memory: "15 GB" cpu : 4 - disks: "local-disk 100 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x4" preemptible: 1 maxRetries: 2 @@ -831,6 +851,7 @@ task filter_sequences_to_list { String out_fname = sub(sub(basename(sequences), ".vcf", ".filtered.vcf"), ".fasta$", ".filtered.fasta") String docker = "nextstrain/base:build-20211012T204409Z" } + Int disk_size = 200 parameter_meta { sequences: { description: "Set of sequences (unaligned fasta or aligned fasta -- one sequence per genome) or variants (vcf format) to subsample using augur filter.", @@ -898,7 +919,8 @@ task filter_sequences_to_list { docker: docker memory: "7 GB" cpu : 2 - disks: "local-disk 200 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x4" preemptible: 1 maxRetries: 2 @@ -931,6 +953,7 @@ task mafft_one_chr { Int mem_size = 500 Int cpus = 64 } + Int disk_size = 750 command <<< set -e @@ -987,7 +1010,8 @@ task mafft_one_chr { docker: docker memory: mem_size + " GB" cpu : cpus - disks: "local-disk 750 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES preemptible: 0 dx_instance_type: "mem3_ssd1_v2_x36" maxRetries: 2 @@ -1017,6 +1041,7 @@ task mafft_one_chr_chunked { Int mem_size = 32 Int cpus = 96 } + Int disk_size = 750 command <<< set -e @@ -1093,7 +1118,8 @@ task mafft_one_chr_chunked { docker: docker memory: mem_size + " GB" cpu : cpus - disks: "local-disk 750 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES preemptible: 0 dx_instance_type: "mem3_ssd1_v2_x36" maxRetries: 2 @@ -1121,6 +1147,7 @@ task augur_mafft_align { String docker = "nextstrain/base:build-20211012T204409Z" } + Int disk_size = 750 command <<< set -e augur version > VERSION @@ -1140,7 +1167,8 @@ task augur_mafft_align { docker: docker memory: "180 GB" cpu : 64 - disks: "local-disk 750 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES preemptible: 0 dx_instance_type: "mem3_ssd2_v2_x32" maxRetries: 2 @@ -1161,6 +1189,7 @@ task snp_sites { String docker = "quay.io/biocontainers/snp-sites:2.5.1--hed695b0_0" } String out_basename = basename(msa_fasta, ".fasta") + Int disk_size = 100 command { snp-sites -V > VERSION snp-sites -v ~{true="" false="-c" allow_wildcard_bases} -o "~{out_basename}.vcf" "~{msa_fasta}" @@ -1169,7 +1198,8 @@ task snp_sites { docker: docker memory: "31 GB" cpu : 2 - disks: "local-disk 100 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES preemptible: 0 dx_instance_type: "mem3_ssd1_v2_x4" maxRetries: 2 @@ -1190,6 +1220,7 @@ task augur_mask_sites { String docker = "nextstrain/base:build-20211012T204409Z" } + Int disk_size = 100 parameter_meta { sequences: { description: "Set of alignments (fasta format) or variants (vcf format) to mask.", @@ -1216,7 +1247,8 @@ task augur_mask_sites { docker: docker memory: "3 GB" cpu : 4 - disks: "local-disk 100 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES preemptible: 1 dx_instance_type: "mem1_ssd1_v2_x4" maxRetries: 2 @@ -1246,6 +1278,7 @@ task draft_augur_tree { Int? cpus String docker = "nextstrain/base:build-20211012T204409Z" } + Int disk_size = 750 parameter_meta { msa_or_vcf: { description: "Set of alignments (fasta format) or variants (vcf format) to construct a tree from using augur tree (iqTree).", @@ -1272,7 +1305,8 @@ task draft_augur_tree { docker: docker memory: "32 GB" cpu: select_first([cpus, 64]) - disks: "local-disk 750 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x36" preemptible: 0 maxRetries: 2 @@ -1313,6 +1347,7 @@ task refine_augur_tree { String docker = "nextstrain/base:build-20211012T204409Z" } + Int disk_size = 100 parameter_meta { msa_or_vcf: { description: "Set of alignments (fasta format) or variants (vcf format) to use to guide Treetime.", @@ -1353,7 +1388,8 @@ task refine_augur_tree { docker: docker memory: "50 GB" cpu : 2 - disks: "local-disk 100 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem3_ssd1_v2_x8" preemptible: 0 maxRetries: 2 @@ -1385,6 +1421,7 @@ task ancestral_traits { String docker = "nextstrain/base:build-20211012T204409Z" } String out_basename = basename(tree, '.nwk') + Int disk_size = 100 command <<< set -e augur version > VERSION @@ -1404,7 +1441,8 @@ task ancestral_traits { docker: docker memory: select_first([machine_mem_gb, 32]) + " GB" cpu : 4 - disks: "local-disk 100 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem3_ssd1_v2_x4" preemptible: 1 maxRetries: 2 @@ -1435,6 +1473,7 @@ task ancestral_tree { String docker = "nextstrain/base:build-20211012T204409Z" } + Int disk_size = 50 parameter_meta { msa_or_vcf: { description: "Set of alignments (fasta format) or variants (vcf format) to use to guide Treetime.", @@ -1464,7 +1503,8 @@ task ancestral_tree { docker: docker memory: "50 GB" cpu : 4 - disks: "local-disk 50 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem3_ssd1_v2_x8" preemptible: 0 maxRetries: 2 @@ -1495,6 +1535,7 @@ task translate_augur_tree { String docker = "nextstrain/base:build-20211012T204409Z" } String out_basename = basename(tree, '.nwk') + Int disk_size = 50 command <<< set -e augur version > VERSION @@ -1511,7 +1552,8 @@ task translate_augur_tree { docker: docker memory: "2 GB" cpu : 1 - disks: "local-disk 50 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" preemptible: 1 maxRetries: 2 @@ -1550,6 +1592,7 @@ task tip_frequencies { String docker = "nextstrain/base:build-20211012T204409Z" String out_basename = basename(tree, '.nwk') } + Int disk_size = 100 command <<< set -e augur version > VERSION @@ -1580,7 +1623,8 @@ task tip_frequencies { docker: docker memory: select_first([machine_mem_gb, 30]) + " GB" cpu : 4 - disks: "local-disk 100 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem3_ssd2_x4" preemptible: 1 maxRetries: 2 @@ -1608,6 +1652,7 @@ task assign_clades_to_nodes { String docker = "nextstrain/base:build-20211012T204409Z" } String out_basename = basename(basename(tree_nwk, ".nwk"), "_timetree") + Int disk_size = 50 command <<< set -e augur version > VERSION @@ -1623,7 +1668,8 @@ task assign_clades_to_nodes { docker: docker memory: "2 GB" cpu : 1 - disks: "local-disk 50 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" preemptible: 1 maxRetries: 2 @@ -1651,6 +1697,7 @@ task augur_import_beast { String docker = "nextstrain/base:build-20211012T204409Z" } String tree_basename = basename(beast_mcc_tree, ".tree") + Int disk_size = 50 command <<< set -e augur version > VERSION @@ -1670,7 +1717,8 @@ task augur_import_beast { docker: docker memory: select_first([machine_mem_gb, 3]) + " GB" cpu : 2 - disks: "local-disk 50 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" preemptible: 1 maxRetries: 2 @@ -1709,6 +1757,8 @@ task export_auspice_json { Int? machine_mem_gb String docker = "nextstrain/base:build-20211012T204409Z" } + + Int disk_size = 100 command <<< set -e -o pipefail @@ -1771,7 +1821,8 @@ task export_auspice_json { docker: docker memory: select_first([machine_mem_gb, 64]) + " GB" cpu : 4 - disks: "local-disk 100 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem3_ssd1_v2_x8" preemptible: 0 maxRetries: 2 diff --git a/pipes/WDL/tasks/tasks_read_utils.wdl b/pipes/WDL/tasks/tasks_read_utils.wdl index f5db1cde2..612619fd7 100644 --- a/pipes/WDL/tasks/tasks_read_utils.wdl +++ b/pipes/WDL/tasks/tasks_read_utils.wdl @@ -5,6 +5,7 @@ task max { Array[Int] list Int default_empty = 0 } + Int disk_size = 10 command <<< python3 << CODE inlist = '~{sep="*" list}'.split('*') @@ -18,7 +19,8 @@ task max { docker: "python:slim" memory: "1 GB" cpu: 1 - disks: "local-disk 10 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } @@ -28,6 +30,7 @@ task group_bams_by_sample { input { Array[File] bam_filepaths } + Int disk_size = 100 parameter_meta { bam_filepaths: { description: "all bam files", @@ -71,7 +74,8 @@ task group_bams_by_sample { docker: "python:slim" memory: "1 GB" cpu: 1 - disks: "local-disk 100 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } @@ -83,6 +87,7 @@ task get_sample_meta { String docker = "quay.io/broadinstitute/viral-core:2.1.33" } + Int disk_size = 50 command <<< python3 << CODE import os.path @@ -122,7 +127,8 @@ task get_sample_meta { docker: docker memory: "1 GB" cpu: 1 - disks: "local-disk 50 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } @@ -142,6 +148,8 @@ task merge_and_reheader_bams { String docker = "quay.io/broadinstitute/viral-core:2.1.33" } + + Int disk_size = 750 command { set -ex -o pipefail @@ -184,7 +192,8 @@ task merge_and_reheader_bams { docker: "${docker}" memory: "3 GB" cpu: 2 - disks: "local-disk 750 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd2_v2_x4" preemptible: 0 maxRetries: 2 @@ -204,6 +213,8 @@ task rmdup_ubam { String? docker = "quay.io/broadinstitute/viral-core:2.1.33" } + Int disk_size = 375 + parameter_meta { reads_unmapped_bam: { description: "unaligned reads in BAM format", patterns: ["*.bam"] } method: { description: "mvicuna or cdhit" } @@ -238,7 +249,8 @@ task rmdup_ubam { docker: "${docker}" memory: select_first([machine_mem_gb, 7]) + " GB" cpu: 2 - disks: "local-disk 375 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES dx_instance_type: "mem2_ssd1_v2_x2" maxRetries: 2 } @@ -259,6 +271,8 @@ task downsample_bams { String docker = "quay.io/broadinstitute/viral-core:2.1.33" } + Int disk_size = 750 + command { set -ex -o pipefail @@ -295,7 +309,8 @@ task downsample_bams { docker: "${docker}" memory: select_first([machine_mem_gb, 3]) + " GB" cpu: 4 - disks: "local-disk 750 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x4" maxRetries: 2 } @@ -318,6 +333,7 @@ task FastqToUBAM { String docker = "quay.io/broadinstitute/viral-core:2.1.33" } + Int disk_size = 375 parameter_meta { fastq_1: { description: "Unaligned read1 file in fastq format", patterns: ["*.fastq", "*.fastq.gz", "*.fq", "*.fq.gz"] } fastq_2: { description: "Unaligned read2 file in fastq format. This should be empty for single-end read conversion and required for paired-end reads. If provided, it must match fastq_1 in length and order.", patterns: ["*.fastq", "*.fastq.gz", "*.fq", "*.fq.gz"] } @@ -350,7 +366,8 @@ task FastqToUBAM { docker: docker cpu: 2 memory: "3 GB" - disks: "local-disk 375 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } @@ -366,6 +383,7 @@ task read_depths { String out_basename = basename(aligned_bam, '.bam') String docker = "quay.io/broadinstitute/viral-core:2.1.33" } + Int disk_size = 200 command <<< set -e -o pipefail @@ -379,7 +397,8 @@ task read_depths { docker: docker cpu: 2 memory: "3 GB" - disks: "local-disk 200 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } diff --git a/pipes/WDL/tasks/tasks_reports.wdl b/pipes/WDL/tasks/tasks_reports.wdl index 9de1b41b7..1b401d4f5 100644 --- a/pipes/WDL/tasks/tasks_reports.wdl +++ b/pipes/WDL/tasks/tasks_reports.wdl @@ -19,6 +19,7 @@ task alignment_metrics { } String out_basename = basename(aligned_bam, ".bam") + Int disk_size = 150 command <<< set -e @@ -101,7 +102,8 @@ task alignment_metrics { docker: "~{docker}" memory: select_first([machine_mem_gb, 13]) + " GB" cpu: 2 - disks: "local-disk 150 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } @@ -130,6 +132,8 @@ task plot_coverage { String docker = "quay.io/broadinstitute/viral-core:2.1.33" } + + Int disk_size = 375 command { set -ex -o pipefail @@ -197,7 +201,8 @@ task plot_coverage { docker: "${docker}" memory: "7 GB" cpu: 2 - disks: "local-disk 375 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x4" preemptible: 1 maxRetries: 2 @@ -213,6 +218,8 @@ task coverage_report { String docker = "quay.io/broadinstitute/viral-core:2.1.33" } + Int disk_size = 375 + command { reports.py --version | tee VERSION reports.py coverage_only \ @@ -230,7 +237,8 @@ task coverage_report { docker: "${docker}" memory: "2 GB" cpu: 2 - disks: "local-disk 375 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd2_v2_x4" maxRetries: 2 } @@ -246,6 +254,8 @@ task assembly_bases { String docker ="ubuntu" } + Int disk_size = 50 + command { set -e grep -v '^>' "~{fasta}" | tr -d '\n' | wc -c | tee assembly_length @@ -261,7 +271,8 @@ task assembly_bases { docker: "${docker}" memory: "1 GB" cpu: 1 - disks: "local-disk 50 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } @@ -275,6 +286,7 @@ task fastqc { } String reads_basename=basename(reads_bam, ".bam") + Int disk_size = 375 command { set -ex -o pipefail @@ -292,7 +304,8 @@ task fastqc { memory: "2 GB" cpu: 1 docker: "${docker}" - disks: "local-disk 375 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } @@ -310,6 +323,7 @@ task align_and_count { String reads_basename=basename(reads_bam, ".bam") String ref_basename=basename(ref_db, ".fasta") + Int disk_size = 375 command { set -ex -o pipefail @@ -339,7 +353,8 @@ task align_and_count { memory: select_first([machine_mem_gb, 15]) + " GB" cpu: 4 docker: "${docker}" - disks: "local-disk 375 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x4" maxRetries: 2 } @@ -354,6 +369,8 @@ task align_and_count_summary { String docker = "quay.io/broadinstitute/viral-core:2.1.33" } + Int disk_size = 100 + command { set -ex -o pipefail @@ -370,7 +387,8 @@ task align_and_count_summary { memory: "7 GB" cpu: 8 docker: "${docker}" - disks: "local-disk 100 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } @@ -393,6 +411,7 @@ task aggregate_metagenomics_reports { } String aggregate_taxon_heading = sub(aggregate_taxon_heading_space_separated, " ", "_") # replace spaces with underscores for use in filename + Int disk_size = 50 command { set -ex -o pipefail @@ -417,7 +436,8 @@ task aggregate_metagenomics_reports { docker: "${docker}" memory: "3 GB" cpu: 1 - disks: "local-disk 50 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd2_v2_x2" preemptible: 0 maxRetries: 2 @@ -463,6 +483,7 @@ task MultiQC { # get the basename in all wdl use the filename specified (sans ".html" extension, if specified) String report_filename = if (defined(file_name)) then basename(select_first([file_name]), ".html") else "multiqc" + Int disk_size = 375 command { set -ex -o pipefail @@ -515,7 +536,8 @@ task MultiQC { memory: "8 GB" cpu: 16 docker: "${docker}" - disks: "local-disk 375 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES dx_instance_type: "mem2_ssd1_v2_x2" maxRetries: 2 } @@ -530,6 +552,8 @@ task compare_two_genomes { String docker = "quay.io/broadinstitute/viral-assemble:2.1.16.1" } + Int disk_size = 50 + command <<< set -ex -o pipefail assembly.py --version | tee VERSION @@ -552,7 +576,8 @@ task compare_two_genomes { memory: "3 GB" cpu: 2 docker: docker - disks: "local-disk 50 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" preemptible: 1 maxRetries: 2 diff --git a/pipes/WDL/tasks/tasks_sarscov2.wdl b/pipes/WDL/tasks/tasks_sarscov2.wdl index 0bdfd1327..ed975f987 100644 --- a/pipes/WDL/tasks/tasks_sarscov2.wdl +++ b/pipes/WDL/tasks/tasks_sarscov2.wdl @@ -13,6 +13,7 @@ task pangolin_one_sample { String docker = "quay.io/staphb/pangolin:4.1.2-pdata-1.14" } String basename = basename(genome_fasta, ".fasta") + Int disk_size = 50 command <<< set -ex @@ -61,7 +62,8 @@ task pangolin_one_sample { docker: docker memory: "3 GB" cpu: 2 - disks: "local-disk 50 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } @@ -93,6 +95,7 @@ task pangolin_many_samples { String basename String docker = "quay.io/staphb/pangolin:4.1.2-pdata-1.14" } + Int disk_size = 100 command <<< set -ex @@ -154,7 +157,8 @@ task pangolin_many_samples { docker: docker memory: "14 GB" cpu: 16 - disks: "local-disk 100 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x16" maxRetries: 2 } @@ -195,6 +199,7 @@ task sequencing_report { Int machine_mem_gb = 7 String docker = "quay.io/broadinstitute/sc2-rmd:0.1.25" } + Int disk_size = 250 command { set -e /docker/reports.py \ @@ -213,7 +218,8 @@ task sequencing_report { docker: docker memory: "~{machine_mem_gb} GB" cpu: 2 - disks: "local-disk 250 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } @@ -251,6 +257,7 @@ task sc2_meta_final { String docker = "quay.io/broadinstitute/py3-bio:0.1.2" } String out_basename = basename(basename(assembly_stats_tsv, '.txt'), '.tsv') + Int disk_size = 50 command <<< set -e python3< "~{output_name}" } @@ -16,7 +17,8 @@ task concatenate { docker: "ubuntu" memory: "1 GB" cpu: cpus - disks: "local-disk 375 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } @@ -34,6 +36,7 @@ task zcat { String output_name Int cpus = 4 } + Int disk_size = 375 command <<< set -e python3 < "~{outfilename}" } @@ -133,7 +138,8 @@ task sed { docker: "ubuntu" memory: "1 GB" cpu: 1 - disks: "local-disk 375 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } @@ -149,6 +155,7 @@ task fasta_to_ids { input { File sequences_fasta } + Int disk_size = 375 String basename = basename(sequences_fasta, ".fasta") command { cat "~{sequences_fasta}" | grep \> | cut -c 2- > "~{basename}.txt" @@ -157,7 +164,8 @@ task fasta_to_ids { docker: "ubuntu" memory: "1 GB" cpu: 1 - disks: "local-disk 375 LOCAL" + disks: "local-disk " + disk_size + " LOCAL" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } @@ -170,6 +178,7 @@ task md5sum { input { File in_file } + Int disk_size = 100 command { md5sum ~{in_file} | cut -f 1 -d ' ' | tee MD5 } @@ -180,7 +189,8 @@ task md5sum { docker: "ubuntu" memory: "1 GB" cpu: 1 - disks: "local-disk 100 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd2_v2_x2" maxRetries: 2 } @@ -193,6 +203,7 @@ task fetch_row_from_tsv { String idx_val Array[String] set_default_keys = [] } + Int disk_size = 50 command <<< python3 << CODE import csv, gzip, json @@ -217,6 +228,8 @@ task fetch_row_from_tsv { docker: "python:slim" memory: "1 GB" cpu: 1 + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES disks: "local-disk 50 HDD" dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 @@ -231,6 +244,7 @@ task fetch_col_from_tsv { Boolean drop_header = true String out_name = "~{basename(basename(tsv, '.txt'), '.tsv')}-~{col}.txt" } + Int disk_size = 50 command <<< python3 << CODE import csv, gzip @@ -255,7 +269,8 @@ task fetch_col_from_tsv { docker: "python:slim" memory: "1 GB" cpu: 1 - disks: "local-disk 50 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } @@ -275,6 +290,8 @@ task tsv_join { Int machine_mem_gb = 7 } + Int disk_size = 50 + command <<< python3<1 || NR==1' \ ~{sep=' ' infiles} \ @@ -507,7 +535,8 @@ task cat_except_headers { memory: "1 GB" cpu: 1 docker: "ubuntu" - disks: "local-disk 50 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } @@ -516,6 +545,7 @@ task make_empty_file { input { String out_filename } + Int disk_size = 10 command { touch "~{out_filename}" } @@ -526,7 +556,8 @@ task make_empty_file { memory: "1 GB" cpu: 1 docker: "ubuntu" - disks: "local-disk 10 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } @@ -537,6 +568,7 @@ task rename_file { File infile String out_filename } + Int disk_size = 100 command { ln -s "~{infile}" "~{out_filename}" } @@ -547,7 +579,8 @@ task rename_file { memory: "1 GB" cpu: 1 docker: "ubuntu" - disks: "local-disk 100 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } @@ -557,6 +590,7 @@ task today { input { String? timezone } + Int disk_size = 10 meta { volatile: true } @@ -571,7 +605,8 @@ task today { memory: "1 GB" cpu: 1 docker: "quay.io/broadinstitute/viral-baseimage:0.1.20" - disks: "local-disk 10 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 } @@ -608,6 +643,7 @@ task s3_copy { memory: "2 GB" cpu: cpus disks: "local-disk ~{disk_gb} SSD" + disk: "~{disk_gb} GB" # TES maxRetries: 2 } } @@ -622,6 +658,7 @@ task filter_sequences_by_length { String docker = "quay.io/broadinstitute/viral-core:2.1.33" } + Int disk_size = 300 parameter_meta { sequences_fasta: { description: "Set of sequences in fasta format", @@ -660,7 +697,8 @@ task filter_sequences_by_length { docker: docker memory: "1 GB" cpu : 1 - disks: "local-disk 700 HDD" + disks: "local-disk " + disk_size + " HDD" + disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x2" maxRetries: 2 }