From 87d8bc0149f8ccebbad744fc12292aaddd1056fe Mon Sep 17 00:00:00 2001 From: Danny Park Date: Mon, 8 Jun 2020 16:27:31 -0400 Subject: [PATCH 01/17] add augur mask step explicitly --- pipes/WDL/tasks/tasks_nextstrain.wdl | 31 ++++++++++++++++++++++++ pipes/WDL/workflows/build_augur_tree.wdl | 11 ++++++--- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index e38348387..e16bc5381 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -186,6 +186,37 @@ task augur_mafft_align { } } +task augur_mask_sites { + meta { + description: "Mask unwanted positions from alignment. See https://nextstrain-augur.readthedocs.io/en/stable/usage/cli/mask.html" + } + input { + File sequences + File? mask_bed + + String docker = "nextstrain/base" + } + String basename = basename(sequences, '.fasta') + command { + augur version > VERSION + augur mask --sequences ~{sequences} \ + --mask ~{select_first([mask_bed, "/dev/null"])} \ + --output ~{basename}_masked.fasta + } + runtime { + docker: docker + memory: "3 GB" + cpu : 2 + disks: "local-disk 100 HDD" + preemptible: 2 + dx_instance_type: "mem1_ssd1_v2_x2" + } + output { + File masked_sequences = "~{basename}_masked.fasta" + String augur_version = read_string("VERSION") + } +} + task draft_augur_tree { meta { description: "Build a tree using a variety of methods. 
See https://nextstrain-augur.readthedocs.io/en/stable/usage/cli/tree.html" diff --git a/pipes/WDL/workflows/build_augur_tree.wdl b/pipes/WDL/workflows/build_augur_tree.wdl index 2d486b436..5cb4d69fd 100644 --- a/pipes/WDL/workflows/build_augur_tree.wdl +++ b/pipes/WDL/workflows/build_augur_tree.wdl @@ -64,15 +64,19 @@ workflow build_augur_tree { ref_fasta = ref_fasta, basename = virus } + call nextstrain.augur_mask_sites { + input: + sequences = augur_mafft_align.aligned_sequences + } call nextstrain.draft_augur_tree { input: - aligned_fasta = augur_mafft_align.aligned_sequences, + aligned_fasta = augur_mask_sites.masked_sequences, basename = virus } call nextstrain.refine_augur_tree { input: raw_tree = draft_augur_tree.aligned_tree, - aligned_fasta = augur_mafft_align.aligned_sequences, + aligned_fasta = augur_mask_sites.masked_sequences, metadata = sample_metadata, basename = virus } @@ -88,7 +92,7 @@ workflow build_augur_tree { call nextstrain.ancestral_tree { input: refined_tree = refine_augur_tree.tree_refined, - aligned_fasta = augur_mafft_align.aligned_sequences, + aligned_fasta = augur_mask_sites.masked_sequences, basename = virus } call nextstrain.translate_augur_tree { @@ -123,6 +127,7 @@ workflow build_augur_tree { output { File combined_assembly_fasta = concatenate.combined File augur_aligned_fasta = augur_mafft_align.aligned_sequences + File masked_fasta = augur_mask_sites.masked_sequences File raw_tree = draft_augur_tree.aligned_tree File refined_tree = refine_augur_tree.tree_refined File branch_lengths = refine_augur_tree.branch_lengths From 4a1b1afc16b27746a97826fb8ea667344e6bc688 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Mon, 8 Jun 2020 16:46:53 -0400 Subject: [PATCH 02/17] try out --append_run_id option in viral-core --- pipes/WDL/tasks/tasks_demux.wdl | 1 + requirements-modules.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_demux.wdl b/pipes/WDL/tasks/tasks_demux.wdl index 12f0c425b..36a5b9b04 
100644 --- a/pipes/WDL/tasks/tasks_demux.wdl +++ b/pipes/WDL/tasks/tasks_demux.wdl @@ -194,6 +194,7 @@ task illumina_demux { --JVMmemory="$mem_in_mb"m \ $demux_threads \ ${true='--force_gc=true' false="--force_gc=false" forceGC} \ + --append_run_id \ --compression_level=5 \ --loglevel=DEBUG diff --git a/requirements-modules.txt b/requirements-modules.txt index 1bd4ad746..71b30af88 100644 --- a/requirements-modules.txt +++ b/requirements-modules.txt @@ -1,4 +1,4 @@ -broadinstitute/viral-core=2.1.0 +broadinstitute/viral-core=dp-demux broadinstitute/viral-assemble=2.1.0.0 broadinstitute/viral-classify=2.1.0.0 broadinstitute/viral-phylo=2.1.0.0 From a4a8afc7936e05bcfed56992d782f85304382880 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Mon, 8 Jun 2020 17:13:59 -0400 Subject: [PATCH 03/17] augur mask doesnt like empty bed input, so fake it --- pipes/WDL/tasks/tasks_nextstrain.wdl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index e16bc5381..5c56c934b 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -199,9 +199,14 @@ task augur_mask_sites { String basename = basename(sequences, '.fasta') command { augur version > VERSION - augur mask --sequences ~{sequences} \ - --mask ~{select_first([mask_bed, "/dev/null"])} \ - --output ~{basename}_masked.fasta + BEDFILE=~{select_first([mask_bed, "/dev/null"])} + if [ -s "$BEDFILE" ]; then + augur mask --sequences ~{sequences} \ + --mask "$BEDFILE" \ + --output "~{basename}_masked.fasta" + else + cp "~{sequences}" "~{basename}_masked.fasta" + fi } runtime { docker: docker From 89383b4b59018a0c7146a08344d619813f6f28d5 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Tue, 9 Jun 2020 09:47:23 -0400 Subject: [PATCH 04/17] update dxWDL 1.47 to 1.47.2 --- travis/install-wdl.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/travis/install-wdl.sh b/travis/install-wdl.sh 
index e959db508..69742b49a 100755 --- a/travis/install-wdl.sh +++ b/travis/install-wdl.sh @@ -19,7 +19,7 @@ cached_fetch_jar_from_github () { cached_fetch_jar_from_github broadinstitute cromwell womtool 49 cached_fetch_jar_from_github broadinstitute cromwell cromwell 49 -cached_fetch_jar_from_github dnanexus dxWDL dxWDL v1.47 +cached_fetch_jar_from_github dnanexus dxWDL dxWDL v1.47.2 TGZ=dx-toolkit-v0.293.0-ubuntu-16.04-amd64.tar.gz if [ ! -f $CACHE_DIR/$TGZ ]; then From bc66cf837d4789f61309c06fd23f1149da0afc6f Mon Sep 17 00:00:00 2001 From: Danny Park Date: Tue, 9 Jun 2020 12:16:09 -0400 Subject: [PATCH 05/17] upstream merged --- requirements-modules.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-modules.txt b/requirements-modules.txt index 71b30af88..a7e47f6c7 100644 --- a/requirements-modules.txt +++ b/requirements-modules.txt @@ -1,4 +1,4 @@ -broadinstitute/viral-core=dp-demux +broadinstitute/viral-core=2.1.0-rc3 broadinstitute/viral-assemble=2.1.0.0 broadinstitute/viral-classify=2.1.0.0 broadinstitute/viral-phylo=2.1.0.0 From abd41f3e90548c1edc80b122aa8aacc7e9eac911 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Tue, 9 Jun 2020 12:47:01 -0400 Subject: [PATCH 06/17] change input variable names, parameter_meta docs, and basename handling to ensure we can tolerate VCFs as well as FASTAs at all augur steps that can do that. --- pipes/WDL/tasks/tasks_nextstrain.wdl | 79 ++++++++++++++++-------- pipes/WDL/workflows/build_augur_tree.wdl | 18 +++--- 2 files changed, 61 insertions(+), 36 deletions(-) diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index 5c56c934b..7df55cf2f 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -96,15 +96,15 @@ task filter_subsample_sequences { } parameter_meta { sequences_fasta: { - description: "Set of sequences in fasta format to subsample using augur filter. 
These must represent a single chromosome/segment of a genome only.", - patterns: ["*.fasta", "*.fa"] + description: "Set of sequences (unaligned fasta or aligned fasta -- one sequence per genome) or variants (vcf format) to subsample using augur filter.", + patterns: ["*.fasta", "*.fa", "*.vcf", "*.vcf.gz"] } sample_metadata_tsv: { description: "Metadata in tab-separated text format. See https://nextstrain-augur.readthedocs.io/en/stable/faq/metadata.html for details.", patterns: ["*.txt", "*.tsv"] } } - String in_basename = basename(sequences_fasta, ".fasta") + String out_fname = sub(sub(sequences_fasta, ".vcf", ".filtered.vcf"), ".fasta$", ".filtered.fasta") command { augur version > VERSION augur filter \ @@ -122,23 +122,24 @@ task filter_subsample_sequences { ~{"--subsample-seed " + subsample_seed} \ ~{"--exclude-where " + exclude_where} \ ~{"--include-where " + include_where} \ - --output "~{in_basename}.filtered.fasta" - cat ~{sequences_fasta} | grep \> | wc -l > IN_COUNT - cat ~{in_basename}.filtered.fasta | grep \> | wc -l > OUT_COUNT + --output "~{out_fname}" | tee STDOUT + #cat ~{sequences_fasta} | grep \> | wc -l > IN_COUNT + grep "sequences were dropped during filtering" STDOUT | cut -f 1 -d ' ' > DROP_COUNT + grep "sequences have been written out to" STDOUT | cut -f 1 -d ' ' > OUT_COUNT } runtime { docker: docker - memory: "4 GB" - cpu : 2 - disks: "local-disk 375 LOCAL" + memory: "3 GB" + cpu : 1 + disks: "local-disk 100 HDD" dx_instance_type: "mem1_ssd1_v2_x2" preemptible: 1 } output { - File filtered_fasta = "~{in_basename}.filtered.fasta" - String augur_version = read_string("VERSION") - Int sequences_in = read_int("IN_COUNT") - Int sequences_out = read_int("OUT_COUNT") + File filtered_fasta = out_fname + String augur_version = read_string("VERSION") + Int sequences_dropped = read_int("DROP_COUNT") + Int sequences_out = read_int("OUT_COUNT") } } @@ -188,7 +189,7 @@ task augur_mafft_align { task augur_mask_sites { meta { - description: "Mask unwanted 
positions from alignment. See https://nextstrain-augur.readthedocs.io/en/stable/usage/cli/mask.html" + description: "Mask unwanted positions from alignment or SNP table. See https://nextstrain-augur.readthedocs.io/en/stable/usage/cli/mask.html" } input { File sequences @@ -196,16 +197,22 @@ task augur_mask_sites { String docker = "nextstrain/base" } - String basename = basename(sequences, '.fasta') + parameter_meta { + sequences: { + description: "Set of alignments (fasta format) or variants (vcf format) to mask.", + patterns: ["*.fasta", "*.fa", "*.vcf", "*.vcf.gz"] + } + } + String out_fname = sub(sub(sequences, ".vcf", ".masked.vcf"), ".fasta$", ".masked.fasta") command { augur version > VERSION BEDFILE=~{select_first([mask_bed, "/dev/null"])} if [ -s "$BEDFILE" ]; then augur mask --sequences ~{sequences} \ --mask "$BEDFILE" \ - --output "~{basename}_masked.fasta" + --output "~{out_fname}" else - cp "~{sequences}" "~{basename}_masked.fasta" + cp "~{sequences}" "~{out_fname}" fi } runtime { @@ -217,17 +224,17 @@ task augur_mask_sites { dx_instance_type: "mem1_ssd1_v2_x2" } output { - File masked_sequences = "~{basename}_masked.fasta" - String augur_version = read_string("VERSION") + File masked_sequences = out_fname + String augur_version = read_string("VERSION") } } task draft_augur_tree { meta { - description: "Build a tree using a variety of methods. See https://nextstrain-augur.readthedocs.io/en/stable/usage/cli/tree.html" + description: "Build a tree using iqTree. See https://nextstrain-augur.readthedocs.io/en/stable/usage/cli/tree.html" } input { - File aligned_fasta + File msa_or_vcf String basename String method = "iqtree" @@ -240,9 +247,15 @@ task draft_augur_tree { Int? 
disk_space_gb = 750 String docker = "nextstrain/base" } + parameter_meta { + msa_or_vcf: { + description: "Set of alignments (fasta format) or variants (vcf format) to construct a tree from using augur tree (iqTree).", + patterns: ["*.fasta", "*.fa", "*.vcf", "*.vcf.gz"] + } + } command { augur version > VERSION - AUGUR_RECURSION_LIMIT=10000 augur tree --alignment ~{aligned_fasta} \ + AUGUR_RECURSION_LIMIT=10000 augur tree --alignment ~{msa_or_vcf} \ --output ~{basename}_raw_tree.nwk \ --method ~{default="iqtree" method} \ --substitution-model ~{default="GTR" substitution_model} \ @@ -267,11 +280,11 @@ task draft_augur_tree { task refine_augur_tree { meta { - description: "Refine an initial tree using sequence metadata. See https://nextstrain-augur.readthedocs.io/en/stable/usage/cli/refine.html" + description: "Refine an initial tree using sequence metadata and Treetime. See https://nextstrain-augur.readthedocs.io/en/stable/usage/cli/refine.html" } input { File raw_tree - File aligned_fasta + File msa_or_vcf File metadata String basename @@ -295,11 +308,17 @@ task refine_augur_tree { Int? disk_space_gb = 750 String docker = "nextstrain/base" } + parameter_meta { + msa_or_vcf: { + description: "Set of alignments (fasta format) or variants (vcf format) to use to guide Treetime.", + patterns: ["*.fasta", "*.fa", "*.vcf", "*.vcf.gz"] + } + } command { augur version > VERSION AUGUR_RECURSION_LIMIT=10000 augur refine \ --tree ~{raw_tree} \ - --alignment ~{aligned_fasta} \ + --alignment ~{msa_or_vcf} \ --metadata ~{metadata} \ --output-tree ~{basename}_refined_tree.nwk \ --output-node-data ~{basename}_branch_lengths.json \ @@ -382,7 +401,7 @@ task ancestral_tree { } input { File refined_tree - File aligned_fasta + File msa_or_vcf String basename String inference = "joint" @@ -395,11 +414,17 @@ task ancestral_tree { Int? 
machine_mem_gb String docker = "nextstrain/base" } + parameter_meta { + msa_or_vcf: { + description: "Set of alignments (fasta format) or variants (vcf format) to use to guide Treetime.", + patterns: ["*.fasta", "*.fa", "*.vcf", "*.vcf.gz"] + } + } command { augur version > VERSION AUGUR_RECURSION_LIMIT=10000 augur ancestral \ --tree ~{refined_tree} \ - --alignment ~{aligned_fasta} \ + --alignment ~{msa_or_vcf} \ --output-node-data ~{basename}_nt_muts.json \ ~{"--vcf-reference " + vcf_reference} \ ~{"--output-vcf " + output_vcf} \ diff --git a/pipes/WDL/workflows/build_augur_tree.wdl b/pipes/WDL/workflows/build_augur_tree.wdl index 5cb4d69fd..d26385429 100644 --- a/pipes/WDL/workflows/build_augur_tree.wdl +++ b/pipes/WDL/workflows/build_augur_tree.wdl @@ -70,15 +70,15 @@ workflow build_augur_tree { } call nextstrain.draft_augur_tree { input: - aligned_fasta = augur_mask_sites.masked_sequences, - basename = virus + msa_or_vcf = augur_mask_sites.masked_sequences, + basename = virus } call nextstrain.refine_augur_tree { input: - raw_tree = draft_augur_tree.aligned_tree, - aligned_fasta = augur_mask_sites.masked_sequences, - metadata = sample_metadata, - basename = virus + raw_tree = draft_augur_tree.aligned_tree, + msa_or_vcf = augur_mask_sites.masked_sequences, + metadata = sample_metadata, + basename = virus } if(defined(ancestral_traits_to_infer) && length(select_first([ancestral_traits_to_infer,[]]))>0) { call nextstrain.ancestral_traits { @@ -91,9 +91,9 @@ workflow build_augur_tree { } call nextstrain.ancestral_tree { input: - refined_tree = refine_augur_tree.tree_refined, - aligned_fasta = augur_mask_sites.masked_sequences, - basename = virus + refined_tree = refine_augur_tree.tree_refined, + msa_or_vcf = augur_mask_sites.masked_sequences, + basename = virus } call nextstrain.translate_augur_tree { input: From 46d6a2b04d625d78017b685bccffedd9cb9fa9b6 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Tue, 9 Jun 2020 15:16:24 -0400 Subject: [PATCH 07/17] oops, 
remove the full path preceding the filename --- pipes/WDL/tasks/tasks_nextstrain.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index 7df55cf2f..d67def77d 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -104,7 +104,7 @@ task filter_subsample_sequences { patterns: ["*.txt", "*.tsv"] } } - String out_fname = sub(sub(sequences_fasta, ".vcf", ".filtered.vcf"), ".fasta$", ".filtered.fasta") + String out_fname = sub(sub(basename(sequences_fasta), ".vcf", ".filtered.vcf"), ".fasta$", ".filtered.fasta") command { augur version > VERSION augur filter \ @@ -203,7 +203,7 @@ task augur_mask_sites { patterns: ["*.fasta", "*.fa", "*.vcf", "*.vcf.gz"] } } - String out_fname = sub(sub(sequences, ".vcf", ".masked.vcf"), ".fasta$", ".masked.fasta") + String out_fname = sub(sub(basename(sequences), ".vcf", ".masked.vcf"), ".fasta$", ".masked.fasta") command { augur version > VERSION BEDFILE=~{select_first([mask_bed, "/dev/null"])} From 9a380387e2914e952f4afe1fd847daf0b5d72314 Mon Sep 17 00:00:00 2001 From: Chris Tomkins-Tinch Date: Tue, 9 Jun 2020 15:35:49 -0400 Subject: [PATCH 08/17] viral-core 2.1.0 -> 2.1.1 --- requirements-modules.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-modules.txt b/requirements-modules.txt index 1bd4ad746..0ad5a25ad 100644 --- a/requirements-modules.txt +++ b/requirements-modules.txt @@ -1,4 +1,4 @@ -broadinstitute/viral-core=2.1.0 +broadinstitute/viral-core=2.1.1 broadinstitute/viral-assemble=2.1.0.0 broadinstitute/viral-classify=2.1.0.0 broadinstitute/viral-phylo=2.1.0.0 From 1b95f753d7da4677a1d9503d9e31df789bc0dbd1 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Tue, 9 Jun 2020 16:47:10 -0400 Subject: [PATCH 09/17] strip out blastx from classify_multi workflow for now --- pipes/WDL/workflows/classify_multi.wdl | 32 +------------------------- 1 file changed, 1 
insertion(+), 31 deletions(-) diff --git a/pipes/WDL/workflows/classify_multi.wdl b/pipes/WDL/workflows/classify_multi.wdl index e132c9a58..a8634cb5f 100644 --- a/pipes/WDL/workflows/classify_multi.wdl +++ b/pipes/WDL/workflows/classify_multi.wdl @@ -8,7 +8,7 @@ import "../tasks/tasks_reports.wdl" as reports workflow classify_multi { meta { - description: "Runs raw reads through taxonomic classification (Kraken2), human read depletion (based on Kraken2), de novo assembly (SPAdes), taxonomic classification of contigs (BLASTx), and FASTQC/multiQC of reads." + description: "Runs raw reads through taxonomic classification (Kraken2), human read depletion (based on Kraken2), de novo assembly (SPAdes), and FASTQC/multiQC of reads." author: "Broad Viral Genomics" email: "viral-ngs@broadinstitute.org" } @@ -23,8 +23,6 @@ workflow classify_multi { File kraken2_db_tgz File krona_taxonomy_db_kraken2_tgz - File? blast_db_tgz - File? krona_taxonomy_db_blast_tgz } parameter_meta { @@ -48,14 +46,6 @@ workflow classify_multi { description: "Krona taxonomy database containing a single file: taxonomy.tab, or possibly just a compressed taxonomy.tab", patterns: ["*.tab.zst", "*.tab.gz", "*.tab", "*.tar.gz", "*.tar.lz4", "*.tar.bz2", "*.tar.zst"] } - blast_db_tgz: { - description: "Pre-built BLAST database tarball containing an indexed blast database named 'nr'", - patterns: ["*.tar.gz", "*.tar.lz4", "*.tar.bz2", "*.tar.zst"] - } - krona_taxonomy_db_blast_tgz: { - description: "Krona taxonomy database: a tarball containing a taxonomy.tab file as well as accession to taxid mapping (a kraken-based taxonomy database will not suffice).", - patterns: ["*.tar.gz", "*.tar.lz4", "*.tar.bz2", "*.tar.zst"] - } ncbi_taxdump_tgz: { description: "An NCBI taxdump.tar.gz file that contains, at the minimum, a nodes.dmp and names.dmp file.", patterns: ["*.tar.gz", "*.tar.lz4", "*.tar.bz2", "*.tar.zst"] @@ -114,17 +104,8 @@ workflow classify_multi { assembler = "spades", reads_unmapped_bam = 
rmdup_ubam.dedup_bam, trim_clip_db = trim_clip_db, - spades_min_contig_len = 800, always_succeed = true } - if(defined(blast_db_tgz) && defined(krona_taxonomy_db_blast_tgz)) { - call metagenomics.blastx as blastx { - input: - contigs_fasta = spades.contigs_fasta, - blast_db_tgz = select_first([blast_db_tgz]), - krona_taxonomy_db_tgz = select_first([krona_taxonomy_db_blast_tgz]) - } - } } call reports.MultiQC as multiqc_raw { @@ -163,14 +144,6 @@ workflow classify_multi { out_basename = "merged-kraken2.krona" } - if(defined(blast_db_tgz) && defined(krona_taxonomy_db_blast_tgz)) { - call metagenomics.krona_merge as krona_merge_blastx { - input: - krona_reports = select_all(blastx.krona_report_html), - out_basename = "merged-spades-blastx.krona" - } - } - output { Array[File] cleaned_reads_unaligned_bams = deplete.bam_filtered_to_taxa Array[File] deduplicated_reads_unaligned = rmdup_ubam.dedup_bam @@ -187,12 +160,9 @@ workflow classify_multi { File spikein_counts = spike_summary.count_summary File kraken2_merged_krona = krona_merge_kraken2.krona_report_html File kraken2_summary = metag_summary_report.krakenuniq_aggregate_taxlevel_summary - File? 
blastx_merged_krona = krona_merge_blastx.krona_report_html Array[File] kraken2_summary_reports = kraken2.kraken2_summary_report Array[File] kraken2_krona_by_sample = kraken2.krona_report_html - Array[File] blastx_report_by_sample = select_all(blastx.blast_report) - Array[File] blastx_krona_by_sample = select_all(blastx.krona_report_html) String kraken2_viral_classify_version = kraken2.viralngs_version[0] String deplete_viral_classify_version = deplete.viralngs_version[0] From 7a8a3643b9c1ca9214700ccc2b21c6e149d21606 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Tue, 9 Jun 2020 17:25:17 -0400 Subject: [PATCH 10/17] create a new cromwell config file that forces travis to run only one job at a time --- pipes/cromwell/cromwell.local-travis.conf | 110 ++++++++++++++++++++++ travis/tests-cromwell.sh | 4 +- 2 files changed, 112 insertions(+), 2 deletions(-) create mode 100644 pipes/cromwell/cromwell.local-travis.conf diff --git a/pipes/cromwell/cromwell.local-travis.conf b/pipes/cromwell/cromwell.local-travis.conf new file mode 100644 index 000000000..06edf3faa --- /dev/null +++ b/pipes/cromwell/cromwell.local-travis.conf @@ -0,0 +1,110 @@ +# Documentation +# https://cromwell.readthedocs.io/en/stable/backends/Local/ + + # Define a new backend provider. + + LocalExample { + + # The actor that runs the backend. In this case, it's the Shared File System (SFS) ConfigBackend. + actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory" + + # The backend custom configuration. + config { + + # Optional limits on the number of concurrent jobs + concurrent-job-limit = 1 + + # If true submits scripts to the bash background using "&". Only usefull for dispatchers that do NOT submit + # the job and then immediately return a scheduled job id. + run-in-background = true + + # `temporary-directory` creates the temporary directory for commands. 
+ # + # If this value is not set explicitly, the default value creates a unique temporary directory, equivalent to: + # temporary-directory = "$(mktemp -d \"$PWD\"/tmp.XXXXXX)" + # + # The expression is run from the execution directory for the script. The expression must create the directory + # if it does not exist, and then return the full path to the directory. + # + # To create and return a non-random temporary directory, use something like: + # temporary-directory = "$(mkdir -p /tmp/mydir && echo /tmp/mydir)" + + # `script-epilogue` configures a shell command to run after the execution of every command block. + # + # If this value is not set explicitly, the default value is `sync`, equivalent to: + # script-epilogue = "sync" + # + # To turn off the default `sync` behavior set this value to an empty string: + # script-epilogue = "" + + # `glob-link-command` specifies command used to link glob outputs, by default using hard-links. + # If filesystem doesn't allow hard-links (e.g., beeGFS), change to soft-links as follows: + # glob-link-command = "ln -sL GLOB_PATTERN GLOB_DIRECTORY" + + # The list of possible runtime custom attributes. + runtime-attributes = """ + String? docker + String? docker_user + """ + + # Submit string when there is no "docker" runtime attribute. + submit = "/usr/bin/env bash ${script}" + + # Submit string when there is a "docker" runtime attribute. + submit-docker = """ + docker run \ + --rm -i \ + ${"--user " + docker_user} \ + --entrypoint ${job_shell} \ + -v ${cwd}:${docker_cwd} \ + ${docker} ${script} + """ + + # Root directory where Cromwell writes job results. This directory must be + # visible and writeable by the Cromwell process as well as the jobs that Cromwell + # launches. + root = "cromwell-executions" + + # Root directory where Cromwell writes job results in the container. This value + # can be used to specify where the execution folder is mounted in the container. 
+ # it is used for the construction of the docker_cwd string in the submit-docker + # value above. + dockerRoot = "/cromwell-executions" + + # File system configuration. + filesystems { + + # For SFS backends, the "local" configuration specifies how files are handled. + local { + + # Try to hard link (ln), then soft-link (ln -s), and if both fail, then copy the files. + localization: [ + "hard-link", "soft-link", "copy" + ] + + # Call caching strategies + caching { + # When copying a cached result, what type of file duplication should occur. + # For more information check: https://cromwell.readthedocs.io/en/stable/backends/HPC/#shared-filesystem + duplication-strategy: [ + "hard-link", "soft-link", "copy" + ] + + # Strategy to determine if a file has been used before. + # For extended explanation and alternative strategies check: https://cromwell.readthedocs.io/en/stable/Configuring/#call-caching + hashing-strategy: "md5" + + # When true, will check if a sibling file with the same name and the .md5 extension exists, and if it does, use the content of this file as a hash. + # If false or the md5 does not exist, will proceed with the above-defined hashing strategy. + check-sibling-md5: false + } + } + } + + # The defaults for runtime attributes if not provided. 
+ default-runtime-attributes { + failOnStderr: false + continueOnReturnCode: 0 + } + } + } diff --git a/travis/tests-cromwell.sh b/travis/tests-cromwell.sh index 928e00aa3..8fc9a11ab 100755 --- a/travis/tests-cromwell.sh +++ b/travis/tests-cromwell.sh @@ -2,7 +2,6 @@ set -e # intentionally allow for pipe failures below mkdir -p workflows -cp *.jar pipes/WDL/workflows/*.wdl pipes/WDL/tasks/*.wdl workflows cp -r test workflows/ cd workflows @@ -13,7 +12,8 @@ for workflow in ../pipes/WDL/workflows/*.wdl; do date echo "Executing $workflow_name using Cromwell on local instance" # the "cat" is to allow a pipe failure (otherwise it halts because of set -e) - java -jar cromwell.jar run \ + java -Dconfig.file=../pipes/cromwell/cromwell.local-travis.conf \ + -jar ../cromwell.jar run \ $workflow_name.wdl \ -i $input_json | tee cromwell.out if [ ${PIPESTATUS[0]} -gt 0 ]; then From 995256da9f4fda6b84e1d48c28f0af7c4c5d383a Mon Sep 17 00:00:00 2001 From: Danny Park Date: Tue, 9 Jun 2020 17:41:13 -0400 Subject: [PATCH 11/17] begin work on augur_from_msa workflow, rename other nextstrain wdls to similar augur_from_x naming scheme --- ...o_auspice.wdl => augur_from_beast_mcc.wdl} | 2 +- pipes/WDL/workflows/augur_from_msa.wdl | 123 ++++++++++++++++++ ...k_to_auspice.wdl => augur_from_newick.wdl} | 2 +- pipes/WDL/workflows/build_augur_tree.wdl | 2 +- 4 files changed, 126 insertions(+), 3 deletions(-) rename pipes/WDL/workflows/{beast_to_auspice.wdl => augur_from_beast_mcc.wdl} (97%) create mode 100644 pipes/WDL/workflows/augur_from_msa.wdl rename pipes/WDL/workflows/{newick_to_auspice.wdl => augur_from_newick.wdl} (94%) diff --git a/pipes/WDL/workflows/beast_to_auspice.wdl b/pipes/WDL/workflows/augur_from_beast_mcc.wdl similarity index 97% rename from pipes/WDL/workflows/beast_to_auspice.wdl rename to pipes/WDL/workflows/augur_from_beast_mcc.wdl index 67cd2baac..70c95df50 100644 --- a/pipes/WDL/workflows/beast_to_auspice.wdl +++ b/pipes/WDL/workflows/augur_from_beast_mcc.wdl @@ -2,7 
+2,7 @@ version 1.0 import "../tasks/tasks_nextstrain.wdl" as nextstrain -workflow beast_to_auspice { +workflow augur_from_beast_mcc { meta { description: "Visualize BEAST output with Nextstrain. This workflow converts a BEAST MCC tree (.tree file) into an Auspice v2 json file. See https://nextstrain-augur.readthedocs.io/en/stable/faq/import-beast.html for details." author: "Broad Viral Genomics" diff --git a/pipes/WDL/workflows/augur_from_msa.wdl b/pipes/WDL/workflows/augur_from_msa.wdl new file mode 100644 index 000000000..cd1f73ff5 --- /dev/null +++ b/pipes/WDL/workflows/augur_from_msa.wdl @@ -0,0 +1,123 @@ +version 1.0 + +import "../tasks/tasks_nextstrain.wdl" as nextstrain + +workflow augur_from_msa { + meta { + description: "Build trees, and convert to json representation suitable for Nextstrain visualization. See https://nextstrain.org/docs/getting-started/ and https://nextstrain-augur.readthedocs.io/en/stable/" + author: "Broad Viral Genomics" + email: "viral-ngs@broadinstitute.org" + } + + input { + File msa_or_vcf + File sample_metadata + String virus + File ref_fasta + File genbank_gb + File? clades_tsv + Array[String]? ancestral_traits_to_infer + } + + parameter_meta { + msa_or_vcf: { + description: "Multiple sequence alignment (aligned fasta) or variants (vcf format).", + patterns: ["*.fasta", "*.fa", "*.vcf", "*.vcf.gz"] + } + sample_metadata: { + description: "Metadata in tab-separated text format. See https://nextstrain-augur.readthedocs.io/en/stable/faq/metadata.html for details.", + patterns: ["*.txt", "*.tsv"] + } + virus: { + description: "A filename-friendly string that is used as a base for output file names." + } + ref_fasta: { + description: "A reference assembly (not included in assembly_fastas) to align assembly_fastas against. 
Typically from NCBI RefSeq or similar.", + patterns: ["*.fasta", "*.fa"] + } + genbank_gb: { + description: "A 'genbank' formatted gene annotation file that is used to calculate coding consequences of observed mutations. Must correspond to the same coordinate space as ref_fasta. Typically downloaded from the same NCBI accession number as ref_fasta.", + patterns: ["*.gb", "*.gbf"] + } + ancestral_traits_to_infer: { + description: "A list of metadata traits to use for ancestral node inference (see https://nextstrain-augur.readthedocs.io/en/stable/usage/cli/traits.html). Multiple traits may be specified; must correspond exactly to column headers in metadata file. Omitting these values will skip ancestral trait inference, and ancestral nodes will not have estimated values for metadata." + } + clades_tsv: { + description: "A TSV file containing clade mutation positions in four columns: [clade gene site alt]; see: https://nextstrain.org/docs/tutorials/defining-clades", + patterns: ["*.tsv", "*.txt"] + } + } + + call nextstrain.augur_mask_sites { + input: + sequences = msa_or_vcf + } + call nextstrain.draft_augur_tree { + input: + msa_or_vcf = augur_mask_sites.masked_sequences, + basename = virus + } + call nextstrain.refine_augur_tree { + input: + raw_tree = draft_augur_tree.aligned_tree, + msa_or_vcf = augur_mask_sites.masked_sequences, + metadata = sample_metadata, + basename = virus + } + if(defined(ancestral_traits_to_infer) && length(select_first([ancestral_traits_to_infer,[]]))>0) { + call nextstrain.ancestral_traits { + input: + tree = refine_augur_tree.tree_refined, + metadata = sample_metadata, + columns = select_first([ancestral_traits_to_infer,[]]), + basename = virus + } + } + call nextstrain.ancestral_tree { + input: + refined_tree = refine_augur_tree.tree_refined, + msa_or_vcf = augur_mask_sites.masked_sequences, + basename = virus + } + call nextstrain.translate_augur_tree { + input: + basename = virus, + refined_tree = refine_augur_tree.tree_refined, + 
nt_muts = ancestral_tree.nt_muts_json, + genbank_gb = genbank_gb + } + if(defined(clades_tsv)) { + call nextstrain.assign_clades_to_nodes { + input: + tree_nwk = refine_augur_tree.tree_refined, + nt_muts_json = ancestral_tree.nt_muts_json, + aa_muts_json = translate_augur_tree.aa_muts_json, + ref_fasta = ref_fasta, + clades_tsv = select_first([clades_tsv]) + } + } + call nextstrain.export_auspice_json { + input: + tree = refine_augur_tree.tree_refined, + sample_metadata = sample_metadata, + node_data_jsons = select_all([ + refine_augur_tree.branch_lengths, + ancestral_traits.node_data_json, + ancestral_tree.nt_muts_json, + translate_augur_tree.aa_muts_json, + assign_clades_to_nodes.node_clade_data_json]) + } + + output { + File masked_fasta = augur_mask_sites.masked_sequences + File raw_tree = draft_augur_tree.aligned_tree + File refined_tree = refine_augur_tree.tree_refined + File branch_lengths = refine_augur_tree.branch_lengths + File json_nt_muts = ancestral_tree.nt_muts_json + File ancestral_sequences_fasta = ancestral_tree.sequences + File json_aa_muts = translate_augur_tree.aa_muts_json + File? node_clade_data_json = assign_clades_to_nodes.node_clade_data_json + File? json_ancestral_traits = ancestral_traits.node_data_json + File auspice_input_json = export_auspice_json.virus_json + } +} diff --git a/pipes/WDL/workflows/newick_to_auspice.wdl b/pipes/WDL/workflows/augur_from_newick.wdl similarity index 94% rename from pipes/WDL/workflows/newick_to_auspice.wdl rename to pipes/WDL/workflows/augur_from_newick.wdl index 093a2b713..58d3c3ea0 100644 --- a/pipes/WDL/workflows/newick_to_auspice.wdl +++ b/pipes/WDL/workflows/augur_from_newick.wdl @@ -2,7 +2,7 @@ version 1.0 import "../tasks/tasks_nextstrain.wdl" as nextstrain -workflow newick_to_auspice { +workflow augur_from_newick { meta { description: "Convert a newick formatted phylogenetic tree into a json suitable for auspice visualization. 
See https://nextstrain-augur.readthedocs.io/en/stable/usage/cli/export.html" author: "Broad Viral Genomics" diff --git a/pipes/WDL/workflows/build_augur_tree.wdl b/pipes/WDL/workflows/build_augur_tree.wdl index d26385429..74bb68c11 100644 --- a/pipes/WDL/workflows/build_augur_tree.wdl +++ b/pipes/WDL/workflows/build_augur_tree.wdl @@ -127,7 +127,7 @@ workflow build_augur_tree { output { File combined_assembly_fasta = concatenate.combined File augur_aligned_fasta = augur_mafft_align.aligned_sequences - File masked_fasta = augur_mask_sites.masked_sequences + File masked_fasta = augur_mask_sites.masked_sequences File raw_tree = draft_augur_tree.aligned_tree File refined_tree = refine_augur_tree.tree_refined File branch_lengths = refine_augur_tree.branch_lengths From 09305b24f1198fa1899d7a40bb5273fa8c6a9f6e Mon Sep 17 00:00:00 2001 From: Danny Park Date: Tue, 9 Jun 2020 17:43:38 -0400 Subject: [PATCH 12/17] prepend path to wdl --- travis/tests-cromwell.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/travis/tests-cromwell.sh b/travis/tests-cromwell.sh index 8fc9a11ab..6d1bc4552 100755 --- a/travis/tests-cromwell.sh +++ b/travis/tests-cromwell.sh @@ -14,7 +14,7 @@ for workflow in ../pipes/WDL/workflows/*.wdl; do # the "cat" is to allow a pipe failure (otherwise it halts because of set -e) java -Dconfig.file=../pipes/cromwell/cromwell.local-travis.conf \ -jar ../cromwell.jar run \ - $workflow_name.wdl \ + ../pipes/WDL/workflows/$workflow_name.wdl \ -i $input_json | tee cromwell.out if [ ${PIPESTATUS[0]} -gt 0 ]; then echo "error running $workflow_name" From 4c77cf8b5105f5e872964a57086c80b9c3fadd15 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Tue, 9 Jun 2020 17:56:25 -0400 Subject: [PATCH 13/17] bump viral-assemble, viral-classify, viral-phylo 2.1.0.0 -> 2.1.1.0 --- requirements-modules.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements-modules.txt b/requirements-modules.txt index 
0ad5a25ad..23e7fde5e 100644 --- a/requirements-modules.txt +++ b/requirements-modules.txt @@ -1,7 +1,7 @@ broadinstitute/viral-core=2.1.1 -broadinstitute/viral-assemble=2.1.0.0 -broadinstitute/viral-classify=2.1.0.0 -broadinstitute/viral-phylo=2.1.0.0 +broadinstitute/viral-assemble=2.1.1.0 +broadinstitute/viral-classify=2.1.1.0 +broadinstitute/viral-phylo=2.1.1.0 broadinstitute/beast-beagle-cuda=1.10.5 broadinstitute/ncbi-tools=2.10.7.0 nextstrain/base=build-20200529T044753Z From 33188e8ddc01da2c0b862f134f09866c8d609c08 Mon Sep 17 00:00:00 2001 From: Chris Tomkins-Tinch Date: Tue, 9 Jun 2020 17:56:58 -0400 Subject: [PATCH 14/17] bugfix dxid reference for CONSOLIDATE_RUN_TARBALLS_APPLET (#104) --- travis/build-dx.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/travis/build-dx.sh b/travis/build-dx.sh index 2a9788559..cd7ea823d 100755 --- a/travis/build-dx.sh +++ b/travis/build-dx.sh @@ -53,7 +53,7 @@ done # build consolidate_run_tarballs (native DNAnexus applet) applet pushd pipes/dnax/dx-launcher cp consolidate_run_tarballs.yml consolidate_run_tarballs_dxapp.yml -dx_id=$(./dx-yml-build consolidate_run_tarballs_dxapp.yml -a --destination /build/$VERSION/ | jq -r ".id") +consolidate_tarballs_dx_id=$(./dx-yml-build consolidate_run_tarballs_dxapp.yml -a --destination /build/$VERSION/ | jq -r ".id") popd -echo -e "consolidate_run_tarballs\t$dx_id" >> $COMPILE_SUCCESS +echo -e "consolidate_run_tarballs\t$consolidate_tarballs_dx_id" >> $COMPILE_SUCCESS @@ -66,7 +66,7 @@ for wf_name in $(echo "${demux_workflows_to_build}"); do pushd pipes/dnax/dx-launcher sed "s/DEFAULT_DEMUX_WORKFLOW_ID/$demux_workflow_id/" demux_launcher.yml \ | sed "s/DEFAULT_DEMUX_WORKFLOW_NAME/${wf_name}_launcher/" \ - | sed "s/DEFAULT_CONSOLIDATE_RUN_TARBALLS_APPLET_ID/$dx_id/" > "${wf_name}_dxapp.yml" + | sed "s/DEFAULT_CONSOLIDATE_RUN_TARBALLS_APPLET_ID/$consolidate_tarballs_dx_id/" > "${wf_name}_dxapp.yml" dx_id=$(./dx-yml-build ${wf_name}_dxapp.yml -a --destination /build/$VERSION/ | jq -r ".id") popd echo -e "${wf_name}_launcher\t$dx_id" >> 
$COMPILE_SUCCESS From 323f8e9a84ebf467759bf0401ac16e1f52424cb8 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Tue, 9 Jun 2020 18:26:02 -0400 Subject: [PATCH 15/17] revert bits of path handling for simplicity --- travis/tests-cromwell.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/travis/tests-cromwell.sh b/travis/tests-cromwell.sh index 6d1bc4552..d9daf5fac 100755 --- a/travis/tests-cromwell.sh +++ b/travis/tests-cromwell.sh @@ -2,6 +2,7 @@ set -e # intentionally allow for pipe failures below mkdir -p workflows +cp *.jar pipes/WDL/workflows/*.wdl pipes/WDL/tasks/*.wdl workflows cp -r test workflows/ cd workflows @@ -13,8 +14,8 @@ for workflow in ../pipes/WDL/workflows/*.wdl; do echo "Executing $workflow_name using Cromwell on local instance" # the "cat" is to allow a pipe failure (otherwise it halts because of set -e) java -Dconfig.file=../pipes/cromwell/cromwell.local-travis.conf \ - -jar ../cromwell.jar run \ - ../pipes/WDL/workflows/$workflow_name.wdl \ + -jar cromwell.jar run \ + $workflow_name.wdl \ -i $input_json | tee cromwell.out if [ ${PIPESTATUS[0]} -gt 0 ]; then echo "error running $workflow_name" From f59311259ce194251676cf0f227b04a61cf3b5b8 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Wed, 10 Jun 2020 07:08:46 -0400 Subject: [PATCH 16/17] add perf monitoring stats to task outputs --- pipes/WDL/tasks/tasks_nextstrain.wdl | 51 ++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index d67def77d..7e47f3fd7 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -170,7 +170,6 @@ task augur_mafft_align { ~{true="--remove-reference" false="" remove_reference} \ --debug \ --nthreads auto - cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes } runtime { docker: docker @@ -181,8 +180,9 @@ task augur_mafft_align { dx_instance_type: "mem3_ssd2_v2_x16" } output { - File 
aligned_sequences = "~{basename}_aligned.fasta" - File align_troubleshoot = stdout() + File aligned_sequences = "~{basename}_aligned.fasta" + File align_troubleshoot = stdout() + Int max_ram_gb = ceil(read_float("/sys/fs/cgroup/memory/memory.max_usage_in_bytes")/1000000000) String augur_version = read_string("VERSION") } } @@ -224,7 +224,8 @@ task augur_mask_sites { dx_instance_type: "mem1_ssd1_v2_x2" } output { - File masked_sequences = out_fname + File masked_sequences = out_fname + Int max_ram_gb = ceil(read_float("/sys/fs/cgroup/memory/memory.max_usage_in_bytes")/1000000000) String augur_version = read_string("VERSION") } } @@ -263,6 +264,8 @@ task draft_augur_tree { ~{"--vcf-reference " + vcf_reference} \ ~{"--tree-builder-args " + tree_builder_args} \ --nthreads auto + cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC + cat /proc/loadavg | cut -f 3 -d ' ' > LOAD_15M } runtime { docker: docker @@ -273,7 +276,10 @@ task draft_augur_tree { preemptible: 0 } output { - File aligned_tree = "~{basename}_raw_tree.nwk" + File aligned_tree = "~{basename}_raw_tree.nwk" + Int max_ram_gb = ceil(read_float("/sys/fs/cgroup/memory/memory.max_usage_in_bytes")/1000000000) + Int runtime_sec = ceil(read_float("UPTIME_SEC")) + Int cpu_load_15min = ceil(read_float("LOAD_15M")) String augur_version = read_string("VERSION") } } @@ -338,6 +344,8 @@ task refine_augur_tree { ~{true="--keep-polytomies" false="" keep_polytomies} \ ~{true="--date-confidence" false="" date_confidence} \ ~{"--vcf-reference " + vcf_reference} + cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC + cat /proc/loadavg | cut -f 3 -d ' ' > LOAD_15M } runtime { docker: docker @@ -348,8 +356,11 @@ task refine_augur_tree { preemptible: 0 } output { - File tree_refined = "~{basename}_refined_tree.nwk" - File branch_lengths = "~{basename}_branch_lengths.json" + File tree_refined = "~{basename}_refined_tree.nwk" + File branch_lengths = "~{basename}_branch_lengths.json" + Int max_ram_gb = 
ceil(read_float("/sys/fs/cgroup/memory/memory.max_usage_in_bytes")/1000000000) + Int runtime_sec = ceil(read_float("UPTIME_SEC")) + Int cpu_load_15min = ceil(read_float("LOAD_15M")) String augur_version = read_string("VERSION") } } @@ -390,7 +401,8 @@ task ancestral_traits { preemptible: 2 } output { - File node_data_json = "~{basename}_nodes.json" + File node_data_json = "~{basename}_nodes.json" + Int max_ram_gb = ceil(read_float("/sys/fs/cgroup/memory/memory.max_usage_in_bytes")/1000000000) String augur_version = read_string("VERSION") } } @@ -433,6 +445,8 @@ task ancestral_tree { --inference ~{default="joint" inference} \ ~{true="--keep-ambiguous" false="" keep_ambiguous} \ ~{true="--infer-ambiguous" false="" infer_ambiguous} + cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC + cat /proc/loadavg | cut -f 3 -d ' ' > LOAD_15M } runtime { docker: docker @@ -443,8 +457,11 @@ task ancestral_tree { preemptible: 2 } output { - File nt_muts_json = "~{basename}_nt_muts.json" - File sequences = "~{basename}_ancestral_sequences.fasta" + File nt_muts_json = "~{basename}_nt_muts.json" + File sequences = "~{basename}_ancestral_sequences.fasta" + Int max_ram_gb = ceil(read_float("/sys/fs/cgroup/memory/memory.max_usage_in_bytes")/1000000000) + Int runtime_sec = ceil(read_float("UPTIME_SEC")) + Int cpu_load_15min = ceil(read_float("LOAD_15M")) String augur_version = read_string("VERSION") } } @@ -485,7 +502,8 @@ task translate_augur_tree { preemptible: 2 } output { - File aa_muts_json = "~{basename}_aa_muts.json" + File aa_muts_json = "~{basename}_aa_muts.json" + Int max_ram_gb = ceil(read_float("/sys/fs/cgroup/memory/memory.max_usage_in_bytes")/1000000000) String augur_version = read_string("VERSION") } } @@ -522,7 +540,8 @@ task assign_clades_to_nodes { preemptible: 2 } output { - File node_clade_data_json = "~{out_basename}_node-clade-assignments.json" + File node_clade_data_json = "~{out_basename}_node-clade-assignments.json" + Int max_ram_gb = 
ceil(read_float("/sys/fs/cgroup/memory/memory.max_usage_in_bytes")/1000000000) String augur_version = read_string("VERSION") } } @@ -563,8 +582,9 @@ task augur_import_beast { preemptible: 2 } output { - File tree_newick = "~{tree_basename}.nwk" - File node_data_json = "~{tree_basename}.json" + File tree_newick = "~{tree_basename}.nwk" + File node_data_json = "~{tree_basename}.json" + Int max_ram_gb = ceil(read_float("/sys/fs/cgroup/memory/memory.max_usage_in_bytes")/1000000000) String augur_version = read_string("VERSION") } } @@ -646,7 +666,8 @@ task export_auspice_json { preemptible: 2 } output { - File virus_json = "~{out_basename}_auspice.json" + File virus_json = "~{out_basename}_auspice.json" + Int max_ram_gb = ceil(read_float("/sys/fs/cgroup/memory/memory.max_usage_in_bytes")/1000000000) String augur_version = read_string("VERSION") } } From 118336ed56165ec38e0e1d546aee40f206b2b147 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Wed, 10 Jun 2020 07:55:47 -0400 Subject: [PATCH 17/17] copy data from inside container to host --- pipes/WDL/tasks/tasks_nextstrain.wdl | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index 7e47f3fd7..ecbbb19c6 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -170,6 +170,7 @@ task augur_mafft_align { ~{true="--remove-reference" false="" remove_reference} \ --debug \ --nthreads auto + cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES } runtime { docker: docker @@ -182,7 +183,7 @@ task augur_mafft_align { output { File aligned_sequences = "~{basename}_aligned.fasta" File align_troubleshoot = stdout() - Int max_ram_gb = ceil(read_float("/sys/fs/cgroup/memory/memory.max_usage_in_bytes")/1000000000) + Int max_ram_gb = ceil(read_float("MEM_BYTES")/1000000000) String augur_version = read_string("VERSION") } } @@ -214,6 +215,7 @@ task augur_mask_sites { else 
cp "~{sequences}" "~{out_fname}" fi + cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES } runtime { docker: docker @@ -225,7 +227,7 @@ task augur_mask_sites { } output { File masked_sequences = out_fname - Int max_ram_gb = ceil(read_float("/sys/fs/cgroup/memory/memory.max_usage_in_bytes")/1000000000) + Int max_ram_gb = ceil(read_float("MEM_BYTES")/1000000000) String augur_version = read_string("VERSION") } } @@ -266,6 +268,7 @@ task draft_augur_tree { --nthreads auto cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC cat /proc/loadavg | cut -f 3 -d ' ' > LOAD_15M + cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES } runtime { docker: docker @@ -277,7 +280,7 @@ task draft_augur_tree { } output { File aligned_tree = "~{basename}_raw_tree.nwk" - Int max_ram_gb = ceil(read_float("/sys/fs/cgroup/memory/memory.max_usage_in_bytes")/1000000000) + Int max_ram_gb = ceil(read_float("MEM_BYTES")/1000000000) Int runtime_sec = ceil(read_float("UPTIME_SEC")) Int cpu_load_15min = ceil(read_float("LOAD_15M")) String augur_version = read_string("VERSION") @@ -346,6 +349,7 @@ task refine_augur_tree { ~{"--vcf-reference " + vcf_reference} cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC cat /proc/loadavg | cut -f 3 -d ' ' > LOAD_15M + cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES } runtime { docker: docker @@ -358,7 +362,7 @@ task refine_augur_tree { output { File tree_refined = "~{basename}_refined_tree.nwk" File branch_lengths = "~{basename}_branch_lengths.json" - Int max_ram_gb = ceil(read_float("/sys/fs/cgroup/memory/memory.max_usage_in_bytes")/1000000000) + Int max_ram_gb = ceil(read_float("MEM_BYTES")/1000000000) Int runtime_sec = ceil(read_float("UPTIME_SEC")) Int cpu_load_15min = ceil(read_float("LOAD_15M")) String augur_version = read_string("VERSION") @@ -391,6 +395,7 @@ task ancestral_traits { --output-node-data "~{basename}_nodes.json" \ ~{"--weights " + weights} \ ~{true="--confidence" false="" confidence} + cat 
/sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES } runtime { docker: docker @@ -402,7 +407,7 @@ task ancestral_traits { } output { File node_data_json = "~{basename}_nodes.json" - Int max_ram_gb = ceil(read_float("/sys/fs/cgroup/memory/memory.max_usage_in_bytes")/1000000000) + Int max_ram_gb = ceil(read_float("MEM_BYTES")/1000000000) String augur_version = read_string("VERSION") } } @@ -447,6 +452,7 @@ task ancestral_tree { ~{true="--infer-ambiguous" false="" infer_ambiguous} cat /proc/uptime | cut -f 1 -d ' ' > UPTIME_SEC cat /proc/loadavg | cut -f 3 -d ' ' > LOAD_15M + cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES } runtime { docker: docker @@ -459,7 +465,7 @@ task ancestral_tree { output { File nt_muts_json = "~{basename}_nt_muts.json" File sequences = "~{basename}_ancestral_sequences.fasta" - Int max_ram_gb = ceil(read_float("/sys/fs/cgroup/memory/memory.max_usage_in_bytes")/1000000000) + Int max_ram_gb = ceil(read_float("MEM_BYTES")/1000000000) Int runtime_sec = ceil(read_float("UPTIME_SEC")) Int cpu_load_15min = ceil(read_float("LOAD_15M")) String augur_version = read_string("VERSION") @@ -492,6 +498,7 @@ task translate_augur_tree { ~{"--vcf-reference " + vcf_reference} \ ~{"--genes " + genes} \ --output-node-data ~{basename}_aa_muts.json + cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES } runtime { docker: docker @@ -503,7 +510,7 @@ task translate_augur_tree { } output { File aa_muts_json = "~{basename}_aa_muts.json" - Int max_ram_gb = ceil(read_float("/sys/fs/cgroup/memory/memory.max_usage_in_bytes")/1000000000) + Int max_ram_gb = ceil(read_float("MEM_BYTES")/1000000000) String augur_version = read_string("VERSION") } } @@ -530,6 +537,7 @@ task assign_clades_to_nodes { --reference ~{ref_fasta} \ --clades ~{clades_tsv} \ --output-node-data ~{out_basename}_node-clade-assignments.json + cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES } runtime { docker: docker @@ -541,7 +549,7 @@ task 
assign_clades_to_nodes { } output { File node_clade_data_json = "~{out_basename}_node-clade-assignments.json" - Int max_ram_gb = ceil(read_float("/sys/fs/cgroup/memory/memory.max_usage_in_bytes")/1000000000) + Int max_ram_gb = ceil(read_float("MEM_BYTES")/1000000000) String augur_version = read_string("VERSION") } } @@ -572,6 +580,7 @@ task augur_import_beast { ~{"--tip-date-regex " + tip_date_regex} \ ~{"--tip-date-format " + tip_date_format} \ ~{"--tip-date-delimeter " + tip_date_delimiter} + cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES } runtime { docker: docker @@ -656,6 +665,7 @@ task export_auspice_json { ~{"--colors " + colors_tsv} \ ~{"--description " + description_md} \ --output ~{out_basename}_auspice.json) + cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MEM_BYTES } runtime { docker: docker @@ -667,7 +677,7 @@ } output { File virus_json = "~{out_basename}_auspice.json" - Int max_ram_gb = ceil(read_float("/sys/fs/cgroup/memory/memory.max_usage_in_bytes")/1000000000) + Int max_ram_gb = ceil(read_float("MEM_BYTES")/1000000000) String augur_version = read_string("VERSION") } }