From 4eb3aebe9259386d7b4adb34363c5f07b9fbdd99 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Mon, 4 Mar 2024 14:15:16 -0500 Subject: [PATCH 01/13] add functionality to optionally filter reads to properly mapped pairs in tasks_reports.wdl::align_and_count(); make this the default add functionality to optionally filter reads to include only properly mapped pairs in tasks_reports.wdl::align_and_count(); make this the default for align_and_count by setting the task input filter_bam_to_proper_primary_mapped_reads=true. --- pipes/WDL/tasks/tasks_reports.wdl | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pipes/WDL/tasks/tasks_reports.wdl b/pipes/WDL/tasks/tasks_reports.wdl index d38faa7ce..176a0f83b 100644 --- a/pipes/WDL/tasks/tasks_reports.wdl +++ b/pipes/WDL/tasks/tasks_reports.wdl @@ -392,6 +392,10 @@ task align_and_count { File ref_db Int topNHits = 3 + Boolean? filter_bam_to_proper_primary_mapped_reads = true + Boolean? do_not_require_proper_mapped_pairs_when_filtering = false + Boolean? keep_singletons_when_filtering = false + Int? machine_mem_gb String docker = "quay.io/broadinstitute/viral-core:2.2.4" } @@ -411,6 +415,18 @@ task align_and_count { pattern: ["*.FASTA"], category: "required" } + filter_bam_to_proper_primary_mapped_reads: { + description: "If specified, reads will be filtered after alignment to include only those flagged as properly paired.", + category: "optional" + } + do_not_require_proper_mapped_pairs_when_filtering: { + description: "Do not require reads to be properly paired when filtering", + category: "optional" + } + keep_singletons_when_filtering: { + description: "Keep singletons when filtering", + category: "optional" + } } command { set -ex -o pipefail @@ -422,6 +438,9 @@ task align_and_count { "${reads_basename}.bam" \ "${ref_db}" \ --outStats "${reads_basename}.count.${ref_basename}.txt.unsorted" \ + ${true="--filterReadsAfterAlignment" false="" filter_bam_to_proper_primary_mapped_reads} \ + ${true="--doNotRequirePairsToBeProper" false="" do_not_require_proper_mapped_pairs_when_filtering} \ + ${true="--keepSingletons" false="" keep_singletons_when_filtering} \ --loglevel=DEBUG sort -b -r -n -k3 "${reads_basename}.count.${ref_basename}.txt.unsorted" > "${reads_basename}.count.${ref_basename}.txt" From 415bfe40d4d652be34994b10e4609d4b3af7d86f Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Mon, 4 Mar 2024 14:17:54 -0500 Subject: [PATCH 02/13] update version of viral-core viral-core 2.2.4->2.3.0 --- pipes/WDL/tasks/tasks_assembly.wdl | 4 ++-- pipes/WDL/tasks/tasks_demux.wdl | 4 ++-- pipes/WDL/tasks/tasks_interhost.wdl | 2 +- pipes/WDL/tasks/tasks_ncbi.wdl | 6 +++--- pipes/WDL/tasks/tasks_nextstrain.wdl | 3 +-- pipes/WDL/tasks/tasks_read_utils.wdl | 14 +++++++------- pipes/WDL/tasks/tasks_reports.wdl | 12 ++++++------ pipes/WDL/tasks/tasks_taxon_filter.wdl | 2 +- pipes/WDL/tasks/tasks_utils.wdl | 8 ++++---- requirements-modules.txt | 2 +- 10 files changed, 28 insertions(+), 29 deletions(-) diff --git a/pipes/WDL/tasks/tasks_assembly.wdl b/pipes/WDL/tasks/tasks_assembly.wdl index a0ff7e0fe..cab357939 100644 --- a/pipes/WDL/tasks/tasks_assembly.wdl +++ b/pipes/WDL/tasks/tasks_assembly.wdl @@ -450,7 +450,7 @@ task align_reads { Boolean skip_mark_dupes = false Int?
machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" String sample_name = basename(basename(basename(reads_unmapped_bam, ".bam"), ".taxfilt"), ".clean") } @@ -846,7 +846,7 @@ task run_discordance { String out_basename = "run" Int min_coverage = 4 - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } parameter_meta { reads_aligned_bam: { diff --git a/pipes/WDL/tasks/tasks_demux.wdl b/pipes/WDL/tasks/tasks_demux.wdl index c3e2d4ff5..7ad347fca 100644 --- a/pipes/WDL/tasks/tasks_demux.wdl +++ b/pipes/WDL/tasks/tasks_demux.wdl @@ -6,7 +6,7 @@ task merge_tarballs { String out_filename Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 2625 @@ -163,7 +163,7 @@ task illumina_demux { Int? machine_mem_gb Int disk_size = 2625 - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } parameter_meta { diff --git a/pipes/WDL/tasks/tasks_interhost.wdl b/pipes/WDL/tasks/tasks_interhost.wdl index daa2aca22..a3f657387 100644 --- a/pipes/WDL/tasks/tasks_interhost.wdl +++ b/pipes/WDL/tasks/tasks_interhost.wdl @@ -351,7 +351,7 @@ task index_ref { File? novocraft_license Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 100 diff --git a/pipes/WDL/tasks/tasks_ncbi.wdl b/pipes/WDL/tasks/tasks_ncbi.wdl index e7da74972..6d4c02c1c 100644 --- a/pipes/WDL/tasks/tasks_ncbi.wdl +++ b/pipes/WDL/tasks/tasks_ncbi.wdl @@ -192,7 +192,7 @@ task structured_comments { File? filter_to_ids - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } String out_base = basename(assembly_stats_tsv, '.txt') command <<< @@ -272,7 +272,7 @@ task rename_fasta_header { String out_basename = basename(genome_fasta, ".fasta") - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } command { set -e @@ -437,7 +437,7 @@ task sra_meta_prep { Boolean paired String out_name = "sra_metadata.tsv" - String docker="quay.io/broadinstitute/viral-core:2.2.4" + String docker="quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 100 parameter_meta { diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index 614d6a93e..ac655c84f 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -280,7 +280,7 @@ task derived_cols { String? 
lab_highlight_loc Array[File] table_map = [] - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" Int disk_size = 50 } parameter_meta { @@ -848,7 +848,6 @@ task filter_sequences_to_list { String out_fname = sub(sub(basename(sequences, ".zst"), ".vcf", ".filtered.vcf"), ".fasta$", ".filtered.fasta") # Prior docker image: "nextstrain/base:build-20211012T204409Z" - String docker = "quay.io/broadinstitute/viral-core:2.2.4" Int disk_size = 750 } parameter_meta { diff --git a/pipes/WDL/tasks/tasks_read_utils.wdl b/pipes/WDL/tasks/tasks_read_utils.wdl index 6a4720651..195198e0b 100644 --- a/pipes/WDL/tasks/tasks_read_utils.wdl +++ b/pipes/WDL/tasks/tasks_read_utils.wdl @@ -84,7 +84,7 @@ task group_bams_by_sample { task get_bam_samplename { input { File bam - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = round(size(bam, "GB")) + 50 command <<< @@ -111,7 +111,7 @@ task get_sample_meta { input { Array[File] samplesheets_extended - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 50 command <<< @@ -172,7 +172,7 @@ task merge_and_reheader_bams { File? reheader_table String out_basename = basename(in_bams[0], ".bam") - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 750 @@ -244,7 +244,7 @@ task rmdup_ubam { String method = "mvicuna" Int machine_mem_gb = 7 - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 375 @@ -303,7 +303,7 @@ task downsample_bams { Boolean deduplicateAfter = false Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 750 @@ -367,7 +367,7 @@ task FastqToUBAM { String? sequencing_center String? additional_picard_options - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 375 parameter_meta { @@ -418,7 +418,7 @@ task read_depths { File aligned_bam String out_basename = basename(aligned_bam, '.bam') - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 200 command <<< diff --git a/pipes/WDL/tasks/tasks_reports.wdl b/pipes/WDL/tasks/tasks_reports.wdl index 176a0f83b..8d2501f9e 100644 --- a/pipes/WDL/tasks/tasks_reports.wdl +++ b/pipes/WDL/tasks/tasks_reports.wdl @@ -15,7 +15,7 @@ task alignment_metrics { Int max_amplicons=500 Int machine_mem_gb=13 - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } String out_basename = basename(aligned_bam, ".bam") @@ -136,7 +136,7 @@ task plot_coverage { String? plotXLimits # of the form "min max" (ints, space between) String? plotYLimits # of the form "min max" (ints, space between) - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 375 @@ -283,7 +283,7 @@ task coverage_report { Array[File] mapped_bam_idx # optional.. 
speeds it up if you provide it, otherwise we auto-index String out_report_name = "coverage_report.txt" - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 375 @@ -350,7 +350,7 @@ task fastqc { input { File reads_bam - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } parameter_meta { reads_bam:{ @@ -397,7 +397,7 @@ task align_and_count { Boolean? keep_singletons_when_filtering = false Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } String reads_basename=basename(reads_bam, ".bam") @@ -472,7 +472,7 @@ task align_and_count_summary { String output_prefix = "count_summary" - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 100 diff --git a/pipes/WDL/tasks/tasks_taxon_filter.wdl b/pipes/WDL/tasks/tasks_taxon_filter.wdl index 7da05cf2b..0907bc3f9 100644 --- a/pipes/WDL/tasks/tasks_taxon_filter.wdl +++ b/pipes/WDL/tasks/tasks_taxon_filter.wdl @@ -211,7 +211,7 @@ task merge_one_per_sample { Boolean rmdup = false Int machine_mem_gb = 7 - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 750 diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index bbef0044c..293f907eb 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -104,7 +104,7 @@ task zcat { { if [ -f /sys/fs/cgroup/memory.peak ]; then cat /sys/fs/cgroup/memory.peak; elif [ -f /sys/fs/cgroup/memory/memory.peak ]; then cat /sys/fs/cgroup/memory/memory.peak; elif [ -f /sys/fs/cgroup/memory/memory.max_usage_in_bytes ]; then cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes; else echo "0"; fi } > MEM_BYTES >>> runtime { - docker: "quay.io/broadinstitute/viral-core:2.2.4" + docker: "quay.io/broadinstitute/viral-core:2.3.0" memory: "1 GB" cpu: cpus disks: "local-disk " + disk_size + " LOCAL" @@ -399,7 +399,7 @@ task tsv_join { runtime { memory: "~{machine_mem_gb} GB" cpu: 4 - docker: "quay.io/broadinstitute/viral-core:2.2.4" + docker: "quay.io/broadinstitute/viral-core:2.3.0" disks: "local-disk " + disk_size + " HDD" disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x4" @@ -486,7 +486,7 @@ task tsv_stack { input { Array[File]+ input_tsvs String out_basename - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 50 @@ -749,7 +749,7 @@ task filter_sequences_by_length { File sequences_fasta Int min_non_N = 1 - String docker = "quay.io/broadinstitute/viral-core:2.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" Int disk_size = 750 } parameter_meta { diff --git a/requirements-modules.txt b/requirements-modules.txt index d0f852682..53718d00a 100644 --- a/requirements-modules.txt +++ b/requirements-modules.txt @@ -1,4 +1,4 @@ -broadinstitute/viral-core=2.2.4 +broadinstitute/viral-core=2.3.0 broadinstitute/viral-assemble=2.2.4.0 broadinstitute/viral-classify=2.2.4.0 broadinstitute/viral-phylo=2.1.20.2 From 9abd348bb61c1acb30108e4b197ef6ad054ae616 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Mon, 4 Mar 2024 14:35:30 -0500 Subject: [PATCH 03/13] bump viral-core 2.3.0 -> 2.3.1 --- pipes/WDL/tasks/tasks_assembly.wdl | 4 ++-- pipes/WDL/tasks/tasks_demux.wdl 
| 4 ++-- pipes/WDL/tasks/tasks_interhost.wdl | 2 +- pipes/WDL/tasks/tasks_ncbi.wdl | 6 +++--- pipes/WDL/tasks/tasks_nextstrain.wdl | 3 ++- pipes/WDL/tasks/tasks_read_utils.wdl | 14 +++++++------- pipes/WDL/tasks/tasks_reports.wdl | 12 ++++++------ pipes/WDL/tasks/tasks_taxon_filter.wdl | 2 +- pipes/WDL/tasks/tasks_utils.wdl | 8 ++++---- requirements-modules.txt | 2 +- 10 files changed, 29 insertions(+), 28 deletions(-) diff --git a/pipes/WDL/tasks/tasks_assembly.wdl b/pipes/WDL/tasks/tasks_assembly.wdl index cab357939..3bfbcd2a1 100644 --- a/pipes/WDL/tasks/tasks_assembly.wdl +++ b/pipes/WDL/tasks/tasks_assembly.wdl @@ -450,7 +450,7 @@ task align_reads { Boolean skip_mark_dupes = false Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" String sample_name = basename(basename(basename(reads_unmapped_bam, ".bam"), ".taxfilt"), ".clean") } @@ -846,7 +846,7 @@ task run_discordance { String out_basename = "run" Int min_coverage = 4 - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" } parameter_meta { reads_aligned_bam: { diff --git a/pipes/WDL/tasks/tasks_demux.wdl b/pipes/WDL/tasks/tasks_demux.wdl index 7ad347fca..ba5dc587f 100644 --- a/pipes/WDL/tasks/tasks_demux.wdl +++ b/pipes/WDL/tasks/tasks_demux.wdl @@ -6,7 +6,7 @@ task merge_tarballs { String out_filename Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" } Int disk_size = 2625 @@ -163,7 +163,7 @@ task illumina_demux { Int? machine_mem_gb Int disk_size = 2625 - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" } parameter_meta { diff --git a/pipes/WDL/tasks/tasks_interhost.wdl b/pipes/WDL/tasks/tasks_interhost.wdl index a3f657387..0c1f1f19c 100644 --- a/pipes/WDL/tasks/tasks_interhost.wdl +++ b/pipes/WDL/tasks/tasks_interhost.wdl @@ -351,7 +351,7 @@ task index_ref { File? novocraft_license Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" } Int disk_size = 100 diff --git a/pipes/WDL/tasks/tasks_ncbi.wdl b/pipes/WDL/tasks/tasks_ncbi.wdl index 6d4c02c1c..17757f862 100644 --- a/pipes/WDL/tasks/tasks_ncbi.wdl +++ b/pipes/WDL/tasks/tasks_ncbi.wdl @@ -192,7 +192,7 @@ task structured_comments { File? filter_to_ids - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" } String out_base = basename(assembly_stats_tsv, '.txt') command <<< @@ -272,7 +272,7 @@ task rename_fasta_header { String out_basename = basename(genome_fasta, ".fasta") - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" } command { set -e @@ -437,7 +437,7 @@ task sra_meta_prep { Boolean paired String out_name = "sra_metadata.tsv" - String docker="quay.io/broadinstitute/viral-core:2.3.0" + String docker="quay.io/broadinstitute/viral-core:2.3.1" } Int disk_size = 100 parameter_meta { diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index ac655c84f..cc64c6321 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -280,7 +280,7 @@ task derived_cols { String? 
lab_highlight_loc Array[File] table_map = [] - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" Int disk_size = 50 } parameter_meta { @@ -848,6 +848,7 @@ task filter_sequences_to_list { String out_fname = sub(sub(basename(sequences, ".zst"), ".vcf", ".filtered.vcf"), ".fasta$", ".filtered.fasta") # Prior docker image: "nextstrain/base:build-20211012T204409Z" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" Int disk_size = 750 } parameter_meta { diff --git a/pipes/WDL/tasks/tasks_read_utils.wdl b/pipes/WDL/tasks/tasks_read_utils.wdl index 195198e0b..60bc8ded5 100644 --- a/pipes/WDL/tasks/tasks_read_utils.wdl +++ b/pipes/WDL/tasks/tasks_read_utils.wdl @@ -84,7 +84,7 @@ task group_bams_by_sample { task get_bam_samplename { input { File bam - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" } Int disk_size = round(size(bam, "GB")) + 50 command <<< @@ -111,7 +111,7 @@ task get_sample_meta { input { Array[File] samplesheets_extended - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" } Int disk_size = 50 command <<< @@ -172,7 +172,7 @@ task merge_and_reheader_bams { File? reheader_table String out_basename = basename(in_bams[0], ".bam") - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" } Int disk_size = 750 @@ -244,7 +244,7 @@ task rmdup_ubam { String method = "mvicuna" Int machine_mem_gb = 7 - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" } Int disk_size = 375 @@ -303,7 +303,7 @@ task downsample_bams { Boolean deduplicateAfter = false Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" } Int disk_size = 750 @@ -367,7 +367,7 @@ task FastqToUBAM { String? sequencing_center String? additional_picard_options - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" } Int disk_size = 375 parameter_meta { @@ -418,7 +418,7 @@ task read_depths { File aligned_bam String out_basename = basename(aligned_bam, '.bam') - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" } Int disk_size = 200 command <<< diff --git a/pipes/WDL/tasks/tasks_reports.wdl b/pipes/WDL/tasks/tasks_reports.wdl index 8d2501f9e..50e37006d 100644 --- a/pipes/WDL/tasks/tasks_reports.wdl +++ b/pipes/WDL/tasks/tasks_reports.wdl @@ -15,7 +15,7 @@ task alignment_metrics { Int max_amplicons=500 Int machine_mem_gb=13 - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" } String out_basename = basename(aligned_bam, ".bam") @@ -136,7 +136,7 @@ task plot_coverage { String? plotXLimits # of the form "min max" (ints, space between) String? plotYLimits # of the form "min max" (ints, space between) - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" } Int disk_size = 375 @@ -283,7 +283,7 @@ task coverage_report { Array[File] mapped_bam_idx # optional.. 
speeds it up if you provide it, otherwise we auto-index String out_report_name = "coverage_report.txt" - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" } Int disk_size = 375 @@ -350,7 +350,7 @@ task fastqc { input { File reads_bam - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" } parameter_meta { reads_bam:{ @@ -397,7 +397,7 @@ task align_and_count { Boolean? keep_singletons_when_filtering = false Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" } String reads_basename=basename(reads_bam, ".bam") @@ -472,7 +472,7 @@ task align_and_count_summary { String output_prefix = "count_summary" - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" } Int disk_size = 100 diff --git a/pipes/WDL/tasks/tasks_taxon_filter.wdl b/pipes/WDL/tasks/tasks_taxon_filter.wdl index 0907bc3f9..dda5ab4d0 100644 --- a/pipes/WDL/tasks/tasks_taxon_filter.wdl +++ b/pipes/WDL/tasks/tasks_taxon_filter.wdl @@ -211,7 +211,7 @@ task merge_one_per_sample { Boolean rmdup = false Int machine_mem_gb = 7 - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" } Int disk_size = 750 diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index 293f907eb..575a17cb2 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -104,7 +104,7 @@ task zcat { { if [ -f /sys/fs/cgroup/memory.peak ]; then cat /sys/fs/cgroup/memory.peak; elif [ -f /sys/fs/cgroup/memory/memory.peak ]; then cat /sys/fs/cgroup/memory/memory.peak; elif [ -f /sys/fs/cgroup/memory/memory.max_usage_in_bytes ]; then cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes; else echo "0"; fi } > MEM_BYTES >>> runtime { - docker: "quay.io/broadinstitute/viral-core:2.3.0" + docker: "quay.io/broadinstitute/viral-core:2.3.1" memory: "1 GB" cpu: cpus disks: "local-disk " + disk_size + " LOCAL" @@ -399,7 +399,7 @@ task tsv_join { runtime { memory: "~{machine_mem_gb} GB" cpu: 4 - docker: "quay.io/broadinstitute/viral-core:2.3.0" + docker: "quay.io/broadinstitute/viral-core:2.3.1" disks: "local-disk " + disk_size + " HDD" disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x4" @@ -486,7 +486,7 @@ task tsv_stack { input { Array[File]+ input_tsvs String out_basename - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" } Int disk_size = 50 @@ -749,7 +749,7 @@ task filter_sequences_by_length { File sequences_fasta Int min_non_N = 1 - String docker = "quay.io/broadinstitute/viral-core:2.3.0" + String docker = "quay.io/broadinstitute/viral-core:2.3.1" Int disk_size = 750 } parameter_meta { diff --git a/requirements-modules.txt b/requirements-modules.txt index 53718d00a..7a178f479 100644 --- a/requirements-modules.txt +++ b/requirements-modules.txt @@ -1,4 +1,4 @@ -broadinstitute/viral-core=2.3.0 +broadinstitute/viral-core=2.3.1 broadinstitute/viral-assemble=2.2.4.0 broadinstitute/viral-classify=2.2.4.0 broadinstitute/viral-phylo=2.1.20.2 From e64db2f9371c28385a9c9ccc18410bbbc9e6d681 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Wed, 6 Mar 2024 14:39:10 -0500 Subject: [PATCH 04/13] set viral-core image to 2.3.0 --- pipes/WDL/tasks/tasks_assembly.wdl | 4 ++-- pipes/WDL/tasks/tasks_demux.wdl | 
4 ++-- pipes/WDL/tasks/tasks_interhost.wdl | 2 +- pipes/WDL/tasks/tasks_ncbi.wdl | 6 +++--- pipes/WDL/tasks/tasks_nextstrain.wdl | 2 +- pipes/WDL/tasks/tasks_read_utils.wdl | 14 +++++++------- pipes/WDL/tasks/tasks_reports.wdl | 12 ++++++------ pipes/WDL/tasks/tasks_taxon_filter.wdl | 2 +- pipes/WDL/tasks/tasks_utils.wdl | 8 ++++---- requirements-modules.txt | 2 +- 10 files changed, 28 insertions(+), 28 deletions(-) diff --git a/pipes/WDL/tasks/tasks_assembly.wdl b/pipes/WDL/tasks/tasks_assembly.wdl index 3bfbcd2a1..cab357939 100644 --- a/pipes/WDL/tasks/tasks_assembly.wdl +++ b/pipes/WDL/tasks/tasks_assembly.wdl @@ -450,7 +450,7 @@ task align_reads { Boolean skip_mark_dupes = false Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" String sample_name = basename(basename(basename(reads_unmapped_bam, ".bam"), ".taxfilt"), ".clean") } @@ -846,7 +846,7 @@ task run_discordance { String out_basename = "run" Int min_coverage = 4 - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } parameter_meta { reads_aligned_bam: { diff --git a/pipes/WDL/tasks/tasks_demux.wdl b/pipes/WDL/tasks/tasks_demux.wdl index ba5dc587f..7ad347fca 100644 --- a/pipes/WDL/tasks/tasks_demux.wdl +++ b/pipes/WDL/tasks/tasks_demux.wdl @@ -6,7 +6,7 @@ task merge_tarballs { String out_filename Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 2625 @@ -163,7 +163,7 @@ task illumina_demux { Int? machine_mem_gb Int disk_size = 2625 - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } parameter_meta { diff --git a/pipes/WDL/tasks/tasks_interhost.wdl b/pipes/WDL/tasks/tasks_interhost.wdl index 0c1f1f19c..a3f657387 100644 --- a/pipes/WDL/tasks/tasks_interhost.wdl +++ b/pipes/WDL/tasks/tasks_interhost.wdl @@ -351,7 +351,7 @@ task index_ref { File? novocraft_license Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 100 diff --git a/pipes/WDL/tasks/tasks_ncbi.wdl b/pipes/WDL/tasks/tasks_ncbi.wdl index 17757f862..6d4c02c1c 100644 --- a/pipes/WDL/tasks/tasks_ncbi.wdl +++ b/pipes/WDL/tasks/tasks_ncbi.wdl @@ -192,7 +192,7 @@ task structured_comments { File? filter_to_ids - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } String out_base = basename(assembly_stats_tsv, '.txt') command <<< @@ -272,7 +272,7 @@ task rename_fasta_header { String out_basename = basename(genome_fasta, ".fasta") - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } command { set -e @@ -437,7 +437,7 @@ task sra_meta_prep { Boolean paired String out_name = "sra_metadata.tsv" - String docker="quay.io/broadinstitute/viral-core:2.3.1" + String docker="quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 100 parameter_meta { diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index cc64c6321..4d2431f60 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -280,7 +280,7 @@ task derived_cols { String? 
lab_highlight_loc Array[File] table_map = [] - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" Int disk_size = 50 } parameter_meta { diff --git a/pipes/WDL/tasks/tasks_read_utils.wdl b/pipes/WDL/tasks/tasks_read_utils.wdl index 60bc8ded5..195198e0b 100644 --- a/pipes/WDL/tasks/tasks_read_utils.wdl +++ b/pipes/WDL/tasks/tasks_read_utils.wdl @@ -84,7 +84,7 @@ task group_bams_by_sample { task get_bam_samplename { input { File bam - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = round(size(bam, "GB")) + 50 command <<< @@ -111,7 +111,7 @@ task get_sample_meta { input { Array[File] samplesheets_extended - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 50 command <<< @@ -172,7 +172,7 @@ task merge_and_reheader_bams { File? reheader_table String out_basename = basename(in_bams[0], ".bam") - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 750 @@ -244,7 +244,7 @@ task rmdup_ubam { String method = "mvicuna" Int machine_mem_gb = 7 - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 375 @@ -303,7 +303,7 @@ task downsample_bams { Boolean deduplicateAfter = false Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 750 @@ -367,7 +367,7 @@ task FastqToUBAM { String? sequencing_center String? additional_picard_options - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 375 parameter_meta { @@ -418,7 +418,7 @@ task read_depths { File aligned_bam String out_basename = basename(aligned_bam, '.bam') - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 200 command <<< diff --git a/pipes/WDL/tasks/tasks_reports.wdl b/pipes/WDL/tasks/tasks_reports.wdl index 50e37006d..8d2501f9e 100644 --- a/pipes/WDL/tasks/tasks_reports.wdl +++ b/pipes/WDL/tasks/tasks_reports.wdl @@ -15,7 +15,7 @@ task alignment_metrics { Int max_amplicons=500 Int machine_mem_gb=13 - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } String out_basename = basename(aligned_bam, ".bam") @@ -136,7 +136,7 @@ task plot_coverage { String? plotXLimits # of the form "min max" (ints, space between) String? plotYLimits # of the form "min max" (ints, space between) - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 375 @@ -283,7 +283,7 @@ task coverage_report { Array[File] mapped_bam_idx # optional.. speeds it up if you provide it, otherwise we auto-index String out_report_name = "coverage_report.txt" - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 375 @@ -350,7 +350,7 @@ task fastqc { input { File reads_bam - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } parameter_meta { reads_bam:{ @@ -397,7 +397,7 @@ task align_and_count { Boolean? 
keep_singletons_when_filtering = false Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } String reads_basename=basename(reads_bam, ".bam") @@ -472,7 +472,7 @@ task align_and_count_summary { String output_prefix = "count_summary" - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 100 diff --git a/pipes/WDL/tasks/tasks_taxon_filter.wdl b/pipes/WDL/tasks/tasks_taxon_filter.wdl index dda5ab4d0..0907bc3f9 100644 --- a/pipes/WDL/tasks/tasks_taxon_filter.wdl +++ b/pipes/WDL/tasks/tasks_taxon_filter.wdl @@ -211,7 +211,7 @@ task merge_one_per_sample { Boolean rmdup = false Int machine_mem_gb = 7 - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 750 diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index 575a17cb2..293f907eb 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -104,7 +104,7 @@ task zcat { { if [ -f /sys/fs/cgroup/memory.peak ]; then cat /sys/fs/cgroup/memory.peak; elif [ -f /sys/fs/cgroup/memory/memory.peak ]; then cat /sys/fs/cgroup/memory/memory.peak; elif [ -f /sys/fs/cgroup/memory/memory.max_usage_in_bytes ]; then cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes; else echo "0"; fi } > MEM_BYTES >>> runtime { - docker: "quay.io/broadinstitute/viral-core:2.3.1" + docker: "quay.io/broadinstitute/viral-core:2.3.0" memory: "1 GB" cpu: cpus disks: "local-disk " + disk_size + " LOCAL" @@ -399,7 +399,7 @@ task tsv_join { runtime { memory: "~{machine_mem_gb} GB" cpu: 4 - docker: "quay.io/broadinstitute/viral-core:2.3.1" + docker: "quay.io/broadinstitute/viral-core:2.3.0" disks: "local-disk " + disk_size + " HDD" disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x4" @@ -486,7 +486,7 @@ task tsv_stack { input { Array[File]+ input_tsvs String out_basename - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" } Int disk_size = 50 @@ -749,7 +749,7 @@ task filter_sequences_by_length { File sequences_fasta Int min_non_N = 1 - String docker = "quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" Int disk_size = 750 } parameter_meta { diff --git a/requirements-modules.txt b/requirements-modules.txt index 7a178f479..53718d00a 100644 --- a/requirements-modules.txt +++ b/requirements-modules.txt @@ -1,4 +1,4 @@ -broadinstitute/viral-core=2.3.1 +broadinstitute/viral-core=2.3.0 broadinstitute/viral-assemble=2.2.4.0 broadinstitute/viral-classify=2.2.4.0 broadinstitute/viral-phylo=2.1.20.2 From 61942e345c8adae107090660c4b3451c8828cec4 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Wed, 6 Mar 2024 14:45:31 -0500 Subject: [PATCH 05/13] pin viral-core to 2.3.0 in nextstrain tasks too --- pipes/WDL/tasks/tasks_nextstrain.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index 4d2431f60..3ed618939 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -848,7 +848,7 @@ task filter_sequences_to_list { String out_fname = sub(sub(basename(sequences, ".zst"), ".vcf", ".filtered.vcf"), ".fasta$", ".filtered.fasta") # Prior docker image: "nextstrain/base:build-20211012T204409Z" - String docker = 
"quay.io/broadinstitute/viral-core:2.3.1" + String docker = "quay.io/broadinstitute/viral-core:2.3.0" Int disk_size = 750 } parameter_meta { From 7c304a4c8daa27b14106b529baa1c1b8f62f04e5 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Fri, 8 Mar 2024 18:12:51 -0500 Subject: [PATCH 06/13] add keep_duplicates_when_filtering toggle to align_and_count; output additional metrics add keep_duplicates_when_filtering toggle to align_and_count task; also have this task output additional metrics for the percent of mapped reads aligning to hits that are not the top hit, and the percent of total input reads that mapped to any of the align_and_count ref seqs (i.e. how much crosstalk, and how much of the total sample, respectively) --- pipes/WDL/tasks/tasks_reports.wdl | 16 +++++++++++++++- pipes/WDL/workflows/align_and_count.wdl | 11 ++++++++--- pipes/WDL/workflows/classify_single.wdl | 2 ++ pipes/WDL/workflows/metagenomic_denovo.wdl | 3 +++ 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/pipes/WDL/tasks/tasks_reports.wdl b/pipes/WDL/tasks/tasks_reports.wdl index 8d2501f9e..ce8cb13a5 100644 --- a/pipes/WDL/tasks/tasks_reports.wdl +++ b/pipes/WDL/tasks/tasks_reports.wdl @@ -395,6 +395,7 @@ task align_and_count { Boolean? filter_bam_to_proper_primary_mapped_reads = true Boolean? do_not_require_proper_mapped_pairs_when_filtering = false Boolean? keep_singletons_when_filtering = false + Boolean? keep_duplicates_when_filtering = false Int? machine_mem_gb String docker = "quay.io/broadinstitute/viral-core:2.3.0" @@ -441,17 +442,30 @@ task align_and_count { ${true="--filterReadsAfterAlignment" false="" filter_bam_to_proper_primary_mapped_reads} \ ${true="--doNotRequirePairsToBeProper" false="" do_not_require_proper_mapped_pairs_when_filtering} \ ${true="--keepSingletons" false="" keep_singletons_when_filtering} \ + ${true="--keepDuplicates" false="" keep_duplicates_when_filtering} \ --loglevel=DEBUG sort -b -r -n -k3 "${reads_basename}.count.${ref_basename}.txt.unsorted" > "${reads_basename}.count.${ref_basename}.txt" head -n ${topNHits} "${reads_basename}.count.${ref_basename}.txt" > "${reads_basename}.count.${ref_basename}.top_${topNHits}_hits.txt" - head -1 "${reads_basename}.count.${ref_basename}.txt" | cut -f 1 > "${reads_basename}.count.${ref_basename}.top.txt" + TOP_HIT="$(head -1 '${reads_basename}.count.${ref_basename}.txt' | cut -f 1 | tee '${reads_basename}.count.${ref_basename}.top.txt'" + + TOTAL_COUNT_OF_TOP_HIT=$(grep -E "^($TOP_HIT)" "${reads_basename}.count.${ref_basename}.txt" | cut -f3 ) + TOTAL_COUNT_OF_LESSER_HITS=$(grep -vE "^(\*|$TOP_HIT)" "${reads_basename}.count.${ref_basename}.txt" | cut -f3 | paste -sd+ - | bc -l) + PCT_MAPPING_TO_LESSER_HITS=$(echo "scale=3; 100 * $TOTAL_COUNT_OF_LESSER_HITS / ($TOTAL_COUNT_OF_LESSER_HITS + $TOTAL_COUNT_OF_TOP_HIT)" | bc -l | awk '{printf "%.3f\n", $0}' | tee '${reads_basename}.count.${ref_basename}.pct_lesser_hits_of_mapped.txt') + + TOTAL_READS_IN_INPUT=$(samtools view -c "${reads_basename}.bam") + PCT_OF_INPUT_READS_MAPPED=$(echo "scale=3; 100 * ($TOTAL_COUNT_OF_LESSER_HITS + $TOTAL_COUNT_OF_TOP_HIT) / $TOTAL_READS_IN_INPUT" | bc -l | awk '{printf "%.3f\n", $0}' | tee '${reads_basename}.count.${ref_basename}.pct_total_reads_mapped.txt') } output { File report = "${reads_basename}.count.${ref_basename}.txt" + File report_top_hits = "${reads_basename}.count.${ref_basename}.top_${topNHits}_hits.txt" String top_hit_id = read_string("${reads_basename}.count.${ref_basename}.top.txt") + + String pct_total_reads_mapped = 
read_string('${reads_basename}.count.${ref_basename}.pct_total_reads_mapped.txt') + String pct_lesser_hits_of_mapped = read_string('${reads_basename}.count.${ref_basename}.pct_lesser_hits_of_mapped.txt') + String viralngs_version = read_string("VERSION") } diff --git a/pipes/WDL/workflows/align_and_count.wdl b/pipes/WDL/workflows/align_and_count.wdl index 950bca48d..01577871b 100644 --- a/pipes/WDL/workflows/align_and_count.wdl +++ b/pipes/WDL/workflows/align_and_count.wdl @@ -22,8 +22,13 @@ workflow align_and_count_report { call reports.align_and_count output { - File report = align_and_count.report - File report_top_hits = align_and_count.report_top_hits - String viral_core_version = align_and_count.viralngs_version + File report = align_and_count.report + File report_top_hits = align_and_count.report_top_hits + String tophit = spikein.top_hit_id + + String pct_mapped_of_total_reads = spikein.pct_total_reads_mapped + String pct_mapped_to_lesser_hits = spikein.pct_lesser_hits_of_mapped + + String viral_core_version = align_and_count.viralngs_version } } diff --git a/pipes/WDL/workflows/classify_single.wdl b/pipes/WDL/workflows/classify_single.wdl index bea728228..42de1fac0 100644 --- a/pipes/WDL/workflows/classify_single.wdl +++ b/pipes/WDL/workflows/classify_single.wdl @@ -151,6 +151,8 @@ workflow classify_single { File cleaned_fastqc = fastqc_cleaned.fastqc_html File spikein_report = spikein.report String spikein_tophit = spikein.top_hit_id + String spikein_pct_of_total_reads = spikein.pct_total_reads_mapped + String spikein_pct_lesser_hits = spikein.pct_lesser_hits_of_mapped String kraken2_viral_classify_version = kraken2.viralngs_version String deplete_viral_classify_version = deplete.viralngs_version diff --git a/pipes/WDL/workflows/metagenomic_denovo.wdl b/pipes/WDL/workflows/metagenomic_denovo.wdl index bebf5db73..c159a63fc 100644 --- a/pipes/WDL/workflows/metagenomic_denovo.wdl +++ b/pipes/WDL/workflows/metagenomic_denovo.wdl @@ -255,6 +255,9 @@ workflow metagenomic_denovo { Float bases_aligned = refine.align_to_self_merged_bases_aligned File? spikein_hits = spikein.report + String spikein_tophit = spikein.top_hit_id + String spikein_pct_of_total_reads = spikein.pct_total_reads_mapped + String spikein_pct_lesser_hits = spikein.pct_lesser_hits_of_mapped String viral_classify_version = kraken2.viralngs_version String viral_assemble_version = assemble.viralngs_version From cdb1af31611a4ae911a5a6ab56b0fcf786591a70 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Fri, 8 Mar 2024 18:17:43 -0500 Subject: [PATCH 07/13] disable align_and_count filtering by default --- pipes/WDL/tasks/tasks_reports.wdl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_reports.wdl b/pipes/WDL/tasks/tasks_reports.wdl index ce8cb13a5..7a397a30a 100644 --- a/pipes/WDL/tasks/tasks_reports.wdl +++ b/pipes/WDL/tasks/tasks_reports.wdl @@ -392,7 +392,7 @@ task align_and_count { File ref_db Int topNHits = 3 - Boolean? filter_bam_to_proper_primary_mapped_reads = true + Boolean? filter_bam_to_proper_primary_mapped_reads = false Boolean? do_not_require_proper_mapped_pairs_when_filtering = false Boolean? keep_singletons_when_filtering = false Boolean? 
keep_duplicates_when_filtering = false @@ -428,6 +428,10 @@ task align_and_count { description: "Keep singletons when filtering", category: "optional" } + keep_duplicates_when_filtering: { + description: "Do not exclude reads marked as duplicates when filtering", + category: "optional" + } } command { set -ex -o pipefail From 07816fa3905c4b23fd463ad55d4373f69ee4b2b8 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Fri, 8 Mar 2024 18:43:26 -0500 Subject: [PATCH 08/13] add missing close paren --- pipes/WDL/tasks/tasks_reports.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_reports.wdl b/pipes/WDL/tasks/tasks_reports.wdl index 7a397a30a..df1ae20ac 100644 --- a/pipes/WDL/tasks/tasks_reports.wdl +++ b/pipes/WDL/tasks/tasks_reports.wdl @@ -451,7 +451,7 @@ task align_and_count { sort -b -r -n -k3 "${reads_basename}.count.${ref_basename}.txt.unsorted" > "${reads_basename}.count.${ref_basename}.txt" head -n ${topNHits} "${reads_basename}.count.${ref_basename}.txt" > "${reads_basename}.count.${ref_basename}.top_${topNHits}_hits.txt" - TOP_HIT="$(head -1 '${reads_basename}.count.${ref_basename}.txt' | cut -f 1 | tee '${reads_basename}.count.${ref_basename}.top.txt'" + TOP_HIT="$(head -1 '${reads_basename}.count.${ref_basename}.txt' | cut -f 1 | tee '${reads_basename}.count.${ref_basename}.top.txt')" TOTAL_COUNT_OF_TOP_HIT=$(grep -E "^($TOP_HIT)" "${reads_basename}.count.${ref_basename}.txt" | cut -f3 ) TOTAL_COUNT_OF_LESSER_HITS=$(grep -vE "^(\*|$TOP_HIT)" "${reads_basename}.count.${ref_basename}.txt" | cut -f3 | paste -sd+ - | bc -l) From 00d517d1530d74c45588f3d35ca7bd80f3d844e4 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Fri, 8 Mar 2024 18:45:00 -0500 Subject: [PATCH 09/13] actions/checkout v3 -> v4 --- .github/workflows/build.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2a1a1a21d..a62bb38b8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -35,9 +35,9 @@ jobs: runs-on: ubuntu-20.04 steps: - name: checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # fetch git tags (tagged releases) because - # actions/checkout@v3 does either a full checkout or a shallow checkout without tags + # actions/checkout@v4 does either a full checkout or a shallow checkout without tags - name: fetch tags run: git fetch --prune --unshallow --tags - name: Programmatic environment setup @@ -88,9 +88,9 @@ jobs: runs-on: ubuntu-20.04 steps: - name: checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # fetch git tags (tagged releases) because - # actions/checkout@v3 does either a full checkout or a shallow checkout without tags + # actions/checkout@v4 does either a full checkout or a shallow checkout without tags - name: fetch tags run: git fetch --prune --unshallow --tags - name: Programmatic environment setup @@ -138,9 +138,9 @@ jobs: runs-on: ubuntu-20.04 steps: - name: checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # fetch git tags (tagged releases) because - # actions/checkout@v3 does either a full checkout or a shallow checkout without tags + # actions/checkout@v4 does either a full checkout or a shallow checkout without tags - name: fetch tags run: git fetch --prune --unshallow --tags - name: Programmatic environment setup @@ -183,9 +183,9 @@ jobs: runs-on: ubuntu-20.04 steps: - name: checkout repository - uses: 
actions/checkout@v3 + uses: actions/checkout@v4 # fetch git tags (tagged releases) because - # actions/checkout@v3 does either a full checkout or a shallow checkout without tags + # actions/checkout@v4 does either a full checkout or a shallow checkout without tags - name: fetch tags run: git fetch --prune --unshallow --tags - name: Programmatic environment setup @@ -238,9 +238,9 @@ jobs: runs-on: ubuntu-20.04 steps: - name: checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # fetch git tags (tagged releases) because - # actions/checkout@v3 does either a full checkout or a shallow checkout without tags + # actions/checkout@v4 does either a full checkout or a shallow checkout without tags - name: fetch tags run: git fetch --prune --unshallow --tags - name: Programmatic environment setup @@ -304,9 +304,9 @@ jobs: DX_PROJECT: project-F8PQ6380xf5bK0Qk0YPjB17P steps: - name: checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # fetch git tags (tagged releases) because - # actions/checkout@v3 does either a full checkout or a shallow checkout without tags + # actions/checkout@v4 does either a full checkout or a shallow checkout without tags - name: fetch tags run: git fetch --prune --unshallow --tags - name: Programmatic environment setup From a4c528b6b0be95824c984b15f22673f3ae77bd74 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Fri, 8 Mar 2024 18:51:25 -0500 Subject: [PATCH 10/13] align commands for readability --- pipes/WDL/tasks/tasks_reports.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pipes/WDL/tasks/tasks_reports.wdl b/pipes/WDL/tasks/tasks_reports.wdl index df1ae20ac..8bd4343bb 100644 --- a/pipes/WDL/tasks/tasks_reports.wdl +++ b/pipes/WDL/tasks/tasks_reports.wdl @@ -453,12 +453,12 @@ task align_and_count { head -n ${topNHits} "${reads_basename}.count.${ref_basename}.txt" > "${reads_basename}.count.${ref_basename}.top_${topNHits}_hits.txt" TOP_HIT="$(head -1 '${reads_basename}.count.${ref_basename}.txt' | cut -f 1 | tee '${reads_basename}.count.${ref_basename}.top.txt')" - TOTAL_COUNT_OF_TOP_HIT=$(grep -E "^($TOP_HIT)" "${reads_basename}.count.${ref_basename}.txt" | cut -f3 ) + TOTAL_COUNT_OF_TOP_HIT=$(grep -E "^($TOP_HIT)" "${reads_basename}.count.${ref_basename}.txt" | cut -f3) TOTAL_COUNT_OF_LESSER_HITS=$(grep -vE "^(\*|$TOP_HIT)" "${reads_basename}.count.${ref_basename}.txt" | cut -f3 | paste -sd+ - | bc -l) - PCT_MAPPING_TO_LESSER_HITS=$(echo "scale=3; 100 * $TOTAL_COUNT_OF_LESSER_HITS / ($TOTAL_COUNT_OF_LESSER_HITS + $TOTAL_COUNT_OF_TOP_HIT)" | bc -l | awk '{printf "%.3f\n", $0}' | tee '${reads_basename}.count.${ref_basename}.pct_lesser_hits_of_mapped.txt') + PCT_MAPPING_TO_LESSER_HITS=$(echo "scale=3; 100 * $TOTAL_COUNT_OF_LESSER_HITS / ($TOTAL_COUNT_OF_LESSER_HITS + $TOTAL_COUNT_OF_TOP_HIT)" | bc -l | awk '{printf "%.3f\n", $0}' | tee ${reads_basename}.count.${ref_basename}.pct_lesser_hits_of_mapped.txt) TOTAL_READS_IN_INPUT=$(samtools view -c "${reads_basename}.bam") - PCT_OF_INPUT_READS_MAPPED=$(echo "scale=3; 100 * ($TOTAL_COUNT_OF_LESSER_HITS + $TOTAL_COUNT_OF_TOP_HIT) / $TOTAL_READS_IN_INPUT" | bc -l | awk '{printf "%.3f\n", $0}' | tee '${reads_basename}.count.${ref_basename}.pct_total_reads_mapped.txt') + PCT_OF_INPUT_READS_MAPPED=$(echo "scale=3; 100 * ($TOTAL_COUNT_OF_LESSER_HITS + $TOTAL_COUNT_OF_TOP_HIT) / $TOTAL_READS_IN_INPUT" | bc -l | awk '{printf "%.3f\n", $0}' | tee ${reads_basename}.count.${ref_basename}.pct_total_reads_mapped.txt) } output { From 
66b4804f31ad5e3ee43e8756da0c3dd5581e1bf5 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Fri, 8 Mar 2024 18:51:43 -0500 Subject: [PATCH 11/13] actions/setup-python v4 -> v5 --- .github/workflows/build.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a62bb38b8..20e870208 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -67,7 +67,7 @@ jobs: run: | env - name: install python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.8' - name: install system dependencies @@ -166,7 +166,7 @@ jobs: echo "GITHUB_ACTIONS_BRANCH=$GITHUB_ACTIONS_BRANCH" echo "GITHUB_ACTIONS_BRANCH=$GITHUB_ACTIONS_BRANCH" >> $GITHUB_ENV - name: install python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.8' - name: install docs dependencies @@ -273,7 +273,7 @@ jobs: sudo rm -rf "/usr/local/share/boost" sudo rm -rf "$AGENT_TOOLSDIRECTORY" - name: install python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.8' - name: install system dependencies @@ -337,7 +337,7 @@ jobs: echo "${{ github.event.action }}" echo "${{ github.event.pull_request.merged }}" - name: install python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.8' - name: install java From 7eaec9866fdf17a90da3cb1c07f3b684594cd6de Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Fri, 8 Mar 2024 19:36:12 -0500 Subject: [PATCH 12/13] ${} to ~{} in align_and_count wdl, minor corrections where new outputs are added to existing workflows --- pipes/WDL/tasks/tasks_reports.wdl | 48 +++++++++++----------- pipes/WDL/workflows/align_and_count.wdl | 6 +-- pipes/WDL/workflows/metagenomic_denovo.wdl | 6 +-- 3 files changed, 31 insertions(+), 29 deletions(-) diff --git a/pipes/WDL/tasks/tasks_reports.wdl b/pipes/WDL/tasks/tasks_reports.wdl index 8bd4343bb..b613df202 100644 --- a/pipes/WDL/tasks/tasks_reports.wdl +++ b/pipes/WDL/tasks/tasks_reports.wdl @@ -433,42 +433,44 @@ task align_and_count { category: "optional" } } - command { + command <<< set -ex -o pipefail read_utils.py --version | tee VERSION - ln -s "${reads_bam}" "${reads_basename}.bam" + ln -s "~{reads_bam}" "~{reads_basename}.bam" read_utils.py minimap2_idxstats \ - "${reads_basename}.bam" \ - "${ref_db}" \ - --outStats "${reads_basename}.count.${ref_basename}.txt.unsorted" \ - ${true="--filterReadsAfterAlignment" false="" filter_bam_to_proper_primary_mapped_reads} \ - ${true="--doNotRequirePairsToBeProper" false="" do_not_require_proper_mapped_pairs_when_filtering} \ - ${true="--keepSingletons" false="" keep_singletons_when_filtering} \ - ${true="--keepDuplicates" false="" keep_duplicates_when_filtering} \ + "~{reads_basename}.bam" \ + "~{ref_db}" \ + --outStats "~{reads_basename}.count.~{ref_basename}.txt.unsorted" \ + ~{true="--filterReadsAfterAlignment" false="" filter_bam_to_proper_primary_mapped_reads} \ + ~{true="--doNotRequirePairsToBeProper" false="" do_not_require_proper_mapped_pairs_when_filtering} \ + ~{true="--keepSingletons" false="" keep_singletons_when_filtering} \ + ~{true="--keepDuplicates" false="" keep_duplicates_when_filtering} \ --loglevel=DEBUG - sort -b -r -n -k3 "${reads_basename}.count.${ref_basename}.txt.unsorted" > "${reads_basename}.count.${ref_basename}.txt" - head -n ${topNHits} "${reads_basename}.count.${ref_basename}.txt" > 
"${reads_basename}.count.${ref_basename}.top_${topNHits}_hits.txt" - TOP_HIT="$(head -1 '${reads_basename}.count.${ref_basename}.txt' | cut -f 1 | tee '${reads_basename}.count.${ref_basename}.top.txt')" + sort -b -r -n -k3 "~{reads_basename}.count.~{ref_basename}.txt.unsorted" > "~{reads_basename}.count.~{ref_basename}.txt" + head -n ~{topNHits} "~{reads_basename}.count.~{ref_basename}.txt" > "~{reads_basename}.count.~{ref_basename}.top_~{topNHits}_hits.txt" + TOP_HIT="$(head -1 '~{reads_basename}.count.~{ref_basename}.txt' | cut -f 1 | tee '~{reads_basename}.count.~{ref_basename}.top.txt')" - TOTAL_COUNT_OF_TOP_HIT=$(grep -E "^($TOP_HIT)" "${reads_basename}.count.${ref_basename}.txt" | cut -f3) - TOTAL_COUNT_OF_LESSER_HITS=$(grep -vE "^(\*|$TOP_HIT)" "${reads_basename}.count.${ref_basename}.txt" | cut -f3 | paste -sd+ - | bc -l) - PCT_MAPPING_TO_LESSER_HITS=$(echo "scale=3; 100 * $TOTAL_COUNT_OF_LESSER_HITS / ($TOTAL_COUNT_OF_LESSER_HITS + $TOTAL_COUNT_OF_TOP_HIT)" | bc -l | awk '{printf "%.3f\n", $0}' | tee ${reads_basename}.count.${ref_basename}.pct_lesser_hits_of_mapped.txt) + TOTAL_COUNT_OF_TOP_HIT=$(grep -E "^($TOP_HIT)" "~{reads_basename}.count.~{ref_basename}.txt" | cut -f3 | tee TOTAL_COUNT_OF_TOP_HIT) + TOTAL_COUNT_OF_LESSER_HITS=$(grep -vE "^(\*|$TOP_HIT)" "~{reads_basename}.count.~{ref_basename}.txt" | cut -f3 | paste -sd+ - | bc -l | tee TOTAL_COUNT_OF_LESSER_HITS) + PCT_MAPPING_TO_LESSER_HITS=$( echo "scale=3; 100 * $TOTAL_COUNT_OF_LESSER_HITS / ($TOTAL_COUNT_OF_LESSER_HITS + $TOTAL_COUNT_OF_TOP_HIT)" | \ + bc -l | awk '{printf "%.3f\n", $0}' | tee '~{reads_basename}.count.~{ref_basename}.pct_lesser_hits_of_mapped.txt' ) - TOTAL_READS_IN_INPUT=$(samtools view -c "${reads_basename}.bam") - PCT_OF_INPUT_READS_MAPPED=$(echo "scale=3; 100 * ($TOTAL_COUNT_OF_LESSER_HITS + $TOTAL_COUNT_OF_TOP_HIT) / $TOTAL_READS_IN_INPUT" | bc -l | awk '{printf "%.3f\n", $0}' | tee ${reads_basename}.count.${ref_basename}.pct_total_reads_mapped.txt) - } + TOTAL_READS_IN_INPUT=$(samtools view -c "~{reads_basename}.bam") + PCT_OF_INPUT_READS_MAPPED=$( echo "scale=3; 100 * ($TOTAL_COUNT_OF_LESSER_HITS + $TOTAL_COUNT_OF_TOP_HIT) / $TOTAL_READS_IN_INPUT" | \ + bc -l | awk '{printf "%.3f\n", $0}' | tee '~{reads_basename}.count.~{ref_basename}.pct_total_reads_mapped.txt' ) + >>> output { - File report = "${reads_basename}.count.${ref_basename}.txt" + File report = "~{reads_basename}.count.~{ref_basename}.txt" - File report_top_hits = "${reads_basename}.count.${ref_basename}.top_${topNHits}_hits.txt" - String top_hit_id = read_string("${reads_basename}.count.${ref_basename}.top.txt") + File report_top_hits = "~{reads_basename}.count.~{ref_basename}.top_~{topNHits}_hits.txt" + String top_hit_id = read_string("~{reads_basename}.count.~{ref_basename}.top.txt") - String pct_total_reads_mapped = read_string('${reads_basename}.count.${ref_basename}.pct_total_reads_mapped.txt') - String pct_lesser_hits_of_mapped = read_string('${reads_basename}.count.${ref_basename}.pct_lesser_hits_of_mapped.txt') + String pct_total_reads_mapped = read_string('~{reads_basename}.count.~{ref_basename}.pct_total_reads_mapped.txt') + String pct_lesser_hits_of_mapped = read_string('~{reads_basename}.count.~{ref_basename}.pct_lesser_hits_of_mapped.txt') String viralngs_version = read_string("VERSION") } diff --git a/pipes/WDL/workflows/align_and_count.wdl b/pipes/WDL/workflows/align_and_count.wdl index 01577871b..468ffe58e 100644 --- a/pipes/WDL/workflows/align_and_count.wdl +++ b/pipes/WDL/workflows/align_and_count.wdl @@ -24,10 +24,10 
@@ workflow align_and_count_report { output { File report = align_and_count.report File report_top_hits = align_and_count.report_top_hits - String tophit = spikein.top_hit_id + String tophit = align_and_count.top_hit_id - String pct_mapped_of_total_reads = spikein.pct_total_reads_mapped - String pct_mapped_to_lesser_hits = spikein.pct_lesser_hits_of_mapped + String pct_mapped_of_total_reads = align_and_count.pct_total_reads_mapped + String pct_mapped_to_lesser_hits = align_and_count.pct_lesser_hits_of_mapped String viral_core_version = align_and_count.viralngs_version } diff --git a/pipes/WDL/workflows/metagenomic_denovo.wdl b/pipes/WDL/workflows/metagenomic_denovo.wdl index c159a63fc..4dfbb55ab 100644 --- a/pipes/WDL/workflows/metagenomic_denovo.wdl +++ b/pipes/WDL/workflows/metagenomic_denovo.wdl @@ -255,9 +255,9 @@ workflow metagenomic_denovo { Float bases_aligned = refine.align_to_self_merged_bases_aligned File? spikein_hits = spikein.report - String spikein_tophit = spikein.top_hit_id - String spikein_pct_of_total_reads = spikein.pct_total_reads_mapped - String spikein_pct_lesser_hits = spikein.pct_lesser_hits_of_mapped + String? spikein_tophit = spikein.top_hit_id + String? spikein_pct_of_total_reads = spikein.pct_total_reads_mapped + String? spikein_pct_lesser_hits = spikein.pct_lesser_hits_of_mapped String viral_classify_version = kraken2.viralngs_version String viral_assemble_version = assemble.viralngs_version From 78f8fa0b790a5bcacf6b61c68005a85c6ff3acfa Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Fri, 8 Mar 2024 20:23:31 -0500 Subject: [PATCH 13/13] require values for the various filtering-related Boolean inputs in align_and_count require values for the various filtering-related Boolean inputs in align_and_count, since the default values guarantee they'll be set --- pipes/WDL/tasks/tasks_reports.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pipes/WDL/tasks/tasks_reports.wdl b/pipes/WDL/tasks/tasks_reports.wdl index b613df202..3c9b192fa 100644 --- a/pipes/WDL/tasks/tasks_reports.wdl +++ b/pipes/WDL/tasks/tasks_reports.wdl @@ -392,10 +392,10 @@ task align_and_count { File ref_db Int topNHits = 3 - Boolean? filter_bam_to_proper_primary_mapped_reads = false - Boolean? do_not_require_proper_mapped_pairs_when_filtering = false - Boolean? keep_singletons_when_filtering = false - Boolean? keep_duplicates_when_filtering = false + Boolean filter_bam_to_proper_primary_mapped_reads = false + Boolean do_not_require_proper_mapped_pairs_when_filtering = false + Boolean keep_singletons_when_filtering = false + Boolean keep_duplicates_when_filtering = false Int? machine_mem_gb String docker = "quay.io/broadinstitute/viral-core:2.3.0"
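
For context, a minimal sketch (illustrative only, not part of the patch series) of a caller exercising the align_and_count task as updated above. The workflow name, import path, and input files are placeholders; the task input and output names match those introduced in PATCH 01, 06, 12, and 13.

version 1.0

import "tasks_reports.wdl" as reports   # placeholder path; adjust to the repo's actual layout

workflow align_and_count_filter_example {
    input {
        File reads_bam   # reads to tally against the align_and_count reference DB
        File ref_db      # FASTA of reference sequences (e.g. spike-ins)
    }

    call reports.align_and_count {
        input:
            reads_bam = reads_bam,
            ref_db    = ref_db,
            # opt in to post-alignment filtering (off by default as of PATCH 07)
            filter_bam_to_proper_primary_mapped_reads = true,
            # defaults shown explicitly: drop singletons and duplicates when filtering
            keep_singletons_when_filtering = false,
            keep_duplicates_when_filtering = false
    }

    output {
        File   report                    = align_and_count.report
        String top_hit_id                = align_and_count.top_hit_id
        String pct_total_reads_mapped    = align_and_count.pct_total_reads_mapped
        String pct_lesser_hits_of_mapped = align_and_count.pct_lesser_hits_of_mapped
    }
}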