diff --git a/pipes/WDL/tasks/tasks_assembly.wdl b/pipes/WDL/tasks/tasks_assembly.wdl index 878ed920c..ae5715e40 100644 --- a/pipes/WDL/tasks/tasks_assembly.wdl +++ b/pipes/WDL/tasks/tasks_assembly.wdl @@ -648,3 +648,70 @@ task refine_2x_and_plot { dx_instance_type: "mem1_ssd1_v2_x8" } } + +task run_discordance { + meta { + description: "This step evaluates discordance between sequencing runs of the same sample. The input is a merged, aligned BAM file for a single sample. If multiple runs (read groups) exist, we split the aligned reads by read group and separately evaluate consensus calls per read group using bcftools mpileup and call. A VCF is emitted that describes variation between runs." + } + + input { + File reads_aligned_bam + File reference_fasta + String out_basename = "run" + + String docker="quay.io/broadinstitute/viral-core" + } + + command { + set -ex -o pipefail + + read_utils.py --version | tee VERSION + + # create 2-col table with read group ids in both cols + python3 < filtered.vcf + cat filtered.vcf | bcftools filter -i 'MAC>0' > "${out_basename}.discordant.vcf" + + # tally outputs + set +o pipefail # to handle empty grep + cat filtered.vcf | bcftools filter -i 'MAC=0' | grep -v '^#' | wc -l | tee num_concordant + cat "${out_basename}.discordant.vcf" | bcftools filter -i 'TYPE="snp"' | grep -v '^#' | wc -l | tee num_discordant_snps + cat "${out_basename}.discordant.vcf" | bcftools filter -i 'TYPE!="snp"' | grep -v '^#' | wc -l | tee num_discordant_indels + } + + output { + File discordant_sites_vcf = "${out_basename}.discordant.vcf" + Int concordant_sites = read_int("num_concordant") + Int discordant_snps = read_int("num_discordant_snps") + Int discordant_indels = read_int("num_discordant_indels") + String viralngs_version = read_string("VERSION") + } + + runtime { + docker: "${docker}" + memory: "3 GB" + cpu: 2 + disks: "local-disk 100 HDD" + dx_instance_type: "mem1_ssd1_v2_x2" + preemptible: 1 + } +} diff --git a/pipes/WDL/tasks/tasks_reports.wdl b/pipes/WDL/tasks/tasks_reports.wdl index c16319367..03e178824 100644 --- a/pipes/WDL/tasks/tasks_reports.wdl +++ b/pipes/WDL/tasks/tasks_reports.wdl @@ -145,7 +145,6 @@ task align_and_count { input { File reads_bam File ref_db - Int? minScoreToFilter Int? topNHits = 3 Int? machine_mem_gb @@ -161,11 +160,10 @@ task align_and_count { read_utils.py --version | tee VERSION ln -s ${reads_bam} ${reads_basename}.bam - read_utils.py bwamem_idxstats \ + read_utils.py minimap2_idxstats \ ${reads_basename}.bam \ ${ref_db} \ --outStats ${reads_basename}.count.${ref_basename}.txt.unsorted \ - ${'--minScoreToFilter=' + minScoreToFilter} \ --loglevel=DEBUG sort -b -r -n -k3 ${reads_basename}.count.${ref_basename}.txt.unsorted > ${reads_basename}.count.${ref_basename}.txt @@ -179,7 +177,7 @@ task align_and_count { } runtime { - memory: select_first([machine_mem_gb, 7]) + " GB" + memory: select_first([machine_mem_gb, 3]) + " GB" cpu: 4 docker: "${docker}" disks: "local-disk 375 LOCAL" @@ -367,7 +365,7 @@ task tsv_join { String join_type="inner" String out_basename - String docker="stratdat/csvkit" + String docker="quay.io/broadinstitute/viral-core" } command { @@ -408,7 +406,7 @@ task tsv_stack { input { Array[File]+ input_tsvs String out_basename - String docker="stratdat/csvkit" + String docker="quay.io/broadinstitute/viral-core" } command { diff --git a/pipes/WDL/workflows/assemble_refbased.wdl b/pipes/WDL/workflows/assemble_refbased.wdl index 10f027543..46ad34a05 100644 --- a/pipes/WDL/workflows/assemble_refbased.wdl +++ b/pipes/WDL/workflows/assemble_refbased.wdl @@ -94,6 +94,13 @@ workflow assemble_refbased { out_basename = "${sample_name}.align_to_ref.trimmed" } + call assembly.run_discordance { + input: + reads_aligned_bam = merge_align_to_ref.out_bam, + reference_fasta = reference_fasta, + out_basename = sample_name + } + call reports.plot_coverage as plot_ref_coverage { input: aligned_reads_bam = merge_align_to_ref.out_bam, @@ -140,6 +147,11 @@ workflow assemble_refbased { Int reference_genome_length = plot_ref_coverage.assembly_length Float assembly_mean_coverage = plot_ref_coverage.mean_coverage + Int replicate_concordant_sites = run_discordance.concordant_sites + Int replicate_discordant_snps = run_discordance.discordant_snps + Int replicate_discordant_indels = run_discordance.discordant_indels + File replicate_discordant_vcf = run_discordance.discordant_sites_vcf + Array[File] align_to_ref_per_input_aligned_flagstat = align_to_ref.aligned_bam_flagstat Array[Int] align_to_ref_per_input_reads_provided = align_to_ref.reads_provided Array[Int] align_to_ref_per_input_reads_aligned = align_to_ref.reads_aligned diff --git a/pipes/WDL/workflows/diff_genome_sets.wdl b/pipes/WDL/workflows/diff_genome_sets.wdl index 6d9b3c9aa..c30a02991 100644 --- a/pipes/WDL/workflows/diff_genome_sets.wdl +++ b/pipes/WDL/workflows/diff_genome_sets.wdl @@ -21,7 +21,7 @@ workflow diff_genome_sets { call reports.tsv_stack { input: input_tsvs = compare_two_genomes.comparison_table, - out_basename = "diff_genome_sets.txt" + out_basename = "diff_genome_sets" } output { diff --git a/requirements-modules.txt b/requirements-modules.txt index 37dd691e2..df4d77273 100644 --- a/requirements-modules.txt +++ b/requirements-modules.txt @@ -1,7 +1,7 @@ -broadinstitute/viral-core=2.1.3 -broadinstitute/viral-assemble=2.1.3.1 -broadinstitute/viral-classify=2.1.3.1 -broadinstitute/viral-phylo=2.1.3.1 +broadinstitute/viral-core=2.1.4 +broadinstitute/viral-assemble=2.1.4.0 +broadinstitute/viral-classify=2.1.4.0 +broadinstitute/viral-phylo=2.1.4.0 broadinstitute/beast-beagle-cuda=1.10.5pre broadinstitute/ncbi-tools=2.10.7.0 nextstrain/base=build-20200608T223413Z diff --git a/test/input/WDL/test_outputs-assemble_refbased-local.json b/test/input/WDL/test_outputs-assemble_refbased-local.json index 5eb7bef8f..829701192 100644 --- a/test/input/WDL/test_outputs-assemble_refbased-local.json +++ b/test/input/WDL/test_outputs-assemble_refbased-local.json @@ -7,5 +7,7 @@ "assemble_refbased.align_to_self_merged_reads_aligned": 18409, "assemble_refbased.reference_genome_length": 18959, "assemble_refbased.assembly_length_unambiguous": 18889, - "assemble_refbased.assembly_length": 18889 + "assemble_refbased.assembly_length": 18889, + "assemble_refbased.replicate_discordant_indels": 0, + "assemble_refbased.replicate_discordant_snps": 0 }