From d2c22add4f4a4dc77c017cd51dd0f02d4a02f438 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Fri, 22 May 2020 13:57:46 -0400 Subject: [PATCH 1/5] bump ivar docker image --- requirements-modules.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-modules.txt b/requirements-modules.txt index 26bead7b1..2c4e205aa 100644 --- a/requirements-modules.txt +++ b/requirements-modules.txt @@ -4,4 +4,4 @@ broadinstitute/viral-classify=2.0.21.3 broadinstitute/viral-phylo=2.0.21.5 broadinstitute/beast-beagle-cuda=1.10.5 nextstrain/base=build-20200506T095107Z -andersenlabapps/ivar=1.2.1 +andersenlabapps/ivar=1.2.2 From 71f137e0ef375412cb24eb1699ade9721ee1b5d2 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Fri, 22 May 2020 15:44:57 -0400 Subject: [PATCH 2/5] increase assemble_refbased default min_coverage from 2 to 3, add parameter_meta description of optionals for task --- pipes/WDL/tasks/tasks_assembly.wdl | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_assembly.wdl b/pipes/WDL/tasks/tasks_assembly.wdl index 4c9408f04..44cce9fa2 100644 --- a/pipes/WDL/tasks/tasks_assembly.wdl +++ b/pipes/WDL/tasks/tasks_assembly.wdl @@ -355,12 +355,21 @@ task refine_assembly_with_aligned_reads { Boolean? mark_duplicates=false Float? major_cutoff=0.5 - Int? min_coverage=2 + Int? min_coverage=3 Int? machine_mem_gb String docker="quay.io/broadinstitute/viral-assemble" } + parameter_meta { + major_cutoff: { + description: "If the major allele is present at a frequency higher than this cutoff, we will call an unambiguous base at that position. If it is equal to or below this cutoff, we will call an ambiguous base representing all possible alleles at that position." + } + min_coverage: { + description: "Minimum read coverage required to call a position unambiguous." 
+ } + } + command { set -ex -o pipefail From 17263935002dd38b57e1514150b1a8cdfc28047a Mon Sep 17 00:00:00 2001 From: Danny Park Date: Fri, 22 May 2020 15:57:36 -0400 Subject: [PATCH 3/5] update README info on viral-ngs-staging --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 025a62a14..fcf9e4e32 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Workflows are written in [WDL](https://github.com/openwdl/wdl) format. This is a Workflows from this repository are continuously deployed to [Dockstore](https://dev.dockstore.net/organizations/BroadInstitute/collections/pgs), a GA4GH Tool Repository Service. They can then be easily imported to any bioinformatic compute platform that utilizes the TRS API and understands WDL (this includes Terra, DNAnexus, DNAstack, etc). -Flattened workflows are also continuously deployed to a GCS bucket: [gs://viral-ngs-wdl](https://console.cloud.google.com/storage/browser/viral-ngs-wdl?forceOnBucketsSortingFiltering=false&organizationId=548622027621&project=gcid-viral-seq) and can be downloaded for local use. +Flattened workflows are also continuously deployed to a staging GitHub repo, [viral-ngs-staging](https://github.com/broadinstitute/viral-ngs-staging/), and a GCS bucket, [gs://viral-ngs-wdl](https://console.cloud.google.com/storage/browser/viral-ngs-wdl?forceOnBucketsSortingFiltering=false&organizationId=548622027621&project=gcid-viral-seq), and can be downloaded for local use. Workflows are also available in the [Terra featured workspace](https://app.terra.bio/#workspaces/pathogen-genomic-surveillance/COVID-19). 
@@ -31,7 +31,7 @@ The easiest way to get started is on a single, Docker-capable machine (your lapt For example, to list the inputs for the assemble_refbased workflow: ``` -miniwdl run https://storage.googleapis.com/viral-ngs-wdl/quay.io/broadinstitute/viral-pipelines/2.0.21.3/assemble_refbased.wdl +miniwdl run https://raw.githubusercontent.com/broadinstitute/viral-ngs-staging/master/pipes/WDL/workflows/assemble_refbased.wdl ``` This will emit: @@ -52,7 +52,7 @@ outputs: To then execute this workflow on your local machine, invoke it with like this: ``` miniwdl run \ - https://storage.googleapis.com/viral-ngs-wdl/quay.io/broadinstitute/viral-pipelines/2.0.21.3/assemble_refbased.wdl \ + https://raw.githubusercontent.com/broadinstitute/viral-ngs-staging/master/pipes/WDL/workflows/assemble_refbased.wdl \ reads_unmapped_bams=PatientA_library1.bam \ reads_unmapped_bams=PatientA_library2.bam \ reference_fasta=/refs/NC_045512.2.fasta \ From 4fe46d65c15d99e7b384592ef83b049134a86354 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Fri, 22 May 2020 16:04:45 -0400 Subject: [PATCH 4/5] change assemble_refbased final coverage stat to come from align_to_ref instead of align_to_self --- pipes/WDL/workflows/assemble_refbased.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/assemble_refbased.wdl b/pipes/WDL/workflows/assemble_refbased.wdl index 38dcd41d1..bef6f07ba 100644 --- a/pipes/WDL/workflows/assemble_refbased.wdl +++ b/pipes/WDL/workflows/assemble_refbased.wdl @@ -130,7 +130,7 @@ workflow assemble_refbased { Int assembly_length = call_consensus.assembly_length Int assembly_length_unambiguous = call_consensus.assembly_length_unambiguous Int reference_genome_length = plot_ref_coverage.assembly_length - Float assembly_mean_coverage = plot_self_coverage.mean_coverage + Float assembly_mean_coverage = plot_ref_coverage.mean_coverage Array[File] align_to_ref_per_input_aligned_flagstat = align_to_ref.aligned_bam_flagstat Array[Int] 
align_to_ref_per_input_reads_provided = align_to_ref.reads_provided @@ -143,7 +143,6 @@ workflow assemble_refbased { Int align_to_ref_merged_reads_aligned = plot_ref_coverage.reads_aligned Int align_to_ref_merged_read_pairs_aligned = plot_ref_coverage.read_pairs_aligned Int align_to_ref_merged_bases_aligned = plot_ref_coverage.bases_aligned - Float align_to_ref_merged_mean_coverage = plot_ref_coverage.mean_coverage File align_to_self_merged_aligned_only_bam = merge_align_to_self.out_bam File align_to_self_merged_coverage_plot = plot_self_coverage.coverage_plot @@ -151,6 +150,7 @@ workflow assemble_refbased { Int align_to_self_merged_reads_aligned = plot_self_coverage.reads_aligned Int align_to_self_merged_read_pairs_aligned = plot_self_coverage.read_pairs_aligned Int align_to_self_merged_bases_aligned = plot_self_coverage.bases_aligned + Float align_to_self_merged_mean_coverage = plot_self_coverage.mean_coverage String align_to_ref_viral_core_version = align_to_ref.viralngs_version[0] String ivar_version = ivar_trim.ivar_version[0] From 4360ca6a26e273ad1cb423c6962f2d5b1b2e81b8 Mon Sep 17 00:00:00 2001 From: Danny Park Date: Fri, 22 May 2020 20:34:16 -0400 Subject: [PATCH 5/5] update expected outputs now that min_coverage is higher, bases covered is lower --- test/input/WDL/test_outputs-assemble_refbased-local.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/input/WDL/test_outputs-assemble_refbased-local.json b/test/input/WDL/test_outputs-assemble_refbased-local.json index c08c9cfe4..f57e77af5 100644 --- a/test/input/WDL/test_outputs-assemble_refbased-local.json +++ b/test/input/WDL/test_outputs-assemble_refbased-local.json @@ -1,11 +1,11 @@ { - "assemble_refbased.align_to_self_merged_bases_aligned": 1765581, + "assemble_refbased.align_to_self_merged_bases_aligned": 1765480, "assemble_refbased.align_to_self_merged_read_pairs_aligned": 16798, - "assemble_refbased.align_to_self_merged_reads_aligned": 17481, + 
"assemble_refbased.align_to_self_merged_reads_aligned": 17480, "assemble_refbased.align_to_ref_merged_bases_aligned": 1800325, "assemble_refbased.align_to_ref_merged_read_pairs_aligned": 17266, "assemble_refbased.align_to_ref_merged_reads_aligned": 17825, "assemble_refbased.reference_genome_length": 18959, - "assemble_refbased.assembly_length_unambiguous": 18872, - "assemble_refbased.assembly_length": 18872 + "assemble_refbased.assembly_length_unambiguous": 18865, + "assemble_refbased.assembly_length": 18865 }