Skip to content

Commit

Permalink
Merge pull request #84 from broadinstitute/dp-assembly
Browse files (browse the repository at this point in the history)
assemble_refbased -- do not fail when reads do not align
  • Loading branch information
dpark01 committed May 25, 2020
2 parents (569cd75 + 7ecc9c4), commit 6331430
Show file tree
Hide file tree
Showing 8 changed files with 26 additions and 10 deletions.
21 changes: 13 additions & 8 deletions pipes/WDL/tasks/tasks_assembly.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ task align_reads {
}

command {
set -ex -o pipefail
set -ex # do not set pipefail, since grep exits 1 if it can't find the pattern

read_utils.py --version | tee VERSION

Expand Down Expand Up @@ -298,10 +298,13 @@ task align_reads {
--loglevel=DEBUG

else
touch "${sample_name}.all.bam" "${sample_name}.mapped.bam"
# handle special case of empty reference fasta -- emit empty bams (with original bam headers)
samtools view -H -b "${reads_unmapped_bam}" > "${sample_name}.all.bam"
samtools view -H -b "${reads_unmapped_bam}" > "${sample_name}.mapped.bam"

samtools index "${sample_name}.all.bam" "${sample_name}.all.bai"
samtools index "${sample_name}.mapped.bam" "${sample_name}.mapped.bai"
fi
samtools index ${sample_name}.mapped.bam

# collect figures of merit
grep -v '^>' assembly.fasta | tr -d '\nNn' | wc -c | tee assembly_length_unambiguous
Expand All @@ -311,7 +314,7 @@ task align_reads {
samtools view -h -F 260 ${sample_name}.all.bam | samtools flagstat - | tee ${sample_name}.all.bam.flagstat.txt
grep properly ${sample_name}.all.bam.flagstat.txt | cut -f 1 -d ' ' | tee read_pairs_aligned
samtools view ${sample_name}.mapped.bam | cut -f10 | tr -d '\n' | wc -c | tee bases_aligned
python -c "print (float("$(cat bases_aligned)")/"$(cat assembly_length_unambiguous)") if "$(cat assembly_length_unambiguous)">0 else 0" > mean_coverage
python -c "print (float("$(cat bases_aligned)")/"$(cat assembly_length_unambiguous)") if "$(cat assembly_length_unambiguous)">0 else print(0)" > mean_coverage

# fastqc mapped bam
reports.py fastqc ${sample_name}.mapped.bam ${sample_name}.mapped_fastqc.html --out_zip ${sample_name}.mapped_fastqc.zip
Expand Down Expand Up @@ -404,9 +407,10 @@ task refine_assembly_with_aligned_reads {
file_utils.py rename_fasta_sequences \
refined.fasta "${sample_name}.fasta" "${sample_name}"

# collect figures of merit
grep -v '^>' refined.fasta | tr -d '\n' | wc -c | tee assembly_length
grep -v '^>' refined.fasta | tr -d '\nNn' | wc -c | tee assembly_length_unambiguous
# collect figures of merit
set +o pipefail # grep will exit 1 if it fails to find the pattern
grep -v '^>' refined.fasta | tr -d '\n' | wc -c | tee assembly_length
grep -v '^>' refined.fasta | tr -d '\nNn' | wc -c | tee assembly_length_unambiguous
}

output {
Expand Down Expand Up @@ -570,6 +574,7 @@ task refine_2x_and_plot {
--loglevel=DEBUG

# collect figures of merit
set +o pipefail # grep will exit 1 if it fails to find the pattern
grep -v '^>' ${sample_name}.fasta | tr -d '\n' | wc -c | tee assembly_length
grep -v '^>' ${sample_name}.fasta | tr -d '\nNn' | wc -c | tee assembly_length_unambiguous
samtools view -c ${sample_name}.mapped.bam | tee reads_aligned
Expand All @@ -578,7 +583,7 @@ task refine_2x_and_plot {
grep properly ${sample_name}.all.bam.flagstat.txt | cut -f 1 -d ' ' | tee read_pairs_aligned
samtools view ${sample_name}.mapped.bam | cut -f10 | tr -d '\n' | wc -c | tee bases_aligned
#echo $(( $(cat bases_aligned) / $(cat assembly_length) )) | tee mean_coverage
python -c "print (float("$(cat bases_aligned)")/"$(cat assembly_length)") if "$(cat assembly_length)">0 else 0" > mean_coverage
python -c "print (float("$(cat bases_aligned)")/"$(cat assembly_length)") if "$(cat assembly_length)">0 else print(0)" > mean_coverage

# fastqc mapped bam
reports.py fastqc ${sample_name}.mapped.bam ${sample_name}.mapped_fastqc.html --out_zip ${sample_name}.mapped_fastqc.zip
Expand Down
3 changes: 2 additions & 1 deletion pipes/WDL/tasks/tasks_reports.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,13 @@ task plot_coverage {
fi

# collect figures of merit
set +o pipefail # grep will exit 1 if it fails to find the pattern
samtools view -H ${aligned_reads_bam} | perl -n -e'/^@SQ.*LN:(\d+)/ && print "$1\n"' | python -c "import sys; print(sum(int(x) for x in sys.stdin))" | tee assembly_length
# report only primary alignments 260=exclude unaligned reads and secondary mappings
samtools view -h -F 260 ${aligned_reads_bam} | samtools flagstat - | tee ${sample_name}.flagstat.txt
grep properly ${sample_name}.flagstat.txt | cut -f 1 -d ' ' | tee read_pairs_aligned
samtools view ${aligned_reads_bam} | cut -f10 | tr -d '\n' | wc -c | tee bases_aligned
python -c "print (float("$(cat bases_aligned)")/"$(cat assembly_length)") if "$(cat assembly_length)">0 else 0" > mean_coverage
python -c "print (float("$(cat bases_aligned)")/"$(cat assembly_length)") if "$(cat assembly_length)">0 else print(0)" > mean_coverage
}

output {
Expand Down
2 changes: 1 addition & 1 deletion pipes/WDL/workflows/assemble_refbased.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ workflow assemble_refbased {

meta {
description: "Reference-based microbial consensus calling. Aligns short reads to a singular reference genome, calls a new consensus sequence, and emits: new assembly, reads aligned to provided reference, reads aligned to new assembly, various figures of merit, plots, and QC metrics. The user may provide unaligned reads spread across multiple input files and this workflow will parallelize alignment per input file before merging results prior to consensus calling."
author: "Viral Genomics"
author: "Broad Viral Genomics"
email: "viral-ngs@broadinstitute.org"
}

Expand Down
2 changes: 2 additions & 0 deletions pipes/WDL/workflows/beast_to_auspice.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import "../tasks/tasks_nextstrain.wdl" as nextstrain
workflow beast_to_auspice {
meta {
description: "Visualize BEAST output with Nextstrain. This workflow converts a BEAST MCC tree (.tree file) into an Auspice v2 json file. See https://nextstrain-augur.readthedocs.io/en/stable/faq/import-beast.html for details."
author: "Broad Viral Genomics"
email: "viral-ngs@broadinstitute.org"
}

input {
Expand Down
2 changes: 2 additions & 0 deletions pipes/WDL/workflows/build_augur_tree.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import "../tasks/tasks_nextstrain.wdl" as nextstrain
workflow build_augur_tree {
meta {
description: "Align assemblies, build trees, and convert to json representation suitable for Nextstrain visualization. See https://nextstrain.org/docs/getting-started/ and https://nextstrain-augur.readthedocs.io/en/stable/"
author: "Broad Viral Genomics"
email: "viral-ngs@broadinstitute.org"
}

input {
Expand Down
2 changes: 2 additions & 0 deletions pipes/WDL/workflows/classify_multi.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import "../tasks/tasks_reports.wdl" as reports
workflow classify_multi {
meta {
description: "Runs raw reads through taxonomic classification (Kraken2), human read depletion (based on Kraken2), de novo assembly (SPAdes), taxonomic classification of contigs (BLASTx), and FASTQC/multiQC of reads."
author: "Broad Viral Genomics"
email: "viral-ngs@broadinstitute.org"
}

input {
Expand Down
2 changes: 2 additions & 0 deletions pipes/WDL/workflows/genbank.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ workflow genbank {

meta {
description: "Prepare assemblies for Genbank submission. This includes annotation by simple coordinate transfer from Genbank annotations and a multiple alignment. See https://viral-pipelines.readthedocs.io/en/latest/ncbi_submission.html for details."
author: "Broad Viral Genomics"
email: "viral-ngs@broadinstitute.org"
}

input {
Expand Down
2 changes: 2 additions & 0 deletions pipes/WDL/workflows/newick_to_auspice.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import "../tasks/tasks_nextstrain.wdl" as nextstrain
workflow newick_to_auspice {
meta {
description: "Convert a newick formatted phylogenetic tree into a json suitable for auspice visualization. See https://nextstrain-augur.readthedocs.io/en/stable/usage/cli/export.html"
author: "Broad Viral Genomics"
email: "viral-ngs@broadinstitute.org"
}

call nextstrain.export_auspice_json
Expand Down

0 comments on commit 6331430

Please sign in to comment.