Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

expanded parameterization of align_and_count and additional output metrics #525

Merged
merged 13 commits into from
Mar 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 16 additions & 16 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ jobs:
runs-on: ubuntu-20.04
steps:
- name: checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
# fetch git tags (tagged releases) because
# actions/checkout@v3 does either a full checkout or a shallow checkout without tags
# actions/checkout@v4 does either a full checkout or a shallow checkout without tags
- name: fetch tags
run: git fetch --prune --unshallow --tags
- name: Programmatic environment setup
Expand Down Expand Up @@ -67,7 +67,7 @@ jobs:
run: |
env
- name: install python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: '3.8'
- name: install system dependencies
Expand All @@ -88,9 +88,9 @@ jobs:
runs-on: ubuntu-20.04
steps:
- name: checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
# fetch git tags (tagged releases) because
# actions/checkout@v3 does either a full checkout or a shallow checkout without tags
# actions/checkout@v4 does either a full checkout or a shallow checkout without tags
- name: fetch tags
run: git fetch --prune --unshallow --tags
- name: Programmatic environment setup
Expand Down Expand Up @@ -138,9 +138,9 @@ jobs:
runs-on: ubuntu-20.04
steps:
- name: checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
# fetch git tags (tagged releases) because
# actions/checkout@v3 does either a full checkout or a shallow checkout without tags
# actions/checkout@v4 does either a full checkout or a shallow checkout without tags
- name: fetch tags
run: git fetch --prune --unshallow --tags
- name: Programmatic environment setup
Expand All @@ -166,7 +166,7 @@ jobs:
echo "GITHUB_ACTIONS_BRANCH=$GITHUB_ACTIONS_BRANCH"
echo "GITHUB_ACTIONS_BRANCH=$GITHUB_ACTIONS_BRANCH" >> $GITHUB_ENV
- name: install python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: '3.8'
- name: install docs dependencies
Expand All @@ -183,9 +183,9 @@ jobs:
runs-on: ubuntu-20.04
steps:
- name: checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
# fetch git tags (tagged releases) because
# actions/checkout@v3 does either a full checkout or a shallow checkout without tags
# actions/checkout@v4 does either a full checkout or a shallow checkout without tags
- name: fetch tags
run: git fetch --prune --unshallow --tags
- name: Programmatic environment setup
Expand Down Expand Up @@ -238,9 +238,9 @@ jobs:
runs-on: ubuntu-20.04
steps:
- name: checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
# fetch git tags (tagged releases) because
# actions/checkout@v3 does either a full checkout or a shallow checkout without tags
# actions/checkout@v4 does either a full checkout or a shallow checkout without tags
- name: fetch tags
run: git fetch --prune --unshallow --tags
- name: Programmatic environment setup
Expand Down Expand Up @@ -273,7 +273,7 @@ jobs:
sudo rm -rf "/usr/local/share/boost"
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
- name: install python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: '3.8'
- name: install system dependencies
Expand Down Expand Up @@ -304,9 +304,9 @@ jobs:
DX_PROJECT: project-F8PQ6380xf5bK0Qk0YPjB17P
steps:
- name: checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4
# fetch git tags (tagged releases) because
# actions/checkout@v3 does either a full checkout or a shallow checkout without tags
# actions/checkout@v4 does either a full checkout or a shallow checkout without tags
- name: fetch tags
run: git fetch --prune --unshallow --tags
- name: Programmatic environment setup
Expand Down Expand Up @@ -337,7 +337,7 @@ jobs:
echo "${{ github.event.action }}"
echo "${{ github.event.pull_request.merged }}"
- name: install python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: '3.8'
- name: install java
Expand Down
4 changes: 2 additions & 2 deletions pipes/WDL/tasks/tasks_assembly.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,7 @@ task align_reads {
Boolean skip_mark_dupes = false

Int? machine_mem_gb
String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"

String sample_name = basename(basename(basename(reads_unmapped_bam, ".bam"), ".taxfilt"), ".clean")
}
Expand Down Expand Up @@ -846,7 +846,7 @@ task run_discordance {
String out_basename = "run"
Int min_coverage = 4

String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"
}
parameter_meta {
reads_aligned_bam: {
Expand Down
4 changes: 2 additions & 2 deletions pipes/WDL/tasks/tasks_demux.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ task merge_tarballs {
String out_filename

Int? machine_mem_gb
String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"
}

Int disk_size = 2625
Expand Down Expand Up @@ -163,7 +163,7 @@ task illumina_demux {

Int? machine_mem_gb
Int disk_size = 2625
String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"
}

parameter_meta {
Expand Down
2 changes: 1 addition & 1 deletion pipes/WDL/tasks/tasks_interhost.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ task index_ref {
File? novocraft_license

Int? machine_mem_gb
String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"
}

Int disk_size = 100
Expand Down
6 changes: 3 additions & 3 deletions pipes/WDL/tasks/tasks_ncbi.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ task structured_comments {

File? filter_to_ids

String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"
}
String out_base = basename(assembly_stats_tsv, '.txt')
command <<<
Expand Down Expand Up @@ -272,7 +272,7 @@ task rename_fasta_header {

String out_basename = basename(genome_fasta, ".fasta")

String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"
}
command {
set -e
Expand Down Expand Up @@ -437,7 +437,7 @@ task sra_meta_prep {
Boolean paired

String out_name = "sra_metadata.tsv"
String docker="quay.io/broadinstitute/viral-core:2.2.4"
String docker="quay.io/broadinstitute/viral-core:2.3.0"
}
Int disk_size = 100
parameter_meta {
Expand Down
4 changes: 2 additions & 2 deletions pipes/WDL/tasks/tasks_nextstrain.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ task derived_cols {
String? lab_highlight_loc
Array[File] table_map = []

String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"
Int disk_size = 50
}
parameter_meta {
Expand Down Expand Up @@ -848,7 +848,7 @@ task filter_sequences_to_list {

String out_fname = sub(sub(basename(sequences, ".zst"), ".vcf", ".filtered.vcf"), ".fasta$", ".filtered.fasta")
# Prior docker image: "nextstrain/base:build-20211012T204409Z"
String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"
Int disk_size = 750
}
parameter_meta {
Expand Down
14 changes: 7 additions & 7 deletions pipes/WDL/tasks/tasks_read_utils.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ task group_bams_by_sample {
task get_bam_samplename {
input {
File bam
String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"
}
Int disk_size = round(size(bam, "GB")) + 50
command <<<
Expand All @@ -111,7 +111,7 @@ task get_sample_meta {
input {
Array[File] samplesheets_extended

String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"
}
Int disk_size = 50
command <<<
Expand Down Expand Up @@ -172,7 +172,7 @@ task merge_and_reheader_bams {
File? reheader_table
String out_basename = basename(in_bams[0], ".bam")

String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"
}

Int disk_size = 750
Expand Down Expand Up @@ -244,7 +244,7 @@ task rmdup_ubam {
String method = "mvicuna"

Int machine_mem_gb = 7
String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"
}

Int disk_size = 375
Expand Down Expand Up @@ -303,7 +303,7 @@ task downsample_bams {
Boolean deduplicateAfter = false

Int? machine_mem_gb
String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"
}

Int disk_size = 750
Expand Down Expand Up @@ -367,7 +367,7 @@ task FastqToUBAM {
String? sequencing_center
String? additional_picard_options

String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"
}
Int disk_size = 375
parameter_meta {
Expand Down Expand Up @@ -418,7 +418,7 @@ task read_depths {
File aligned_bam

String out_basename = basename(aligned_bam, '.bam')
String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"
}
Int disk_size = 200
command <<<
Expand Down
75 changes: 57 additions & 18 deletions pipes/WDL/tasks/tasks_reports.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ task alignment_metrics {
Int max_amplicons=500

Int machine_mem_gb=13
String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"
}

String out_basename = basename(aligned_bam, ".bam")
Expand Down Expand Up @@ -136,7 +136,7 @@ task plot_coverage {
String? plotXLimits # of the form "min max" (ints, space between)
String? plotYLimits # of the form "min max" (ints, space between)

String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"
}

Int disk_size = 375
Expand Down Expand Up @@ -283,7 +283,7 @@ task coverage_report {
Array[File] mapped_bam_idx # optional.. speeds it up if you provide it, otherwise we auto-index
String out_report_name = "coverage_report.txt"

String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"
}

Int disk_size = 375
Expand Down Expand Up @@ -350,7 +350,7 @@ task fastqc {
input {
File reads_bam

String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"
}
parameter_meta {
reads_bam:{
Expand Down Expand Up @@ -392,8 +392,13 @@ task align_and_count {
File ref_db
Int topNHits = 3

Boolean filter_bam_to_proper_primary_mapped_reads = false
Boolean do_not_require_proper_mapped_pairs_when_filtering = false
Boolean keep_singletons_when_filtering = false
Boolean keep_duplicates_when_filtering = false

Int? machine_mem_gb
String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"
}

String reads_basename=basename(reads_bam, ".bam")
Expand All @@ -411,28 +416,62 @@ task align_and_count {
pattern: ["*.FASTA"],
category: "required"
}
filter_bam_to_proper_primary_mapped_reads: {
            description: "If specified, reads will be filtered after alignment to include only those flagged as properly paired.",
category: "optional"
}
do_not_require_proper_mapped_pairs_when_filtering: {
description: "Do not require reads to be properly paired when filtering",
category: "optional"
}
keep_singletons_when_filtering: {
description: "Keep singletons when filtering",
category: "optional"
}
keep_duplicates_when_filtering: {
description: "Do not exclude reads marked as duplicates when filtering",
category: "optional"
}
}
command {
command <<<
set -ex -o pipefail

read_utils.py --version | tee VERSION

ln -s "${reads_bam}" "${reads_basename}.bam"
ln -s "~{reads_bam}" "~{reads_basename}.bam"
read_utils.py minimap2_idxstats \
"${reads_basename}.bam" \
"${ref_db}" \
--outStats "${reads_basename}.count.${ref_basename}.txt.unsorted" \
"~{reads_basename}.bam" \
"~{ref_db}" \
--outStats "~{reads_basename}.count.~{ref_basename}.txt.unsorted" \
~{true="--filterReadsAfterAlignment" false="" filter_bam_to_proper_primary_mapped_reads} \
~{true="--doNotRequirePairsToBeProper" false="" do_not_require_proper_mapped_pairs_when_filtering} \
~{true="--keepSingletons" false="" keep_singletons_when_filtering} \
~{true="--keepDuplicates" false="" keep_duplicates_when_filtering} \
--loglevel=DEBUG

sort -b -r -n -k3 "${reads_basename}.count.${ref_basename}.txt.unsorted" > "${reads_basename}.count.${ref_basename}.txt"
head -n ${topNHits} "${reads_basename}.count.${ref_basename}.txt" > "${reads_basename}.count.${ref_basename}.top_${topNHits}_hits.txt"
head -1 "${reads_basename}.count.${ref_basename}.txt" | cut -f 1 > "${reads_basename}.count.${ref_basename}.top.txt"
}
sort -b -r -n -k3 "~{reads_basename}.count.~{ref_basename}.txt.unsorted" > "~{reads_basename}.count.~{ref_basename}.txt"
head -n ~{topNHits} "~{reads_basename}.count.~{ref_basename}.txt" > "~{reads_basename}.count.~{ref_basename}.top_~{topNHits}_hits.txt"
TOP_HIT="$(head -1 '~{reads_basename}.count.~{ref_basename}.txt' | cut -f 1 | tee '~{reads_basename}.count.~{ref_basename}.top.txt')"

TOTAL_COUNT_OF_TOP_HIT=$(grep -E "^($TOP_HIT)" "~{reads_basename}.count.~{ref_basename}.txt" | cut -f3 | tee TOTAL_COUNT_OF_TOP_HIT)
TOTAL_COUNT_OF_LESSER_HITS=$(grep -vE "^(\*|$TOP_HIT)" "~{reads_basename}.count.~{ref_basename}.txt" | cut -f3 | paste -sd+ - | bc -l | tee TOTAL_COUNT_OF_LESSER_HITS)
PCT_MAPPING_TO_LESSER_HITS=$( echo "scale=3; 100 * $TOTAL_COUNT_OF_LESSER_HITS / ($TOTAL_COUNT_OF_LESSER_HITS + $TOTAL_COUNT_OF_TOP_HIT)" | \
bc -l | awk '{printf "%.3f\n", $0}' | tee '~{reads_basename}.count.~{ref_basename}.pct_lesser_hits_of_mapped.txt' )

TOTAL_READS_IN_INPUT=$(samtools view -c "~{reads_basename}.bam")
PCT_OF_INPUT_READS_MAPPED=$( echo "scale=3; 100 * ($TOTAL_COUNT_OF_LESSER_HITS + $TOTAL_COUNT_OF_TOP_HIT) / $TOTAL_READS_IN_INPUT" | \
bc -l | awk '{printf "%.3f\n", $0}' | tee '~{reads_basename}.count.~{ref_basename}.pct_total_reads_mapped.txt' )
>>>

output {
File report = "${reads_basename}.count.${ref_basename}.txt"
File report_top_hits = "${reads_basename}.count.${ref_basename}.top_${topNHits}_hits.txt"
String top_hit_id = read_string("${reads_basename}.count.${ref_basename}.top.txt")
File report = "~{reads_basename}.count.~{ref_basename}.txt"

File report_top_hits = "~{reads_basename}.count.~{ref_basename}.top_~{topNHits}_hits.txt"
String top_hit_id = read_string("~{reads_basename}.count.~{ref_basename}.top.txt")

String pct_total_reads_mapped = read_string('~{reads_basename}.count.~{ref_basename}.pct_total_reads_mapped.txt')
String pct_lesser_hits_of_mapped = read_string('~{reads_basename}.count.~{ref_basename}.pct_lesser_hits_of_mapped.txt')

String viralngs_version = read_string("VERSION")
}

Expand All @@ -453,7 +492,7 @@ task align_and_count_summary {

String output_prefix = "count_summary"

String docker = "quay.io/broadinstitute/viral-core:2.2.4"
String docker = "quay.io/broadinstitute/viral-core:2.3.0"
}

Int disk_size = 100
Expand Down
Loading
Loading