Merge pull request #106 from broadinstitute/dp-classify
strip out blastx from classify_multi workflow
dpark01 authored Jun 10, 2020
2 parents 33188e8 + e87d48b commit 0ec8cb1
Showing 3 changed files with 113 additions and 32 deletions.
32 changes: 1 addition & 31 deletions pipes/WDL/workflows/classify_multi.wdl
@@ -8,7 +8,7 @@ import "../tasks/tasks_reports.wdl" as reports
 
 workflow classify_multi {
     meta {
-        description: "Runs raw reads through taxonomic classification (Kraken2), human read depletion (based on Kraken2), de novo assembly (SPAdes), taxonomic classification of contigs (BLASTx), and FASTQC/multiQC of reads."
+        description: "Runs raw reads through taxonomic classification (Kraken2), human read depletion (based on Kraken2), de novo assembly (SPAdes), and FASTQC/multiQC of reads."
         author: "Broad Viral Genomics"
         email: "viral-ngs@broadinstitute.org"
     }
@@ -23,8 +23,6 @@ workflow classify_multi {
 
         File kraken2_db_tgz
         File krona_taxonomy_db_kraken2_tgz
-        File? blast_db_tgz
-        File? krona_taxonomy_db_blast_tgz
     }
 
     parameter_meta {
@@ -48,14 +46,6 @@ workflow classify_multi {
             description: "Krona taxonomy database containing a single file: taxonomy.tab, or possibly just a compressed taxonomy.tab",
             patterns: ["*.tab.zst", "*.tab.gz", "*.tab", "*.tar.gz", "*.tar.lz4", "*.tar.bz2", "*.tar.zst"]
         }
-        blast_db_tgz: {
-            description: "Pre-built BLAST database tarball containing an indexed blast database named 'nr'",
-            patterns: ["*.tar.gz", "*.tar.lz4", "*.tar.bz2", "*.tar.zst"]
-        }
-        krona_taxonomy_db_blast_tgz: {
-            description: "Krona taxonomy database: a tarball containing a taxonomy.tab file as well as accession to taxid mapping (a kraken-based taxonomy database will not suffice).",
-            patterns: ["*.tar.gz", "*.tar.lz4", "*.tar.bz2", "*.tar.zst"]
-        }
        ncbi_taxdump_tgz: {
            description: "An NCBI taxdump.tar.gz file that contains, at the minimum, a nodes.dmp and names.dmp file.",
            patterns: ["*.tar.gz", "*.tar.lz4", "*.tar.bz2", "*.tar.zst"]
@@ -114,17 +104,8 @@ workflow classify_multi {
                 assembler = "spades",
                 reads_unmapped_bam = rmdup_ubam.dedup_bam,
                 trim_clip_db = trim_clip_db,
-                spades_min_contig_len = 800,
                 always_succeed = true
         }
-        if(defined(blast_db_tgz) && defined(krona_taxonomy_db_blast_tgz)) {
-            call metagenomics.blastx as blastx {
-                input:
-                    contigs_fasta = spades.contigs_fasta,
-                    blast_db_tgz = select_first([blast_db_tgz]),
-                    krona_taxonomy_db_tgz = select_first([krona_taxonomy_db_blast_tgz])
-            }
-        }
     }
 
     call reports.MultiQC as multiqc_raw {
@@ -163,14 +144,6 @@ workflow classify_multi {
             out_basename = "merged-kraken2.krona"
     }
 
-    if(defined(blast_db_tgz) && defined(krona_taxonomy_db_blast_tgz)) {
-        call metagenomics.krona_merge as krona_merge_blastx {
-            input:
-                krona_reports = select_all(blastx.krona_report_html),
-                out_basename = "merged-spades-blastx.krona"
-        }
-    }
-
     output {
         Array[File] cleaned_reads_unaligned_bams = deplete.bam_filtered_to_taxa
         Array[File] deduplicated_reads_unaligned = rmdup_ubam.dedup_bam
@@ -187,12 +160,9 @@ workflow classify_multi {
         File spikein_counts = spike_summary.count_summary
         File kraken2_merged_krona = krona_merge_kraken2.krona_report_html
         File kraken2_summary = metag_summary_report.krakenuniq_aggregate_taxlevel_summary
-        File? blastx_merged_krona = krona_merge_blastx.krona_report_html
         Array[File] kraken2_summary_reports = kraken2.kraken2_summary_report
         Array[File] kraken2_krona_by_sample = kraken2.krona_report_html
-        Array[File] blastx_report_by_sample = select_all(blastx.blast_report)
-        Array[File] blastx_krona_by_sample = select_all(blastx.krona_report_html)
         String kraken2_viral_classify_version = kraken2.viralngs_version[0]
         String deplete_viral_classify_version = deplete.viralngs_version[0]
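The blocks stripped out above are instances of WDL's standard idiom for an optional, input-gated call: wrap the call in if (defined(...)) on the optional inputs, then use select_first to unwrap each File? to a File inside the guard. A minimal, self-contained sketch of that idiom follows; the task and all names in it are hypothetical, not from this repo.

version 1.0

task classify_contigs {
    input {
        File contigs_fasta
        File db_tgz
    }
    command {
        echo "classifying ~{contigs_fasta} against ~{db_tgz}"
    }
    output {
        File report = stdout()
    }
}

workflow optional_call_demo {
    input {
        File contigs_fasta
        File? db_tgz    # optional input: may be omitted entirely
    }

    # Run the call only when the optional database was provided;
    # select_first([db_tgz]) safely unwraps File? to File inside the guard.
    if (defined(db_tgz)) {
        call classify_contigs {
            input:
                contigs_fasta = contigs_fasta,
                db_tgz = select_first([db_tgz])
        }
    }

    output {
        # Outputs of a conditional call are optional to downstream code.
        File? report = classify_contigs.report
    }
}

Because such a call is conditional, downstream code sees its outputs as optional, which is why the removed output lines above collapsed them with select_all.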
110 changes: 110 additions & 0 deletions pipes/cromwell/cromwell.local-travis.conf
@@ -0,0 +1,110 @@
# Documentation
# https://cromwell.readthedocs.io/en/stable/backends/Local/

# Define a new backend provider.

LocalExample {

  # The actor that runs the backend. In this case, it's the Shared File System (SFS) ConfigBackend.
  actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory"

  # The backend custom configuration.
  config {

    # Optional limits on the number of concurrent jobs
    concurrent-job-limit = 1

    # If true, submits scripts to the bash background using "&". Only useful for dispatchers that do NOT
    # submit the job and then immediately return a scheduled job id.
    run-in-background = true

    # `temporary-directory` creates the temporary directory for commands.
    #
    # If this value is not set explicitly, the default value creates a unique temporary directory, equivalent to:
    # temporary-directory = "$(mktemp -d \"$PWD\"/tmp.XXXXXX)"
    #
    # The expression is run from the execution directory for the script. The expression must create the directory
    # if it does not exist, and then return the full path to the directory.
    #
    # To create and return a non-random temporary directory, use something like:
    # temporary-directory = "$(mkdir -p /tmp/mydir && echo /tmp/mydir)"

    # `script-epilogue` configures a shell command to run after the execution of every command block.
    #
    # If this value is not set explicitly, the default value is `sync`, equivalent to:
    # script-epilogue = "sync"
    #
    # To turn off the default `sync` behavior, set this value to an empty string:
    # script-epilogue = ""

    # `glob-link-command` specifies the command used to link glob outputs; hard links are used by default.
    # If the filesystem doesn't allow hard links (e.g., BeeGFS), change to soft links as follows:
    # glob-link-command = "ln -sL GLOB_PATTERN GLOB_DIRECTORY"

    # The list of possible runtime custom attributes.
    runtime-attributes = """
    String? docker
    String? docker_user
    """

    # Submit string when there is no "docker" runtime attribute.
    submit = "/usr/bin/env bash ${script}"

    # Submit string when there is a "docker" runtime attribute.
    submit-docker = """
    docker run \
      --rm -i \
      ${"--user " + docker_user} \
      --entrypoint ${job_shell} \
      -v ${cwd}:${docker_cwd} \
      ${docker} ${script}
    """

    # Root directory where Cromwell writes job results. This directory must be
    # visible and writeable by the Cromwell process as well as the jobs that Cromwell
    # launches.
    root = "cromwell-executions"

    # Root directory where Cromwell writes job results in the container. This value
    # can be used to specify where the execution folder is mounted in the container.
    # It is used for the construction of the docker_cwd string in the submit-docker
    # value above.
    dockerRoot = "/cromwell-executions"

    # File system configuration.
    filesystems {

      # For SFS backends, the "local" configuration specifies how files are handled.
      local {

        # Try a hard link (ln), then a soft link (ln -s); if both fail, copy the files.
        localization: [
          "hard-link", "soft-link", "copy"
        ]

        # Call caching strategies
        caching {
          # When copying a cached result, what type of file duplication should occur.
          # For more information check: https://cromwell.readthedocs.io/en/stable/backends/HPC/#shared-filesystem
          duplication-strategy: [
            "hard-link", "soft-link", "copy"
          ]

          # Strategy to determine if a file has been used before.
          # For an extended explanation and alternative strategies, check: https://cromwell.readthedocs.io/en/stable/Configuring/#call-caching
          hashing-strategy: "md5"

          # When true, will check if a sibling file with the same name and the .md5 extension exists, and if it does, use the content of this file as a hash.
          # If false or the md5 does not exist, will proceed with the above-defined hashing strategy.
          check-sibling-md5: false
        }
      }
    }

    # The defaults for runtime attributes if not provided.
    default-runtime-attributes {
      failOnStderr: false
      continueOnReturnCode: 0
    }
  }
}
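One caveat worth flagging: as committed, LocalExample sits at the top level of this file. In the Cromwell documentation linked at the top of the file, a provider block like this takes effect only when it is nested under backend.providers and selected as the default backend. A sketch of that documented wrapper, assuming the standard layout from the Cromwell docs (this wrapper is not part of the committed file):

include required(classpath("application"))  # pull in Cromwell's bundled defaults

backend {
  # Make this provider the backend Cromwell actually uses.
  default = LocalExample

  providers {
    LocalExample {
      actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory"
      config {
        # ... the contents shown in the file above ...
      }
    }
  }
}

The default backend can also typically be chosen at launch time with -Dbackend.default=LocalExample.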
3 changes: 2 additions & 1 deletion travis/tests-cromwell.sh
@@ -13,7 +13,8 @@ for workflow in ../pipes/WDL/workflows/*.wdl; do
     date
     echo "Executing $workflow_name using Cromwell on local instance"
     # the "cat" is to allow a pipe failure (otherwise it halts because of set -e)
-    java -jar cromwell.jar run \
+    java -Dconfig.file=../pipes/cromwell/cromwell.local-travis.conf \
+        -jar cromwell.jar run \
         $workflow_name.wdl \
         -i $input_json | tee cromwell.out
     if [ ${PIPESTATUS[0]} -gt 0 ]; then
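The comment about allowing a pipe failure refers to a standard set -e pitfall: a pipeline's exit status is that of its last command, so piping Cromwell's output through tee (or cat) would otherwise hide a failure from set -e. A minimal standalone sketch of the guard the script relies on (illustrative only, not the repo's code):

#!/bin/bash
set -e   # abort on error, but note: a pipeline's status is its LAST command's status

# 'tee' succeeds even when the command feeding it fails, so set -e does not trip here.
false | tee /dev/null

# PIPESTATUS records the exit code of every pipeline stage; index 0 is the first command.
if [ "${PIPESTATUS[0]}" -gt 0 ]; then
    echo "first command in the pipeline failed" >&2
    exit 1
fi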
