Skip to content

Commit

Permalink
Merge pull request #74 from broadinstitute/dp-augur
Browse files Browse the repository at this point in the history
small nextstrain workflow updates
  • Loading branch information
dpark01 committed May 18, 2020
2 parents 6f1cff5 + 2abcfbe commit c270be7
Show file tree
Hide file tree
Showing 15 changed files with 6,745 additions and 14 deletions.
88 changes: 76 additions & 12 deletions pipes/WDL/tasks/tasks_nextstrain.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ task filter_subsample_sequences {
}
input {
File sequences_fasta
File metadata_tsv
File sample_metadata_tsv
Int? sequences_per_group
String? group_by
Expand All @@ -94,11 +94,22 @@ task filter_subsample_sequences {
Int? machine_mem_gb
String docker = "nextstrain/base"
}
parameter_meta {
sequences_fasta: {
description: "Set of sequences in fasta format to subsample using augur filter. These must represent a single chromosome/segment of a genome only.",
patterns: ["*.fasta", "*.fa"]
}
sample_metadata_tsv: {
description: "Metadata in tab-separated text format. See https://nextstrain-augur.readthedocs.io/en/stable/faq/metadata.html for details.",
patterns: ["*.txt", "*.tsv"]
}
}
String in_basename = basename(sequences_fasta, ".fasta")
command {
augur version > VERSION
augur filter \
--sequences ~{sequences_fasta} \
--metadata ~{metadata_tsv} \
--metadata ~{sample_metadata_tsv} \
~{"--min-date " + min_date} \
~{"--max-date " + max_date} \
~{"--min-length " + min_length} \
Expand All @@ -123,6 +134,7 @@ task filter_subsample_sequences {
}
output {
File filtered_fasta = "~{in_basename}.filtered.fasta"
String augur_version = read_string("VERSION")
}
}
Expand All @@ -143,6 +155,7 @@ task augur_mafft_align {
String docker = "nextstrain/base"
}
command {
augur version > VERSION
augur align --sequences ~{sequences} \
--reference-sequence ~{ref_fasta} \
--output ~{basename}_aligned.fasta \
Expand All @@ -151,7 +164,7 @@ task augur_mafft_align {
~{true="--remove-reference" false="" remove_reference} \
--debug \
--nthreads auto
cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MAX_RAM
cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes
}
runtime {
docker: docker
Expand All @@ -164,7 +177,7 @@ task augur_mafft_align {
output {
File aligned_sequences = "~{basename}_aligned.fasta"
File align_troubleshoot = stdout()
File max_ram_usage_in_bytes = "MAX_RAM"
String augur_version = read_string("VERSION")
}
}
Expand All @@ -186,6 +199,7 @@ task draft_augur_tree {
String docker = "nextstrain/base"
}
command {
augur version > VERSION
augur tree --alignment ~{aligned_fasta} \
--output ~{basename}_raw_tree.nwk \
--method ~{default="iqtree" method} \
Expand All @@ -205,6 +219,7 @@ task draft_augur_tree {
}
output {
File aligned_tree = "~{basename}_raw_tree.nwk"
String augur_version = read_string("VERSION")
}
}
Expand Down Expand Up @@ -238,6 +253,7 @@ task refine_augur_tree {
String docker = "nextstrain/base"
}
command {
augur version > VERSION
augur refine \
--tree ~{raw_tree} \
--alignment ~{aligned_fasta} \
Expand Down Expand Up @@ -272,6 +288,7 @@ task refine_augur_tree {
output {
File tree_refined = "~{basename}_refined_tree.nwk"
File branch_lengths = "~{basename}_branch_lengths.json"
String augur_version = read_string("VERSION")
}
}
Expand All @@ -293,6 +310,7 @@ task ancestral_traits {
String docker = "nextstrain/base"
}
command {
augur version > VERSION
augur traits \
--tree ~{tree} \
--metadata ~{metadata} \
Expand All @@ -311,6 +329,7 @@ task ancestral_traits {
}
output {
File node_data_json = "~{basename}_nodes.json"
String augur_version = read_string("VERSION")
}
}
Expand All @@ -334,6 +353,7 @@ task ancestral_tree {
String docker = "nextstrain/base"
}
command {
augur version > VERSION
augur ancestral \
--tree ~{refined_tree} \
--alignment ~{aligned_fasta} \
Expand All @@ -357,6 +377,7 @@ task ancestral_tree {
output {
File nt_muts_json = "~{basename}_nt_muts.json"
File sequences = "~{basename}_ancestral_sequences.fasta"
String augur_version = read_string("VERSION")
}
}
Expand All @@ -378,6 +399,7 @@ task translate_augur_tree {
String docker = "nextstrain/base"
}
command {
augur version > VERSION
augur translate --tree ~{refined_tree} \
--ancestral-sequences ~{nt_muts} \
--reference-sequence ~{genbank_gb} \
Expand All @@ -396,6 +418,7 @@ task translate_augur_tree {
}
output {
File aa_muts_json = "~{basename}_aa_muts.json"
String augur_version = read_string("VERSION")
}
}
Expand All @@ -416,6 +439,7 @@ task augur_import_beast {
}
String tree_basename = basename(beast_mcc_tree, ".tree")
command {
augur version > VERSION
augur import beast \
--mcc "~{beast_mcc_tree}" \
--output-tree "~{tree_basename}.nwk" \
Expand All @@ -436,6 +460,7 @@ task augur_import_beast {
output {
File tree_newick = "~{tree_basename}.nwk"
File node_data_json = "~{tree_basename}.json"
String augur_version = read_string("VERSION")
}
}
Expand All @@ -445,28 +470,66 @@ task export_auspice_json {
}
input {
File auspice_config
File? metadata
File? sample_metadata
File tree
Array[File] node_data_jsons
File? lat_longs_tsv
File? colors_tsv
File? lat_longs_tsv
File? colors_tsv
Array[String]? geo_resolutions
Array[String]? color_by_metadata
File? description_md
Array[String]? maintainers
String? title
Int? machine_mem_gb
String docker = "nextstrain/base"
}
String out_basename = basename(basename(tree, ".nwk"), "_refined_tree")
command {
NODE_DATA_FLAG=""
augur version > VERSION
touch exportargs
# --node-data
if [ -n "~{sep=' ' node_data_jsons}" ]; then
NODE_DATA_FLAG="--node-data "
echo "--node-data" >> exportargs
cat "~{write_lines(node_data_jsons)}" >> exportargs
fi
# --geo-resolutions
VALS="~{write_lines(select_first([geo_resolutions, []]))}"
if [ -n "$(cat $VALS)" ]; then
echo "--geo-resolutions" >> exportargs;
fi
cat $VALS >> exportargs
# --color-by-metadata
VALS="~{write_lines(select_first([color_by_metadata, []]))}"
if [ -n "$(cat $VALS)" ]; then
echo "--color-by-metadata" >> exportargs;
fi
augur export v2 --tree ~{tree} \
~{"--metadata " + metadata} \
$NODE_DATA_FLAG ~{sep=' ' node_data_jsons}\
cat $VALS >> exportargs
# --title
if [ -n "~{title}" ]; then
echo "--title" >> exportargs
echo "~{title}" >> exportargs
fi
# --maintainers
VALS="~{write_lines(select_first([maintainers, []]))}"
if [ -n "$(cat $VALS)" ]; then
echo "--maintainers" >> exportargs;
fi
cat $VALS >> exportargs
cat exportargs | tr '\n' '\0' | xargs -0 -t augur export v2 \
--tree ~{tree} \
~{"--metadata " + sample_metadata} \
--auspice-config ~{auspice_config} \
~{"--lat-longs " + lat_longs_tsv} \
~{"--colors " + colors_tsv} \
~{"--description_md " + description_md} \
--output ~{out_basename}_auspice.json
}
runtime {
Expand All @@ -479,5 +542,6 @@ task export_auspice_json {
}
output {
File virus_json = "~{out_basename}_auspice.json"
String augur_version = read_string("VERSION")
}
}
2 changes: 1 addition & 1 deletion pipes/WDL/tasks/tasks_taxon_filter.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ task deplete_taxa {
$DBS_BMTAGGER $DBS_BLAST $DBS_BWA \
${'--chunkSize=' + query_chunk_size} \
$TAGS_TO_CLEAR \
--JVMmemory="$mem_in_mb_50"m \
--JVMmemory="$mem_in_mb_75"m \
--srprismMemory=$mem_in_mb_75 \
--loglevel=DEBUG

Expand Down
2 changes: 1 addition & 1 deletion pipes/WDL/workflows/build_augur_tree.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ workflow build_augur_tree {
call nextstrain.export_auspice_json {
input:
tree = refine_augur_tree.tree_refined,
metadata = sample_metadata,
sample_metadata = sample_metadata,
node_data_jsons = select_all([
refine_augur_tree.branch_lengths,
ancestral_traits.node_data_json,
Expand Down
14 changes: 14 additions & 0 deletions pipes/WDL/workflows/newick_to_auspice.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
version 1.0

import "../tasks/tasks_nextstrain.wdl" as nextstrain

# Thin wrapper workflow around a single task: it runs `export_auspice_json`
# from the imported `nextstrain` namespace and republishes that task's
# `virus_json` file under the workflow-level output name `auspice_json`.
workflow newick_to_auspice {
meta {
description: "Convert a newick formatted phylogenetic tree into a json suitable for auspice visualization. See https://nextstrain-augur.readthedocs.io/en/stable/usage/cli/export.html"
}

# The call has no `input:` section, so none of the task's inputs are wired
# up here.
# NOTE(review): presumably they are supplied by the caller as call-level
# inputs (e.g. `newick_to_auspice.export_auspice_json.tree`) -- confirm
# against the execution engine in use.
call nextstrain.export_auspice_json
output {
# Re-export the task's result file under a workflow-level name.
File auspice_json = export_auspice_json.virus_json
}
}
14 changes: 14 additions & 0 deletions pipes/WDL/workflows/subsample_by_metadata.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
version 1.0

import "../tasks/tasks_nextstrain.wdl" as nextstrain

# Thin wrapper workflow around a single task: it runs
# `filter_subsample_sequences` from the imported `nextstrain` namespace and
# exposes that task's `filtered_fasta` file as the workflow output of the
# same name.
workflow subsample_by_metadata {
meta {
description: "Filter and subsample a sequence set. See https://nextstrain-augur.readthedocs.io/en/stable/usage/cli/filter.html"
}

# The call has no `input:` section, so none of the task's inputs are wired
# up here.
# NOTE(review): presumably they are supplied by the caller as call-level
# inputs (e.g. `subsample_by_metadata.filter_subsample_sequences.sequences_fasta`)
# -- confirm against the execution engine in use.
call nextstrain.filter_subsample_sequences
output {
# Re-export the task's result file under a workflow-level name.
File filtered_fasta = filter_subsample_sequences.filtered_fasta
}
}
12 changes: 12 additions & 0 deletions test/input/WDL/test_inputs-build_augur_tree-local.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"build_augur_tree.genbank_gb": "test/input/zika-tutorial/config/zika_outgroup.gb",
"build_augur_tree.ref_fasta": "test/input/zika-tutorial/data/KX369547.1.fna",
"build_augur_tree.export_auspice_json.colors_tsv": "test/input/zika-tutorial/config/colors.tsv",
"build_augur_tree.virus": "testrun",
"build_augur_tree.export_auspice_json.auspice_config": "test/input/zika-tutorial/config/auspice_config.json",
"build_augur_tree.ancestral_traits_to_infer": ["region", "country"],
"build_augur_tree.export_auspice_json.lat_longs_tsv": "test/input/zika-tutorial/config/lat_longs.tsv",
"build_augur_tree.assembly_fastas": ["test/input/zika-tutorial/data/sequences.fasta"],
"build_augur_tree.sample_metadata": "test/input/zika-tutorial/data/metadata.tsv"
}

5 changes: 5 additions & 0 deletions test/input/zika-tutorial/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Nextstrain build for Zika virus tutorial

This repository provides the data and scripts associated with the [Zika virus tutorial](https://nextstrain.org/docs/getting-started/zika-tutorial).

See the [original Zika build repository](https://github.com/nextstrain/zika) for more about the public build.
50 changes: 50 additions & 0 deletions test/input/zika-tutorial/config/auspice_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
{
"title": "Tutorial Nextstrain build for Zika virus",
"maintainers": [
{"name": "Trevor Bedford", "url": "http://bedford.io/team/trevor-bedford/"}
],
"build_url": "https://github.com/nextstrain/zika-tutorial",
"colorings": [
{
"key": "gt",
"title": "Genotype",
"type": "categorical"
},
{
"key": "num_date",
"title": "Date",
"type": "continuous"
},
{
"key": "author",
"title": "Author",
"type": "categorical"
},
{
"key": "country",
"title": "Country",
"type": "categorical"
},
{
"key": "region",
"title": "Region",
"type": "categorical"
}
],
"geo_resolutions": [
"country",
"region"
],
"panels": [
"tree",
"map"
],
"display_defaults": {
"map_triplicate": true
},
"filters": [
"country",
"region",
"author"
]
}
34 changes: 34 additions & 0 deletions test/input/zika-tutorial/config/colors.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
country thailand #511EA8
country vietnam #4928B4
country singapore #4334BF
country french polynesia #4041C7
country american samoa #3F50CC
country fiji #3F5ED0
country tonga #416CCE
country china #4379CD
country taiwan #4784C7
country japan #4B8FC1
country italy #5098B9
country brazil #56A0AF
country peru #5CA7A4
country ecuador #63AC99
country colombia #6BB18E
country french guiana #73B583
country suriname #7CB878
country venezuela #86BB6E
country panama #A4BE56
country nicaragua #AFBD4F
country honduras #B9BC4A
country el salvador #C2BA46
country guatemala #CCB742
country mexico #D3B240
country martinique #DAAC3D
country guadeloupe #DFA43B
country saint barthelemy #E39B39
country usvi #E68F36
country puerto rico #E68234
country jamaica #E67431
country dominican republic #E4632E
country haiti #E1512A
country cuba #DF4027
country usa #DC2F24
Loading

0 comments on commit c270be7

Please sign in to comment.