Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

small nextstrain workflow updates #74

Merged
merged 12 commits into from
May 18, 2020
88 changes: 76 additions & 12 deletions pipes/WDL/tasks/tasks_nextstrain.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ task filter_subsample_sequences {
}
input {
File sequences_fasta
File metadata_tsv
File sample_metadata_tsv

Int? sequences_per_group
String? group_by
Expand All @@ -94,11 +94,22 @@ task filter_subsample_sequences {
Int? machine_mem_gb
String docker = "nextstrain/base"
}
parameter_meta {
sequences_fasta: {
description: "Set of sequences in fasta format to subsample using augur filter. These must represent a single chromosome/segment of a genome only.",
patterns: ["*.fasta", "*.fa"]
}
sample_metadata_tsv: {
description: "Metadata in tab-separated text format. See https://nextstrain-augur.readthedocs.io/en/stable/faq/metadata.html for details.",
patterns: ["*.txt", "*.tsv"]
}
}
String in_basename = basename(sequences_fasta, ".fasta")
command {
augur version > VERSION
augur filter \
--sequences ~{sequences_fasta} \
--metadata ~{metadata_tsv} \
--metadata ~{sample_metadata_tsv} \
~{"--min-date " + min_date} \
~{"--max-date " + max_date} \
~{"--min-length " + min_length} \
Expand All @@ -123,6 +134,7 @@ task filter_subsample_sequences {
}
output {
File filtered_fasta = "~{in_basename}.filtered.fasta"
String augur_version = read_string("VERSION")
}
}

Expand All @@ -143,6 +155,7 @@ task augur_mafft_align {
String docker = "nextstrain/base"
}
command {
augur version > VERSION
augur align --sequences ~{sequences} \
--reference-sequence ~{ref_fasta} \
--output ~{basename}_aligned.fasta \
Expand All @@ -151,7 +164,7 @@ task augur_mafft_align {
~{true="--remove-reference" false="" remove_reference} \
--debug \
--nthreads auto
cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes > MAX_RAM
cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes
}
runtime {
docker: docker
Expand All @@ -164,7 +177,7 @@ task augur_mafft_align {
output {
File aligned_sequences = "~{basename}_aligned.fasta"
File align_troubleshoot = stdout()
File max_ram_usage_in_bytes = "MAX_RAM"
String augur_version = read_string("VERSION")
}
}

Expand All @@ -186,6 +199,7 @@ task draft_augur_tree {
String docker = "nextstrain/base"
}
command {
augur version > VERSION
augur tree --alignment ~{aligned_fasta} \
--output ~{basename}_raw_tree.nwk \
--method ~{default="iqtree" method} \
Expand All @@ -205,6 +219,7 @@ task draft_augur_tree {
}
output {
File aligned_tree = "~{basename}_raw_tree.nwk"
String augur_version = read_string("VERSION")
}
}

Expand Down Expand Up @@ -238,6 +253,7 @@ task refine_augur_tree {
String docker = "nextstrain/base"
}
command {
augur version > VERSION
augur refine \
--tree ~{raw_tree} \
--alignment ~{aligned_fasta} \
Expand Down Expand Up @@ -272,6 +288,7 @@ task refine_augur_tree {
output {
File tree_refined = "~{basename}_refined_tree.nwk"
File branch_lengths = "~{basename}_branch_lengths.json"
String augur_version = read_string("VERSION")
}
}

Expand All @@ -293,6 +310,7 @@ task ancestral_traits {
String docker = "nextstrain/base"
}
command {
augur version > VERSION
augur traits \
--tree ~{tree} \
--metadata ~{metadata} \
Expand All @@ -311,6 +329,7 @@ task ancestral_traits {
}
output {
File node_data_json = "~{basename}_nodes.json"
String augur_version = read_string("VERSION")
}
}

Expand All @@ -334,6 +353,7 @@ task ancestral_tree {
String docker = "nextstrain/base"
}
command {
augur version > VERSION
augur ancestral \
--tree ~{refined_tree} \
--alignment ~{aligned_fasta} \
Expand All @@ -357,6 +377,7 @@ task ancestral_tree {
output {
File nt_muts_json = "~{basename}_nt_muts.json"
File sequences = "~{basename}_ancestral_sequences.fasta"
String augur_version = read_string("VERSION")
}
}

Expand All @@ -378,6 +399,7 @@ task translate_augur_tree {
String docker = "nextstrain/base"
}
command {
augur version > VERSION
augur translate --tree ~{refined_tree} \
--ancestral-sequences ~{nt_muts} \
--reference-sequence ~{genbank_gb} \
Expand All @@ -396,6 +418,7 @@ task translate_augur_tree {
}
output {
File aa_muts_json = "~{basename}_aa_muts.json"
String augur_version = read_string("VERSION")
}
}

Expand All @@ -416,6 +439,7 @@ task augur_import_beast {
}
String tree_basename = basename(beast_mcc_tree, ".tree")
command {
augur version > VERSION
augur import beast \
--mcc "~{beast_mcc_tree}" \
--output-tree "~{tree_basename}.nwk" \
Expand All @@ -436,6 +460,7 @@ task augur_import_beast {
output {
File tree_newick = "~{tree_basename}.nwk"
File node_data_json = "~{tree_basename}.json"
String augur_version = read_string("VERSION")
}
}

Expand All @@ -445,28 +470,66 @@ task export_auspice_json {
}
input {
File auspice_config
File? metadata
File? sample_metadata
File tree
Array[File] node_data_jsons

File? lat_longs_tsv
File? colors_tsv
File? lat_longs_tsv
File? colors_tsv
Array[String]? geo_resolutions
Array[String]? color_by_metadata
File? description_md
Array[String]? maintainers
String? title

Int? machine_mem_gb
String docker = "nextstrain/base"
}
String out_basename = basename(basename(tree, ".nwk"), "_refined_tree")
command {
NODE_DATA_FLAG=""
augur version > VERSION
touch exportargs

# --node-data
if [ -n "~{sep=' ' node_data_jsons}" ]; then
NODE_DATA_FLAG="--node-data "
echo "--node-data" >> exportargs
cat "~{write_lines(node_data_jsons)}" >> exportargs
fi

# --geo-resolutions
VALS="~{write_lines(select_first([geo_resolutions, []]))}"
if [ -n "$(cat $VALS)" ]; then
echo "--geo-resolutions" >> exportargs;
fi
cat $VALS >> exportargs

# --color-by-metadata
VALS="~{write_lines(select_first([color_by_metadata, []]))}"
if [ -n "$(cat $VALS)" ]; then
echo "--color-by-metadata" >> exportargs;
fi
augur export v2 --tree ~{tree} \
~{"--metadata " + metadata} \
$NODE_DATA_FLAG ~{sep=' ' node_data_jsons}\
cat $VALS >> exportargs

# --title
if [ -n "~{title}" ]; then
echo "--title" >> exportargs
echo "~{title}" >> exportargs
fi

# --maintainers
VALS="~{write_lines(select_first([maintainers, []]))}"
if [ -n "$(cat $VALS)" ]; then
echo "--maintainers" >> exportargs;
fi
cat $VALS >> exportargs

# Run augur export v2. The list-valued/optional arguments accumulated in
# exportargs (one token per line) are converted to NUL-delimited tokens so
# that values containing spaces stay single arguments; xargs appends them
# after the fixed arguments below and (-t) echoes the final command line.
# NOTE: the markdown description file is passed with --description, which is
# the option name augur export v2 accepts (it has no --description_md flag).
cat exportargs | tr '\n' '\0' | xargs -0 -t augur export v2 \
    --tree ~{tree} \
    ~{"--metadata " + sample_metadata} \
    --auspice-config ~{auspice_config} \
    ~{"--lat-longs " + lat_longs_tsv} \
    ~{"--colors " + colors_tsv} \
    ~{"--description " + description_md} \
    --output ~{out_basename}_auspice.json
}
runtime {
Expand All @@ -479,5 +542,6 @@ task export_auspice_json {
}
output {
File virus_json = "~{out_basename}_auspice.json"
String augur_version = read_string("VERSION")
}
}
2 changes: 1 addition & 1 deletion pipes/WDL/tasks/tasks_taxon_filter.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ task deplete_taxa {
$DBS_BMTAGGER $DBS_BLAST $DBS_BWA \
${'--chunkSize=' + query_chunk_size} \
$TAGS_TO_CLEAR \
--JVMmemory="$mem_in_mb_50"m \
--JVMmemory="$mem_in_mb_75"m \
--srprismMemory=$mem_in_mb_75 \
--loglevel=DEBUG

Expand Down
2 changes: 1 addition & 1 deletion pipes/WDL/workflows/build_augur_tree.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ workflow build_augur_tree {
call nextstrain.export_auspice_json {
input:
tree = refine_augur_tree.tree_refined,
metadata = sample_metadata,
sample_metadata = sample_metadata,
node_data_jsons = select_all([
refine_augur_tree.branch_lengths,
ancestral_traits.node_data_json,
Expand Down
14 changes: 14 additions & 0 deletions pipes/WDL/workflows/newick_to_auspice.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
version 1.0

import "../tasks/tasks_nextstrain.wdl" as nextstrain

# Single-step workflow wrapping the export_auspice_json task from
# tasks_nextstrain.wdl (imported under the `nextstrain` namespace).
workflow newick_to_auspice {
    meta {
        description: "Convert a newick formatted phylogenetic tree into a json suitable for auspice visualization. See https://nextstrain-augur.readthedocs.io/en/stable/usage/cli/export.html"
    }

    # No inputs are bound here, so every task input is supplied as a
    # call-level input (e.g. newick_to_auspice.export_auspice_json.tree).
    call nextstrain.export_auspice_json

    output {
        # Auspice JSON written by the export task.
        File auspice_json = export_auspice_json.virus_json
    }
}
14 changes: 14 additions & 0 deletions pipes/WDL/workflows/subsample_by_metadata.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
version 1.0

import "../tasks/tasks_nextstrain.wdl" as nextstrain

# Single-step workflow wrapping the filter_subsample_sequences task from
# tasks_nextstrain.wdl (imported under the `nextstrain` namespace).
workflow subsample_by_metadata {
    meta {
        description: "Filter and subsample a sequence set. See https://nextstrain-augur.readthedocs.io/en/stable/usage/cli/filter.html"
    }

    # No inputs are bound here, so every task input is supplied as a
    # call-level input
    # (e.g. subsample_by_metadata.filter_subsample_sequences.sequences_fasta).
    call nextstrain.filter_subsample_sequences

    output {
        # Filtered/subsampled FASTA written by the task.
        File filtered_fasta = filter_subsample_sequences.filtered_fasta
    }
}
12 changes: 12 additions & 0 deletions test/input/WDL/test_inputs-build_augur_tree-local.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"build_augur_tree.genbank_gb": "test/input/zika-tutorial/config/zika_outgroup.gb",
"build_augur_tree.ref_fasta": "test/input/zika-tutorial/data/KX369547.1.fna",
"build_augur_tree.export_auspice_json.colors_tsv": "test/input/zika-tutorial/config/colors.tsv",
"build_augur_tree.virus": "testrun",
"build_augur_tree.export_auspice_json.auspice_config": "test/input/zika-tutorial/config/auspice_config.json",
"build_augur_tree.ancestral_traits_to_infer": ["region", "country"],
"build_augur_tree.export_auspice_json.lat_longs_tsv": "test/input/zika-tutorial/config/lat_longs.tsv",
"build_augur_tree.assembly_fastas": ["test/input/zika-tutorial/data/sequences.fasta"],
"build_augur_tree.sample_metadata": "test/input/zika-tutorial/data/metadata.tsv"
}

5 changes: 5 additions & 0 deletions test/input/zika-tutorial/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Nextstrain build for Zika virus tutorial

This repository provides the data and scripts associated with the [Zika virus tutorial](https://nextstrain.org/docs/getting-started/zika-tutorial).

See the [original Zika build repository](https://github.com/nextstrain/zika) for more about the public build.
50 changes: 50 additions & 0 deletions test/input/zika-tutorial/config/auspice_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
{
"title": "Tutorial Nextstrain build for Zika virus",
"maintainers": [
{"name": "Trevor Bedford", "url": "http://bedford.io/team/trevor-bedford/"}
],
"build_url": "https://github.com/nextstrain/zika-tutorial",
"colorings": [
{
"key": "gt",
"title": "Genotype",
"type": "categorical"
},
{
"key": "num_date",
"title": "Date",
"type": "continuous"
},
{
"key": "author",
"title": "Author",
"type": "categorical"
},
{
"key": "country",
"title": "Country",
"type": "categorical"
},
{
"key": "region",
"title": "Region",
"type": "categorical"
}
],
"geo_resolutions": [
"country",
"region"
],
"panels": [
"tree",
"map"
],
"display_defaults": {
"map_triplicate": true
},
"filters": [
"country",
"region",
"author"
]
}
34 changes: 34 additions & 0 deletions test/input/zika-tutorial/config/colors.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
country thailand #511EA8
country vietnam #4928B4
country singapore #4334BF
country french polynesia #4041C7
country american samoa #3F50CC
country fiji #3F5ED0
country tonga #416CCE
country china #4379CD
country taiwan #4784C7
country japan #4B8FC1
country italy #5098B9
country brazil #56A0AF
country peru #5CA7A4
country ecuador #63AC99
country colombia #6BB18E
country french guiana #73B583
country suriname #7CB878
country venezuela #86BB6E
country panama #A4BE56
country nicaragua #AFBD4F
country honduras #B9BC4A
country el salvador #C2BA46
country guatemala #CCB742
country mexico #D3B240
country martinique #DAAC3D
country guadeloupe #DFA43B
country saint barthelemy #E39B39
country usvi #E68F36
country puerto rico #E68234
country jamaica #E67431
country dominican republic #E4632E
country haiti #E1512A
country cuba #DF4027
country usa #DC2F24
Loading