From 55ea0ce0f9edbdd19ba62b4c564ee8cdc77ecc3d Mon Sep 17 00:00:00 2001 From: Kim Andrews <17375001+kimandrews@users.noreply.github.com> Date: Thu, 21 Mar 2024 15:55:49 -0700 Subject: [PATCH 01/11] Add reference files for N450 region Reference is comprised of 450bp of the N gene from the same sample that is used for the genome tree (NCBI Accession NC_001498.1). --- .../defaults/measles_reference_N450.fasta | 8 +++ .../defaults/measles_reference_N450.gb | 69 +++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 phylogenetic/defaults/measles_reference_N450.fasta create mode 100644 phylogenetic/defaults/measles_reference_N450.gb diff --git a/phylogenetic/defaults/measles_reference_N450.fasta b/phylogenetic/defaults/measles_reference_N450.fasta new file mode 100644 index 0000000..eab1954 --- /dev/null +++ b/phylogenetic/defaults/measles_reference_N450.fasta @@ -0,0 +1,8 @@ +>lcl|NC_001498.1_cds_NP_056918.1_1 [gene=N] [locus_tag=MeVgp1] [db_xref=GeneID:1489804] [protein=nucleocapsid protein] [protein_id=NP_056918.1] [location=1233..1682] [gbkey=CDS] +GTCAGTTCCACATTGGCATCCGAACTCGGTATCACTGCCGAGGATGCAAGGCTTGTTTCAGAGAT +TGCAATGCATACTACTGAGGACAGGATCAGTAGAGCGGTCGGACCCAGACAAGCCCAAGTGTCATTTCTA +CACGGTGATCAAAGTGAGAATGAGCTACCAGGATTGGGGGGCAAGGAAGATAGGAGGGTCAAACAGGGTC +GGGGAGAAGCCAGGGAGAGCTACAGAGAAACCGGGTCCAGCAGAGCAAGTGATGCGAGAGCTGCCCATCC +TCCAACCAGCATGCCCCTAGACATTGACACTGCATCGGAGTCAGGCCAAGATCCGCAGGACAGTCGAAGG +TCAGCTGACGCCCTGCTCAGGCTGCAAGCCATGGCAGGAATCTTGGAAGAACAAGGCTCAGACACGGACA +CCCCTAGGGTATACAATGACAGAGATCTTCTAGAC diff --git a/phylogenetic/defaults/measles_reference_N450.gb b/phylogenetic/defaults/measles_reference_N450.gb new file mode 100644 index 0000000..277c96a --- /dev/null +++ b/phylogenetic/defaults/measles_reference_N450.gb @@ -0,0 +1,69 @@ +LOCUS NC_001498 450 bp cRNA linear VRL 13-AUG-2018 +DEFINITION Measles virus, complete genome. 
+ACCESSION NC_001498 REGION: 1233..1682 +VERSION NC_001498.1 +DBLINK Project: 15025 + BioProject: PRJNA485481 +KEYWORDS RefSeq. +SOURCE Measles morbillivirus + ORGANISM Measles morbillivirus + Viruses; Riboviria; Orthornavirae; Negarnaviricota; + Haploviricotina; Monjiviricetes; Mononegavirales; Paramyxoviridae; + Orthoparamyxovirinae; Morbillivirus; Morbillivirus hominis. +REFERENCE 1 (sites) + AUTHORS Rima,B.K. and Duprex,W.P. + TITLE The measles virus replication cycle + JOURNAL Curr. Top. Microbiol. Immunol. 329, 77-102 (2009) + PUBMED 19198563 +REFERENCE 2 + AUTHORS Takeuchi,K., Miyajima,N., Kobune,F. and Tashiro,M. + TITLE Comparative nucleotide sequence analyses of the entire genomes of + B95a cell-isolated and vero cell-isolated measles viruses from the + same patient + JOURNAL Virus Genes 20 (3), 253-257 (2000) + PUBMED 10949953 +REFERENCE 3 (bases 1 to 450) + CONSRTM NCBI Genome Project + TITLE Direct Submission + JOURNAL Submitted (01-AUG-2000) National Center for Biotechnology + Information, NIH, Bethesda, MD 20894, USA +REFERENCE 4 (bases 1 to 450) + AUTHORS Takeuchi,K., Tanabayashi,K. and Tashiro,M. + TITLE Direct Submission + JOURNAL Submitted (10-JUL-1998) Kaoru Takeuchi, National Institute of + Infectious Diseases, Viral Disease and Vaccine Contorol; 4-7-1 + Gakuen, Musashi-murayama, Tokyo 208-0011, Japan + (E-mail:ktake@nih.go.jp, Tel:81-42-561-0771(ex.530), + Fax:81-42-567-5631) +COMMENT REVIEWED REFSEQ: This record has been curated by NCBI staff. The + reference sequence was derived from AB016162. + Sequence updated (21-Jul-1998) + Sequence updated (11-Dec-1998). + COMPLETENESS: full length. 
+FEATURES Location/Qualifiers + source 1..450 + /organism="Measles morbillivirus" + /mol_type="viral cRNA" + /strain="Ichinose-B95a" + /db_xref="taxon:11234" + CDS <1..>450 + /gene="N" + /codon_start=1 + /product="nucleocapsid protein" + /protein_id="NP_056918.1" + /db_xref="GeneID:1489804" + /translation="VSSTLASELGITAEDAR + LVSEIAMHTTEDRISRAVGPRQAQVSFLHGDQSENELPGLGGKEDRRVKQGRGEARES + YRETGSSRASDARAAHPPTSMPLDIDTASESGQDPQDSRRSADALLRLQAMAGILEEQ + GSDTDTPRVYNDRDLLD" +ORIGIN + 1 gtcagttcca cattggcatc cgaactcggt atcactgccg aggatgcaag gcttgtttca + 61 gagattgcaa tgcatactac tgaggacagg atcagtagag cggtcggacc cagacaagcc + 121 caagtgtcat ttctacacgg tgatcaaagt gagaatgagc taccaggatt ggggggcaag + 181 gaagatagga gggtcaaaca gggtcgggga gaagccaggg agagctacag agaaaccggg + 241 tccagcagag caagtgatgc gagagctgcc catcctccaa ccagcatgcc cctagacatt + 301 gacactgcat cggagtcagg ccaagatccg caggacagtc gaaggtcagc tgacgccctg + 361 ctcaggctgc aagccatggc aggaatcttg gaagaacaag gctcagacac ggacacccct + 421 agggtataca atgacagaga tcttctagac +// + From edbefd52b3222ba92cd7fb6b9bbf6f9d3b72a26a Mon Sep 17 00:00:00 2001 From: Kim Andrews <17375001+kimandrews@users.noreply.github.com> Date: Thu, 21 Mar 2024 16:24:20 -0700 Subject: [PATCH 02/11] Prepare N450 sequences for phylogenetic analysis * Add rule to align sequences to N450 reference using nextclade * Add rule to filter by length, date, country --- phylogenetic/Snakefile | 1 + phylogenetic/defaults/config.yaml | 7 +++ phylogenetic/rules/prepare_sequences.smk | 2 +- phylogenetic/rules/prepare_sequences_N450.smk | 58 +++++++++++++++++++ 4 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 phylogenetic/rules/prepare_sequences_N450.smk diff --git a/phylogenetic/Snakefile b/phylogenetic/Snakefile index c1bbbd6..aeb9b53 100644 --- a/phylogenetic/Snakefile +++ b/phylogenetic/Snakefile @@ -5,6 +5,7 @@ rule all: auspice_json = "auspice/measles.json", include: "rules/prepare_sequences.smk" +include: "rules/prepare_sequences_N450.smk" include: 
"rules/construct_phylogeny.smk" include: "rules/annotate_phylogeny.smk" include: "rules/export.smk" diff --git a/phylogenetic/defaults/config.yaml b/phylogenetic/defaults/config.yaml index ac82fcc..2c91b35 100644 --- a/phylogenetic/defaults/config.yaml +++ b/phylogenetic/defaults/config.yaml @@ -2,6 +2,8 @@ strain_id_field: "accession" files: exclude: "defaults/dropped_strains.txt" reference: "defaults/measles_reference.gb" + reference_N450: "defaults/measles_reference_N450.gb" + reference_N450_fasta: "defaults/measles_reference_N450.fasta" colors: "defaults/colors.tsv" auspice_config: "defaults/auspice_config.json" filter: @@ -9,6 +11,11 @@ filter: sequences_per_group: 20 min_date: 1950 min_length: 5000 +filter_N450: + group_by: "country year" + subsample_max_sequences: 3000 + min_date: 1950 + min_length: 400 refine: coalescent: "opt" date_inference: "marginal" diff --git a/phylogenetic/rules/prepare_sequences.smk b/phylogenetic/rules/prepare_sequences.smk index af6c9c6..20816f5 100644 --- a/phylogenetic/rules/prepare_sequences.smk +++ b/phylogenetic/rules/prepare_sequences.smk @@ -74,7 +74,7 @@ rule align: sequences = "results/filtered.fasta", reference = config["files"]["reference"] output: - alignment = "results/aligned.fasta" + alignment = "results/aligned_genome.fasta" shell: """ augur align \ diff --git a/phylogenetic/rules/prepare_sequences_N450.smk b/phylogenetic/rules/prepare_sequences_N450.smk new file mode 100644 index 0000000..8a5336f --- /dev/null +++ b/phylogenetic/rules/prepare_sequences_N450.smk @@ -0,0 +1,58 @@ +""" +This part of the workflow prepares sequences for constructing the phylogenetic tree for 450bp of the N gene. + +See Augur's usage docs for these commands for more details. 
+""" + +rule align_and_extract_N450: + input: + sequences = "data/sequences.fasta", + reference = config["files"]["reference_N450_fasta"] + output: + sequences = "results/sequences_N450.fasta" + params: + min_length = config['filter_N450']['min_length'] + shell: + """ + nextclade run \ + -j 1 \ + --input-ref {input.reference} \ + --output-fasta {output.sequences} \ + --min-seed-cover 0.01 \ + --min-length {params.min_length} \ + --silent \ + {input.sequences} + """ +rule filter_N450: + """ + Filtering to + - at most {params.subsample_max_sequences} sequence(s) in total, grouped by {params.group_by!s} + - excluding strains in {input.exclude} + - minimum sequence length of {params.min_length}, minimum date of {params.min_date} + - excluding strains with missing region, country or date metadata + """ + input: + sequences = "results/sequences_N450.fasta", + metadata = "data/metadata.tsv", + exclude = config["files"]["exclude"] + output: + sequences = "results/aligned_N450.fasta" + params: + group_by = config['filter_N450']['group_by'], + subsample_max_sequences = config["filter_N450"]["subsample_max_sequences"], + min_date = config["filter_N450"]["min_date"], + min_length = config['filter_N450']['min_length'], + strain_id = config["strain_id_field"] + shell: + """ + augur filter \ + --sequences {input.sequences} \ + --metadata {input.metadata} \ + --metadata-id-columns {params.strain_id} \ + --exclude {input.exclude} \ + --output {output.sequences} \ + --group-by {params.group_by} \ + --subsample-max-sequences {params.subsample_max_sequences} \ + --min-date {params.min_date} \ + --min-length {params.min_length} + """ \ No newline at end of file From 8343876ebde2c8a87e51ecfffdb6529a723871ad Mon Sep 17 00:00:00 2001 From: Kim Andrews <17375001+kimandrews@users.noreply.github.com> Date: Thu, 21 Mar 2024 16:34:19 -0700 Subject: [PATCH 03/11] Construct phylogeny for N450 region --- phylogenetic/Snakefile | 2 ++ phylogenetic/rules/construct_phylogeny.smk | 12 ++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git 
a/phylogenetic/Snakefile b/phylogenetic/Snakefile index aeb9b53..1e3ce3f 100644 --- a/phylogenetic/Snakefile +++ b/phylogenetic/Snakefile @@ -1,3 +1,5 @@ +genes = ['N450', 'genome'] + configfile: "defaults/config.yaml" rule all: diff --git a/phylogenetic/rules/construct_phylogeny.smk b/phylogenetic/rules/construct_phylogeny.smk index 3a5f6e8..162edc1 100644 --- a/phylogenetic/rules/construct_phylogeny.smk +++ b/phylogenetic/rules/construct_phylogeny.smk @@ -7,9 +7,9 @@ See Augur's usage docs for these commands for more details. rule tree: """Building tree""" input: - alignment = "results/aligned.fasta" + alignment = "results/aligned_{gene}.fasta" output: - tree = "results/tree_raw.nwk" + tree = "results/tree_raw_{gene}.nwk" shell: """ augur tree \ @@ -26,12 +26,12 @@ rule refine: - filter tips more than {params.clock_filter_iqd} IQDs from clock expectation """ input: - tree = "results/tree_raw.nwk", - alignment = "results/aligned.fasta", + tree = "results/tree_raw_{gene}.nwk", + alignment = "results/aligned_{gene}.fasta", metadata = "data/metadata.tsv" output: - tree = "results/tree.nwk", - node_data = "results/branch_lengths.json" + tree = "results/tree_{gene}.nwk", + node_data = "results/branch_lengths_{gene}.json" params: coalescent = config["refine"]["coalescent"], date_inference = config["refine"]["date_inference"], From d55342bf2dbbcf2d1e017cbf3178fcb5feeca74f Mon Sep 17 00:00:00 2001 From: Kim Andrews <17375001+kimandrews@users.noreply.github.com> Date: Thu, 21 Mar 2024 16:35:35 -0700 Subject: [PATCH 04/11] Annotate phylogeny for N450 region --- phylogenetic/rules/annotate_phylogeny.smk | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/phylogenetic/rules/annotate_phylogeny.smk b/phylogenetic/rules/annotate_phylogeny.smk index 2f8eec4..06a71c9 100644 --- a/phylogenetic/rules/annotate_phylogeny.smk +++ b/phylogenetic/rules/annotate_phylogeny.smk @@ -8,10 +8,10 @@ See Augur's usage docs for these commands for more details. 
rule ancestral: """Reconstructing ancestral sequences and mutations""" input: - tree = "results/tree.nwk", - alignment = "results/aligned.fasta" + tree = "results/tree_{gene}.nwk", + alignment = "results/aligned_{gene}.fasta" output: - node_data = "results/nt_muts.json" + node_data = "results/nt_muts_{gene}.json" params: inference = config["ancestral"]["inference"] shell: @@ -26,11 +26,11 @@ rule ancestral: rule translate: """Translating amino acid sequences""" input: - tree = "results/tree.nwk", - node_data = "results/nt_muts.json", - reference = config["files"]["reference"] + tree = "results/tree_{gene}.nwk", + node_data = "results/nt_muts_{gene}.json", + reference = lambda wildcard: "defaults/measles_reference.gb" if wildcard.gene in ["genome"] else "defaults/measles_reference_{gene}.gb" output: - node_data = "results/aa_muts.json" + node_data = "results/aa_muts_{gene}.json" shell: """ augur translate \ From ab92a1b7c57b9b776a625b442b47b577ca92596f Mon Sep 17 00:00:00 2001 From: Kim Andrews <17375001+kimandrews@users.noreply.github.com> Date: Thu, 21 Mar 2024 16:37:50 -0700 Subject: [PATCH 05/11] Export phylogeny for N450 region --- phylogenetic/Snakefile | 2 +- .../defaults/auspice_config_N450.json | 49 +++++++++++++++++++ phylogenetic/defaults/config.yaml | 1 + phylogenetic/rules/export.smk | 14 +++--- 4 files changed, 59 insertions(+), 7 deletions(-) create mode 100644 phylogenetic/defaults/auspice_config_N450.json diff --git a/phylogenetic/Snakefile b/phylogenetic/Snakefile index 1e3ce3f..b4521ff 100644 --- a/phylogenetic/Snakefile +++ b/phylogenetic/Snakefile @@ -4,7 +4,7 @@ configfile: "defaults/config.yaml" rule all: input: - auspice_json = "auspice/measles.json", + auspice_json = expand("auspice/measles_{gene}.json", gene=genes) include: "rules/prepare_sequences.smk" include: "rules/prepare_sequences_N450.smk" diff --git a/phylogenetic/defaults/auspice_config_N450.json b/phylogenetic/defaults/auspice_config_N450.json new file mode 100644 index 
0000000..6e7a2f4 --- /dev/null +++ b/phylogenetic/defaults/auspice_config_N450.json @@ -0,0 +1,49 @@ +{ + "title": "Real-time tracking of measles virus evolution", + "maintainers": [ + {"name": "Kim Andrews", "url": "https://bedford.io/team/kim-andrews/"}, + {"name": "the Nextstrain team", "url": "https://nextstrain.org/team"} + ], + "build_url": "https://github.com/nextstrain/measles", + "colorings": [ + { + "key": "gt", + "title": "Genotype", + "type": "categorical" + }, + { + "key": "num_date", + "title": "Date", + "type": "continuous" + }, + { + "key": "author", + "title": "Author", + "type": "categorical" + }, + { + "key": "country", + "title": "Country", + "type": "categorical" + }, + { + "key": "region", + "title": "Region", + "type": "categorical" + } + ], + "geo_resolutions": [ + "country", + "region" + ], + "display_defaults": { + "map_triplicate": true, + "distance_measure": "div", + "layout": "unrooted" + }, + "filters": [ + "country", + "region", + "author" + ] +} diff --git a/phylogenetic/defaults/config.yaml b/phylogenetic/defaults/config.yaml index 2c91b35..035e5b3 100644 --- a/phylogenetic/defaults/config.yaml +++ b/phylogenetic/defaults/config.yaml @@ -6,6 +6,7 @@ files: reference_N450_fasta: "defaults/measles_reference_N450.fasta" colors: "defaults/colors.tsv" auspice_config: "defaults/auspice_config.json" + auspice_config_N450: "defaults/auspice_config_N450.json" filter: group_by: "country year month" sequences_per_group: 20 diff --git a/phylogenetic/rules/export.smk b/phylogenetic/rules/export.smk index c9ec2da..471043e 100644 --- a/phylogenetic/rules/export.smk +++ b/phylogenetic/rules/export.smk @@ -8,15 +8,17 @@ See Augur's usage docs for these commands for more details. 
rule export: """Exporting data files for for auspice""" input: - tree = "results/tree.nwk", + tree = "results/tree_{gene}.nwk", metadata = "data/metadata.tsv", - branch_lengths = "results/branch_lengths.json", - nt_muts = "results/nt_muts.json", - aa_muts = "results/aa_muts.json", + branch_lengths = "results/branch_lengths_{gene}.json", + nt_muts = "results/nt_muts_{gene}.json", + aa_muts = "results/aa_muts_{gene}.json", colors = config["files"]["colors"], - auspice_config = config["files"]["auspice_config"] + auspice_config = lambda wildcard: "defaults/auspice_config.json" if wildcard.gene in ["genome"] else "defaults/auspice_config_N450.json" + output: - auspice_json = rules.all.input.auspice_json + auspice_json = "auspice/measles_{gene}.json", + root_sequence = "auspice/measles_{gene}_root-sequence.json" params: strain_id = config["strain_id_field"] shell: From f39ba8b12ad2ccb980302792d5fe9123cb89ded7 Mon Sep 17 00:00:00 2001 From: Kim Andrews <17375001+kimandrews@users.noreply.github.com> Date: Thu, 28 Mar 2024 11:44:11 -0700 Subject: [PATCH 06/11] Use `--stochastic-resolve` option for `augur refine` --- phylogenetic/rules/construct_phylogeny.smk | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/phylogenetic/rules/construct_phylogeny.smk b/phylogenetic/rules/construct_phylogeny.smk index 162edc1..979d026 100644 --- a/phylogenetic/rules/construct_phylogeny.smk +++ b/phylogenetic/rules/construct_phylogeny.smk @@ -50,6 +50,7 @@ rule refine: --coalescent {params.coalescent} \ --date-confidence \ --date-inference {params.date_inference} \ - --clock-filter-iqd {params.clock_filter_iqd} + --clock-filter-iqd {params.clock_filter_iqd} \ + --stochastic-resolve """ \ No newline at end of file From 119fbcf947cfb35bbb1533987660a14732d49fe1 Mon Sep 17 00:00:00 2001 From: Kim Andrews <17375001+kimandrews@users.noreply.github.com> Date: Thu, 28 Mar 2024 11:45:09 -0700 Subject: [PATCH 07/11] =?UTF-8?q?Remove=20"country"=20colors=20from=C2=A0`?= 
=?UTF-8?q?defaults/colors.tsv`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- phylogenetic/defaults/colors.tsv | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/phylogenetic/defaults/colors.tsv b/phylogenetic/defaults/colors.tsv index ce960e3..55a9f8b 100644 --- a/phylogenetic/defaults/colors.tsv +++ b/phylogenetic/defaults/colors.tsv @@ -4,23 +4,3 @@ region Africa #8ABB6A region Europe #BEBB48 region South America #E29E39 region North America #E2562B - -country india #511EA8 -country china #4333BE -country vietnam #3F4ECB -country south korea #4169CF -country japan #4682C9 -country australia #4F96BB -country new zealand #5AA5A8 -country russia #68AF92 -country gambia #78B77D -country sudan #8BBB6A -country morocco #9EBE59 -country italy #B3BD4D -country germany #C5B945 -country france #D5B03F -country netherlands #E0A23A -country united kingdom #E68D36 -country brazil #E67231 -country usa #E1502A -country canada #DC2F24 From 862dbaf7ca74227824784933bb9f9e1c94b4d218 Mon Sep 17 00:00:00 2001 From: Kim Andrews <17375001+kimandrews@users.noreply.github.com> Date: Thu, 28 Mar 2024 11:49:56 -0700 Subject: [PATCH 08/11] Change default display to rooted time-tree --- phylogenetic/defaults/auspice_config_N450.json | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/phylogenetic/defaults/auspice_config_N450.json b/phylogenetic/defaults/auspice_config_N450.json index 6e7a2f4..d2781dc 100644 --- a/phylogenetic/defaults/auspice_config_N450.json +++ b/phylogenetic/defaults/auspice_config_N450.json @@ -37,9 +37,7 @@ "region" ], "display_defaults": { - "map_triplicate": true, - "distance_measure": "div", - "layout": "unrooted" + "map_triplicate": true }, "filters": [ "country", From 8bea320b2fe50e6bc8220b4d33028a4480088ea4 Mon Sep 17 00:00:00 2001 From: Kim Andrews <17375001+kimandrews@users.noreply.github.com> Date: Thu, 28 Mar 2024 14:19:03 -0700 Subject: [PATCH 09/11] Use 
`--metadata-columns` to export "author" in auspice_config.json --- phylogenetic/defaults/auspice_config.json | 8 +++----- phylogenetic/defaults/auspice_config_N450.json | 8 +++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/phylogenetic/defaults/auspice_config.json b/phylogenetic/defaults/auspice_config.json index c010bb9..27f2484 100644 --- a/phylogenetic/defaults/auspice_config.json +++ b/phylogenetic/defaults/auspice_config.json @@ -16,11 +16,6 @@ "title": "Date", "type": "continuous" }, - { - "key": "author", - "title": "Author", - "type": "categorical" - }, { "key": "country", "title": "Country", @@ -43,5 +38,8 @@ "country", "region", "author" + ], + "metadata_columns": [ + "author" ] } diff --git a/phylogenetic/defaults/auspice_config_N450.json b/phylogenetic/defaults/auspice_config_N450.json index d2781dc..5193bc0 100644 --- a/phylogenetic/defaults/auspice_config_N450.json +++ b/phylogenetic/defaults/auspice_config_N450.json @@ -16,11 +16,6 @@ "title": "Date", "type": "continuous" }, - { - "key": "author", - "title": "Author", - "type": "categorical" - }, { "key": "country", "title": "Country", @@ -43,5 +38,8 @@ "country", "region", "author" + ], + "metadata_columns": [ + "author" ] } From a9d26441067035b49de0a79e55435849888a3433 Mon Sep 17 00:00:00 2001 From: Kim Andrews <17375001+kimandrews@users.noreply.github.com> Date: Thu, 28 Mar 2024 14:20:34 -0700 Subject: [PATCH 10/11] Organize builds as directories within `results` --- phylogenetic/rules/annotate_phylogeny.smk | 12 ++++++------ phylogenetic/rules/construct_phylogeny.smk | 12 ++++++------ phylogenetic/rules/export.smk | 8 ++++---- phylogenetic/rules/prepare_sequences.smk | 6 +++--- phylogenetic/rules/prepare_sequences_N450.smk | 6 +++--- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/phylogenetic/rules/annotate_phylogeny.smk b/phylogenetic/rules/annotate_phylogeny.smk index 06a71c9..61e94b8 100644 --- a/phylogenetic/rules/annotate_phylogeny.smk +++ 
b/phylogenetic/rules/annotate_phylogeny.smk @@ -8,10 +8,10 @@ See Augur's usage docs for these commands for more details. rule ancestral: """Reconstructing ancestral sequences and mutations""" input: - tree = "results/tree_{gene}.nwk", - alignment = "results/aligned_{gene}.fasta" + tree = "results/{gene}/tree.nwk", + alignment = "results/{gene}/aligned.fasta" output: - node_data = "results/nt_muts_{gene}.json" + node_data = "results/{gene}/nt_muts.json" params: inference = config["ancestral"]["inference"] shell: @@ -26,11 +26,11 @@ rule ancestral: rule translate: """Translating amino acid sequences""" input: - tree = "results/tree_{gene}.nwk", - node_data = "results/nt_muts_{gene}.json", + tree = "results/{gene}/tree.nwk", + node_data = "results/{gene}/nt_muts.json", reference = lambda wildcard: "defaults/measles_reference.gb" if wildcard.gene in ["genome"] else "defaults/measles_reference_{gene}.gb" output: - node_data = "results/aa_muts_{gene}.json" + node_data = "results/{gene}/aa_muts.json" shell: """ augur translate \ diff --git a/phylogenetic/rules/construct_phylogeny.smk b/phylogenetic/rules/construct_phylogeny.smk index 979d026..e222800 100644 --- a/phylogenetic/rules/construct_phylogeny.smk +++ b/phylogenetic/rules/construct_phylogeny.smk @@ -7,9 +7,9 @@ See Augur's usage docs for these commands for more details. 
rule tree: """Building tree""" input: - alignment = "results/aligned_{gene}.fasta" + alignment = "results/{gene}/aligned.fasta" output: - tree = "results/tree_raw_{gene}.nwk" + tree = "results/{gene}/tree_raw.nwk" shell: """ augur tree \ @@ -26,12 +26,12 @@ rule refine: - filter tips more than {params.clock_filter_iqd} IQDs from clock expectation """ input: - tree = "results/tree_raw_{gene}.nwk", - alignment = "results/aligned_{gene}.fasta", + tree = "results/{gene}/tree_raw.nwk", + alignment = "results/{gene}/aligned.fasta", metadata = "data/metadata.tsv" output: - tree = "results/tree_{gene}.nwk", - node_data = "results/branch_lengths_{gene}.json" + tree = "results/{gene}/tree.nwk", + node_data = "results/{gene}/branch_lengths.json" params: coalescent = config["refine"]["coalescent"], date_inference = config["refine"]["date_inference"], diff --git a/phylogenetic/rules/export.smk b/phylogenetic/rules/export.smk index 471043e..dcd1893 100644 --- a/phylogenetic/rules/export.smk +++ b/phylogenetic/rules/export.smk @@ -8,11 +8,11 @@ See Augur's usage docs for these commands for more details. 
rule export: """Exporting data files for for auspice""" input: - tree = "results/tree_{gene}.nwk", + tree = "results/{gene}/tree.nwk", metadata = "data/metadata.tsv", - branch_lengths = "results/branch_lengths_{gene}.json", - nt_muts = "results/nt_muts_{gene}.json", - aa_muts = "results/aa_muts_{gene}.json", + branch_lengths = "results/{gene}/branch_lengths.json", + nt_muts = "results/{gene}/nt_muts.json", + aa_muts = "results/{gene}/aa_muts.json", colors = config["files"]["colors"], auspice_config = lambda wildcard: "defaults/auspice_config.json" if wildcard.gene in ["genome"] else "defaults/auspice_config_N450.json" diff --git a/phylogenetic/rules/prepare_sequences.smk b/phylogenetic/rules/prepare_sequences.smk index 20816f5..c333bb2 100644 --- a/phylogenetic/rules/prepare_sequences.smk +++ b/phylogenetic/rules/prepare_sequences.smk @@ -44,7 +44,7 @@ rule filter: metadata = "data/metadata.tsv", exclude = config["files"]["exclude"] output: - sequences = "results/filtered.fasta" + sequences = "results/genome/filtered.fasta" params: group_by = config["filter"]["group_by"], sequences_per_group = config["filter"]["sequences_per_group"], @@ -71,10 +71,10 @@ rule align: - filling gaps with N """ input: - sequences = "results/filtered.fasta", + sequences = "results/genome/filtered.fasta", reference = config["files"]["reference"] output: - alignment = "results/aligned_genome.fasta" + alignment = "results/genome/aligned.fasta" shell: """ augur align \ diff --git a/phylogenetic/rules/prepare_sequences_N450.smk b/phylogenetic/rules/prepare_sequences_N450.smk index 8a5336f..68a8db0 100644 --- a/phylogenetic/rules/prepare_sequences_N450.smk +++ b/phylogenetic/rules/prepare_sequences_N450.smk @@ -9,7 +9,7 @@ rule align_and_extract_N450: sequences = "data/sequences.fasta", reference = config["files"]["reference_N450_fasta"] output: - sequences = "results/sequences_N450.fasta" + sequences = "results/N450/sequences.fasta" params: min_length = config['filter_N450']['min_length'] 
shell: @@ -32,11 +32,11 @@ rule filter_N450: - excluding strains with missing region, country or date metadata """ input: - sequences = "results/sequences_N450.fasta", + sequences = "results/N450/sequences.fasta", metadata = "data/metadata.tsv", exclude = config["files"]["exclude"] output: - sequences = "results/aligned_N450.fasta" + sequences = "results/N450/aligned.fasta" params: group_by = config['filter_N450']['group_by'], subsample_max_sequences = config["filter_N450"]["subsample_max_sequences"], From bf83a42a1a21d6041f9cb4d78ba79b866c35ebcf Mon Sep 17 00:00:00 2001 From: Kim Andrews <17375001+kimandrews@users.noreply.github.com> Date: Mon, 1 Apr 2024 16:12:57 -0700 Subject: [PATCH 11/11] Update Changelog --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 103b249..f563361 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,4 +1,5 @@ # CHANGELOG +* 1 April 2024: Create a tree using the 450 nucleotides encoding the carboxyl-terminal 150 amino acids of the nucleoprotein (N450), which is highly represented on NCBI for measles. [PR #20](https://github.com/nextstrain/measles/pull/20) * 15 March 2024: Connect ingest and phylogenetic workflows to follow the pathogen-repo-guide by uploading ingest output to S3, downloading ingest output from S3 to phylogenetic directory, using "accession" column as the ID column, and using a color scheme that matches the new region name format. [PR #19](https://github.com/nextstrain/measles/pull/19) * 1 March 2024: Add phylogenetic directory to follow the pathogen-repo-guide, and update the CI workflow to match the new file structure. [PR #18](https://github.com/nextstrain/measles/pull/18) * 14 February 2024: Add ingest directory from pathogen-repo-guide and make measles-specific modifications. [PR #10](https://github.com/nextstrain/measles/pull/10)