diff --git a/.github/workflows/phylogenetic.yaml b/.github/workflows/phylogenetic.yaml index d64c2c55..0dcf0ce6 100644 --- a/.github/workflows/phylogenetic.yaml +++ b/.github/workflows/phylogenetic.yaml @@ -32,13 +32,13 @@ on: type: string sequences_url: description: | - URL for a sequences.fasta.zst file. + URL for sequences_{serotype}.fasta.zst files where {serotype} will be replaced by all, denv1 to denv4. If not provided, will use default sequences_url from phylogenetic/config/config_dengue.yaml required: false type: string metadata_url: description: | - URL for a metadata.tsv.zst file. + URL for metadata_{serotype}.tsv.zst files where {serotype} will be replaced by all, denv1 to denv4. If not provided, will use default metadata_url from phylogenetic/config/config_dengue.yaml required: false type: string diff --git a/phylogenetic/config/config_dengue.yaml b/phylogenetic/config/config_dengue.yaml index 243217fb..5ec437e8 100644 --- a/phylogenetic/config/config_dengue.yaml +++ b/phylogenetic/config/config_dengue.yaml @@ -1,3 +1,9 @@ +# Sequences must be FASTA and metadata must be TSV +# Both files must be zstd compressed +# Both URLs must contain a {serotype} placeholder, which is replaced by all, denv1-denv4 +sequences_url: "https://data.nextstrain.org/files/workflows/dengue/sequences_{serotype}.fasta.zst" +metadata_url: "https://data.nextstrain.org/files/workflows/dengue/metadata_{serotype}.tsv.zst" + strain_id_field: "genbank_accession" display_strain_field: "strain" diff --git a/phylogenetic/rules/prepare_sequences.smk b/phylogenetic/rules/prepare_sequences.smk index b9521a4c..7f651b68 100644 --- a/phylogenetic/rules/prepare_sequences.smk +++ b/phylogenetic/rules/prepare_sequences.smk @@ -21,8 +21,8 @@ rule download: metadata = "data/metadata_{serotype}.tsv.zst" params: - sequences_url = "https://data.nextstrain.org/files/workflows/dengue/sequences_{serotype}.fasta.zst", - metadata_url = 
"https://data.nextstrain.org/files/workflows/dengue/metadata_{serotype}.tsv.zst" + sequences_url = config["sequences_url"], + metadata_url = config["metadata_url"], shell: """ curl -fsSL --compressed {params.sequences_url:q} --output {output.sequences}