-
Notifications
You must be signed in to change notification settings - Fork 403
/
Copy pathbuilds.yaml
53 lines (45 loc) · 1.8 KB
/
builds.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
## This YAML file is sparsely commented, with a focus on the parts relevant to multiple inputs
## See my_profiles/example/builds.yaml for more general comments
## See docs/multiple_inputs.md for a walkthrough of this config.
# custom_rules:
#   - my_profiles/example_multiple_inputs/rules.smk
# Data sources for this config. Each entry has a `name` (referenced by the
# `filter` and `subsampling` sections) plus paths to its metadata TSV and
# sequences FASTA.
inputs:
  - name: "aus"
    metadata: "data/example_metadata_aus.tsv"
    sequences: "data/example_sequences_aus.fasta"
  - name: "worldwide"
    metadata: "data/example_metadata_worldwide.tsv"
    sequences: "data/example_sequences_worldwide.fasta"
# Builds to produce, keyed by build name.
builds:
  multiple-inputs:
    # Use the custom subsampling scheme defined under `subsampling` below.
    subsampling_scheme: custom-scheme
# STAGE 1: Input-specific filtering parameters
# Keys directly under `filter` are input names from the `inputs` list, so the
# settings below apply only to the `aus` input.
filter:
  aus:
    min_length: 5000  # Allow shorter genomes. Parameter used to filter alignment.
    skip_diagnostics: true  # skip diagnostics (which can remove genomes) for this input
# STAGE 2: Subsampling parameters
subsampling:
  # Scheme selected by `subsampling_scheme` in the `multiple-inputs` build.
  custom-scheme:
    # Use metadata key to include ALL sequences from input `aus`
    allFromAus:
      exclude: "--exclude-where 'aus!=yes'"  # subset to sequences from input `aus`
    # Proximity subsampling from `worldwide` input to provide context
    worldwideContext:
      exclude: "--exclude-where 'aus=yes'"  # i.e. subset to sequences _not_ from input `aus`
      group_by: "year"  # NOTE: `augur filter` needs this to use `max_sequences` (TODO)
      max_sequences: 100
      priorities:
        type: "proximity"
        focus: "allFromAus"  # refers to the `allFromAus` sample defined above
    # Background sample of sequences _not_ from input `aus`:
    # 5 sequences per (year, month) group.
    worldwideBackground:
      exclude: "--exclude-where 'aus=yes'"
      group_by: "year month"
      seq_per_group: 5
# Profile-specific Auspice config and description files.
files:
  auspice_config: "my_profiles/example_multiple_inputs/my_auspice_config.json"
  description: "my_profiles/example_multiple_inputs/my_description.md"
# Trait settings, keyed by build name (`multiple-inputs` is defined under
# `builds`).
traits:
  multiple-inputs:
    sampling_bias_correction: 2.5
    columns: ["country"]