Skip to content

Commit

Permalink
io: Define potential strain column names in a new file
Browse files (browse the repository at this point in the history)
This avoids re-defining these values at each use site and prevents the
copies from getting out of sync.
  • Loading branch information
victorlin committed Jan 3, 2023
1 parent f8f15ff commit 28001ac
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 2 deletions.
4 changes: 3 additions & 1 deletion augur/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
from tempfile import NamedTemporaryFile
from typing import Collection


from .dates import numeric_date, numeric_date_type, SUPPORTED_DATE_HELP_TEXT, is_date_ambiguous, get_numerical_dates, get_iso_year_week
from .errors import AugurError
from .index import index_sequences, index_vcf
from .io.defaults import POTENTIAL_STRAIN_ID_COLUMNS
from .io.file import open_file
from .io.metadata import read_metadata
from .io.sequences import read_sequences, write_sequences
Expand Down Expand Up @@ -48,7 +50,7 @@ def register_arguments(parser):
input_group.add_argument('--sequences', '-s', help="sequences in FASTA or VCF format")
input_group.add_argument('--sequence-index', help="sequence composition report generated by augur index. If not provided, an index will be created on the fly.")
input_group.add_argument('--metadata-chunk-size', type=int, default=100000, help="maximum number of metadata records to read into memory at a time. Increasing this number can speed up filtering at the cost of more memory used.")
input_group.add_argument('--metadata-id-columns', default=["strain", "name"], nargs="+", help="names of valid metadata columns containing identifier information like 'strain' or 'name'")
input_group.add_argument('--metadata-id-columns', default=POTENTIAL_STRAIN_ID_COLUMNS, nargs="+", help="names of valid metadata columns containing identifier information like 'strain' or 'name'")

metadata_filter_group = parser.add_argument_group("metadata filters", "filters to apply to metadata")
metadata_filter_group.add_argument(
Expand Down
3 changes: 3 additions & 0 deletions augur/io/defaults.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# For tabular files, accept the following column names to represent the unique
# row ID, in order of preference.
#
# This is the single shared definition used as the default value of
# `augur filter --metadata-id-columns` and of the `id_columns` parameter of
# `augur.io.metadata.read_metadata`, so the two defaults cannot drift apart.
# It is a tuple, so the shared default cannot be mutated in place by a caller.
POTENTIAL_STRAIN_ID_COLUMNS = ("strain", "name")
3 changes: 2 additions & 1 deletion augur/io/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@
from itertools import chain

from augur.errors import AugurError
from augur.io.defaults import POTENTIAL_STRAIN_ID_COLUMNS
from augur.io.print import print_err
from augur.types import DataErrorMethod
from .file import open_file


def read_metadata(metadata_file, id_columns=("strain", "name"), chunk_size=None):
def read_metadata(metadata_file, id_columns=POTENTIAL_STRAIN_ID_COLUMNS, chunk_size=None):
"""Read metadata from a given filename and into a pandas `DataFrame` or
`TextFileReader` object.
Expand Down

0 comments on commit 28001ac

Please sign in to comment.