From 4c0900542349e24a2a82518a7db6d9a245a5e070 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Fri, 8 Dec 2023 16:18:22 -0800 Subject: [PATCH] Deprecate read_strains from utils, move to io --- DEPRECATED.md | 10 ++++++++++ augur/filter/include_exclude_rules.py | 2 +- augur/io/__init__.py | 1 + augur/io/strains.py | 27 +++++++++++++++++++++++++++ augur/utils.py | 5 +++++ tests/io/test_strains.py | 19 +++++++++++++++++++ tests/test_utils.py | 15 --------------- 7 files changed, 63 insertions(+), 16 deletions(-) create mode 100644 augur/io/strains.py create mode 100644 tests/io/test_strains.py diff --git a/DEPRECATED.md b/DEPRECATED.md index 660617e28..96ec34136 100644 --- a/DEPRECATED.md +++ b/DEPRECATED.md @@ -11,3 +11,13 @@ January 2024 or after.* `augur export v2` was introduced in Augur version 6.0.0. Migrate by following the [official guide](https://docs.nextstrain.org/projects/augur/page/releases/migrating-v5-v6.html). + +## `augur.utils.read_strains` + +*Deprecated December 2023. Planned for removal March 2024 or after.* + +This is part of a [larger effort](https://github.com/nextstrain/augur/issues/1011) +to formalize Augur's Python API. + +We recognize the existing usage of this function, so it has been moved to +`augur.io.read_strains`. diff --git a/augur/filter/include_exclude_rules.py b/augur/filter/include_exclude_rules.py index 9889ba2e6..9ea1e03e8 100644 --- a/augur/filter/include_exclude_rules.py +++ b/augur/filter/include_exclude_rules.py @@ -9,8 +9,8 @@ from augur.errors import AugurError from augur.io.metadata import METADATA_DATE_COLUMN from augur.io.print import print_err +from augur.io.strains import read_strains from augur.io.vcf import is_vcf as filename_is_vcf -from augur.utils import read_strains from . 
import constants try: diff --git a/augur/io/__init__.py b/augur/io/__init__.py index 685f9ff78..4a721bffb 100644 --- a/augur/io/__init__.py +++ b/augur/io/__init__.py @@ -5,3 +5,4 @@ from .file import open_file # noqa: F401 from .metadata import read_metadata # noqa: F401 from .sequences import read_sequences, write_sequences # noqa: F401 +from .strains import read_strains # noqa: F401 diff --git a/augur/io/strains.py b/augur/io/strains.py new file mode 100644 index 000000000..170212633 --- /dev/null +++ b/augur/io/strains.py @@ -0,0 +1,27 @@ +from augur.utils import read_entries + + +def read_strains(*files, comment_char="#"): + """Reads strain names from one or more plain text files and returns the + set of distinct strains. + + Strain names can be commented with full-line or inline comments. For + example, the following is a valid strain names file:: + + # this is a comment at the top of the file + strain1 # exclude strain1 because it isn't sequenced properly + strain2 + # this is an empty line that will be ignored. + + Parameters + ---------- + files : iterable of str + one or more names of text files with one strain name per line + + Returns + ------- + set : + strain names from the given input files + + """ + return set(read_entries(*files, comment_char=comment_char)) diff --git a/augur/utils.py b/augur/utils.py index b0790ab2c..f759a7df8 100644 --- a/augur/utils.py +++ b/augur/utils.py @@ -5,10 +5,12 @@ import os, json, sys import pandas as pd from collections import defaultdict, OrderedDict +from textwrap import dedent from .__version__ import __version__ from augur.data import as_file from augur.io.file import open_file +from augur.io.print import print_err from augur.types import ValidationMode from augur.errors import AugurError @@ -738,6 +740,9 @@ def load_mask_sites(mask_file): def read_strains(*files, comment_char="#"): + print_err(dedent(""" + DEPRECATION WARNING: augur.utils.read_strains is no longer maintained and will be removed in the future. 
+ Please use augur.io.read_strains instead.""")) return set(read_entries(*files, comment_char=comment_char)) diff --git a/tests/io/test_strains.py b/tests/io/test_strains.py new file mode 100644 index 000000000..dfd4ffe8b --- /dev/null +++ b/tests/io/test_strains.py @@ -0,0 +1,19 @@ +from pathlib import Path + +from augur.io.strains import read_strains + + +def test_read_strains(tmpdir): + # Write one list of strain names with some unnecessary whitespace. + strains1 = Path(tmpdir) / Path("strains1.txt") + with open(strains1, "w") as oh: + oh.write("strain1 # this is an inline comment about strain 1\nstrain2\n # this is a comment preceded by whitespace.\n") + + # Write another list of strain names with a comment. + strains2 = Path(tmpdir) / Path("strains2.txt") + with open(strains2, "w") as oh: + oh.write("# this is a comment. ignore this.\nstrain2\nstrain3\n") + + strains = read_strains(strains1, strains2) + assert len(strains) == 3 + assert "strain1" in strains diff --git a/tests/test_utils.py b/tests/test_utils.py index e4661a249..a1183cf58 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -79,21 +79,6 @@ def test_read_mask_file_drm_file(self, tmpdir): fh.write("\n".join(drm_lines)) assert utils.read_mask_file(drm_file) == expected_sites - def test_read_strains(self, tmpdir): - # Write one list of filenames with some unnecessary whitespace. - strains1 = Path(tmpdir) / Path("strains1.txt") - with open(strains1, "w") as oh: - oh.write("strain1 # this is an inline comment about strain 1\nstrain2\n # this is a comment preceded by whitespace.\n") - - # Write another list of filenames with a comment. - strains2 = Path(tmpdir) / Path("strains2.txt") - with open(strains2, "w") as oh: - oh.write("# this is a comment. 
ignore this.\nstrain2\nstrain3\n") - - strains = utils.read_strains(strains1, strains2) - assert len(strains) == 3 - assert "strain1" in strains - def test_write_json_data_types(self, tmpdir): """write_json should be able to serialize various data types.""" data = {