Deprecate read_strains from utils, move to io

nextstrain · Dec 20, 2023 · 4c09005 · 4c09005
1 parent aae0640
commit 4c09005
Show file tree

Hide file tree

Showing 7 changed files with 63 additions and 16 deletions.
diff --git a/DEPRECATED.md b/DEPRECATED.md
@@ -11,3 +11,13 @@ January 2024 or after.*
 
 `augur export v2` was introduced in Augur version 6.0.0. Migrate by following
 the [official guide](https://docs.nextstrain.org/projects/augur/page/releases/migrating-v5-v6.html).
+
+## `augur.utils.read_strains`
+
+*Deprecated December 2023. Planned for removal March 2024 or after.*
+
+This is part of a [larger effort](https://github.com/nextstrain/augur/issues/1011)
+to formalize Augur's Python API.
+
+We recognize that this function has existing users, so it has been moved to
+`augur.io.read_strains`. Update imports to use the new location.
diff --git a/augur/filter/include_exclude_rules.py b/augur/filter/include_exclude_rules.py
@@ -9,8 +9,8 @@
 from augur.errors import AugurError
 from augur.io.metadata import METADATA_DATE_COLUMN
 from augur.io.print import print_err
+from augur.io.strains import read_strains
 from augur.io.vcf import is_vcf as filename_is_vcf
-from augur.utils import read_strains
 from . import constants
 
 try:

diff --git a/augur/io/__init__.py b/augur/io/__init__.py
@@ -5,3 +5,4 @@
 from .file import open_file  # noqa: F401
 from .metadata import read_metadata  # noqa: F401
 from .sequences import read_sequences, write_sequences  # noqa: F401
+from .strains import read_strains  # noqa: F401
diff --git a/augur/io/strains.py b/augur/io/strains.py
@@ -0,0 +1,27 @@
+from augur.utils import read_entries
+
+
+def read_strains(*files, comment_char="#"):
+    """Read the distinct strain names from one or more plain text files.
+
+    Strain names can be commented with full-line or inline comments. For
+    example, the following is a valid strain names file::
+
+        # this is a comment at the top of the file
+        strain1  # exclude strain1 because it isn't sequenced properly
+        strain2
+          # a comment-only line (even when indented) is ignored.
+
+    Parameters
+    ----------
+    *files : str or pathlib.Path
+        one or more names of text files with one strain name per line
+    comment_char : str
+        character that starts a comment (default "#"); passed to read_entries
+
+    Returns
+    -------
+    set :
+        strain names from the given input files
+    """
+    return set(read_entries(*files, comment_char=comment_char))
diff --git a/augur/utils.py b/augur/utils.py
@@ -5,10 +5,12 @@
 import os, json, sys
 import pandas as pd
 from collections import defaultdict, OrderedDict
+from textwrap import dedent
 from .__version__ import __version__
 
 from augur.data import as_file
 from augur.io.file import open_file
+from augur.io.print import print_err
 
 from augur.types import ValidationMode
 from augur.errors import AugurError
@@ -738,6 +740,9 @@ def load_mask_sites(mask_file):
 
 
 def read_strains(*files, comment_char="#"):
+    print_err(dedent("""
+        DEPRECATION WARNING: augur.utils.read_strains is no longer maintained and will be removed in the future.
+        Please use augur.io.read_strains instead."""))  # warn on every call; the result below is unchanged
     return set(read_entries(*files, comment_char=comment_char))
 
 

diff --git a/tests/io/test_strains.py b/tests/io/test_strains.py
@@ -0,0 +1,19 @@
+from pathlib import Path
+
+from augur.io.strains import read_strains
+
+
+def test_read_strains(tmpdir):
+    # Write one list of filenames with some unnecessary whitespace.
+    strains1 = Path(tmpdir) / Path("strains1.txt")
+    with open(strains1, "w") as oh:
+        oh.write("strain1 # this is an inline comment about strain 1\nstrain2\n   # this is a comment preceded by whitespace.\n")
+
+    # Write another list of filenames with a comment.
+    strains2 = Path(tmpdir) / Path("strains2.txt")
+    with open(strains2, "w") as oh:
+        oh.write("# this is a comment. ignore this.\nstrain2\nstrain3\n")
+
+    strains = read_strains(strains1, strains2)
+    assert len(strains) == 3
+    assert "strain1" in strains
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -79,21 +79,6 @@ def test_read_mask_file_drm_file(self, tmpdir):
             fh.write("\n".join(drm_lines))
         assert utils.read_mask_file(drm_file) == expected_sites
 
-    def test_read_strains(self, tmpdir):
-        # Write one list of filenames with some unnecessary whitespace.
-        strains1 = Path(tmpdir) / Path("strains1.txt")
-        with open(strains1, "w") as oh:
-            oh.write("strain1 # this is an inline comment about strain 1\nstrain2\n   # this is a comment preceded by whitespace.\n")
-
-        # Write another list of filenames with a comment.
-        strains2 = Path(tmpdir) / Path("strains2.txt")
-        with open(strains2, "w") as oh:
-            oh.write("# this is a comment. ignore this.\nstrain2\nstrain3\n")
-
-        strains = utils.read_strains(strains1, strains2)
-        assert len(strains) == 3
-        assert "strain1" in strains
-
     def test_write_json_data_types(self, tmpdir):
         """write_json should be able to serialize various data types."""
         data = {