Skip to content

Commit

Permalink
Deprecate read_strains from utils, move to io
Browse files Browse the repository at this point in the history
  • Loading branch information
victorlin committed Dec 20, 2023
1 parent aae0640 commit 4c09005
Show file tree
Hide file tree
Showing 7 changed files with 63 additions and 16 deletions.
10 changes: 10 additions & 0 deletions DEPRECATED.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,13 @@ January 2024 or after.*

`augur export v2` was introduced in Augur version 6.0.0. Migrate by following
the [official guide](https://docs.nextstrain.org/projects/augur/page/releases/migrating-v5-v6.html).

## `augur.utils.read_strains`

*Deprecated December 2023. Planned for removal March 2024 or after.*

This is part of a [larger effort](https://github.com/nextstrain/augur/issues/1011)
to formalize Augur's Python API.

We recognize that this function is already used by existing code, so it has been
moved rather than removed. Import it from `augur.io.read_strains` instead.
2 changes: 1 addition & 1 deletion augur/filter/include_exclude_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from augur.errors import AugurError
from augur.io.metadata import METADATA_DATE_COLUMN
from augur.io.print import print_err
from augur.io.strains import read_strains
from augur.io.vcf import is_vcf as filename_is_vcf
from augur.utils import read_strains
from . import constants

try:
Expand Down
1 change: 1 addition & 0 deletions augur/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
from .file import open_file # noqa: F401
from .metadata import read_metadata # noqa: F401
from .sequences import read_sequences, write_sequences # noqa: F401
from .strains import read_strains # noqa: F401
27 changes: 27 additions & 0 deletions augur/io/strains.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from augur.utils import read_entries


def read_strains(*files, comment_char="#"):
    """Collect the distinct strain names listed in one or more text files.

    Strain names can be annotated with full-line or inline comments. For
    example, the following is a valid strain names file::

        # this is a comment at the top of the file
        strain1  # exclude strain1 because it isn't sequenced properly
        strain2

        # this is an empty line that will be ignored.

    Parameters
    ----------
    *files : str
        one or more names of text files with one strain name per line
    comment_char : str
        marker that introduces a comment; passed through unchanged to
        ``read_entries``

    Returns
    -------
    set :
        strain names from the given input files
    """
    # Parsing of lines and comments is delegated to read_entries; this
    # function only collapses the parsed entries into a set.
    entries = read_entries(*files, comment_char=comment_char)
    return set(entries)
5 changes: 5 additions & 0 deletions augur/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
import os, json, sys
import pandas as pd
from collections import defaultdict, OrderedDict
from textwrap import dedent
from .__version__ import __version__

from augur.data import as_file
from augur.io.file import open_file
from augur.io.print import print_err

from augur.types import ValidationMode
from augur.errors import AugurError
Expand Down Expand Up @@ -738,6 +740,9 @@ def load_mask_sites(mask_file):


def read_strains(*files, comment_char="#"):
    """Deprecated alias of ``augur.io.read_strains``.

    Emits a deprecation notice through ``print_err`` on every call, then
    returns the set of entries that ``read_entries`` yields for the given
    files.

    Parameters
    ----------
    *files : str
        one or more names of text files to read
    comment_char : str
        marker that introduces a comment; passed through unchanged to
        ``read_entries``

    Returns
    -------
    set :
        distinct entries read from the given files
    """
    # The notice is printed before any file is read, so callers see it even
    # if reading fails afterwards.
    deprecation_notice = dedent("""
        DEPRECATION WARNING: augur.utils.read_strains is no longer maintained and will be removed in the future.
        Please use augur.io.read_strains instead.""")
    print_err(deprecation_notice)

    entries = read_entries(*files, comment_char=comment_char)
    return set(entries)


Expand Down
19 changes: 19 additions & 0 deletions tests/io/test_strains.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from pathlib import Path

from augur.io.strains import read_strains


def test_read_strains(tmpdir):
    """read_strains should combine two strain files into three distinct names."""
    workdir = Path(tmpdir)

    # First file: strain names with an inline comment, plus a full-line
    # comment that is preceded by whitespace.
    first_path = workdir / "strains1.txt"
    first_text = (
        "strain1 # this is an inline comment about strain 1\n"
        "strain2\n"
        " # this is a comment preceded by whitespace.\n"
    )
    with open(first_path, "w") as handle:
        handle.write(first_text)

    # Second file: starts with a full-line comment and repeats strain2.
    second_path = workdir / "strains2.txt"
    second_text = (
        "# this is a comment. ignore this.\n"
        "strain2\n"
        "strain3\n"
    )
    with open(second_path, "w") as handle:
        handle.write(second_text)

    found = read_strains(first_path, second_path)

    assert len(found) == 3
    assert "strain1" in found
15 changes: 0 additions & 15 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,21 +79,6 @@ def test_read_mask_file_drm_file(self, tmpdir):
fh.write("\n".join(drm_lines))
assert utils.read_mask_file(drm_file) == expected_sites

def test_read_strains(self, tmpdir):
    """utils.read_strains should combine two strain files into three distinct names."""
    workdir = Path(tmpdir)

    # First file: strain names with an inline comment, plus a full-line
    # comment that is preceded by whitespace.
    first_path = workdir / "strains1.txt"
    first_text = (
        "strain1 # this is an inline comment about strain 1\n"
        "strain2\n"
        " # this is a comment preceded by whitespace.\n"
    )
    with open(first_path, "w") as handle:
        handle.write(first_text)

    # Second file: starts with a full-line comment and repeats strain2.
    second_path = workdir / "strains2.txt"
    second_text = (
        "# this is a comment. ignore this.\n"
        "strain2\n"
        "strain3\n"
    )
    with open(second_path, "w") as handle:
        handle.write(second_text)

    found = utils.read_strains(first_path, second_path)

    assert len(found) == 3
    assert "strain1" in found

def test_write_json_data_types(self, tmpdir):
"""write_json should be able to serialize various data types."""
data = {
Expand Down

0 comments on commit 4c09005

Please sign in to comment.