Skip to content

Commit

Permalink
Add pytests
Browse files Browse the repository at this point in the history
- all samples dropped
- metadata id cols
- priority column mismatch
- date parsing
  • Loading branch information
victorlin committed Feb 17, 2022
1 parent 3fcd258 commit 1513f1e
Show file tree
Hide file tree
Showing 4 changed files with 263 additions and 0 deletions.
52 changes: 52 additions & 0 deletions tests/test_filter_data_loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,55 @@ def test_load_priority_scores_does_not_exist(self, tmpdir):
filter_obj = get_filter_obj_run(args)
with pytest.raises(FileNotFoundError):
filter_obj.db_load_priorities_table()

def test_load_invalid_id_column(self, tmpdir):
    """Metadata whose header has none of the possible id columns raises ValueError."""
    header = ("invalid_name","date","country")
    rows = [header, ("SEQ_1","2020-01-XX","A")]
    args = get_valid_args(rows, tmpdir)
    with pytest.raises(ValueError) as excinfo:
        get_filter_obj_run(args)
    # The message lists both the id columns that were tried and the header that was found.
    message = str(excinfo.value)
    assert message == "None of the possible id columns (['strain', 'name']) were found in the metadata's columns ('invalid_name', 'date', 'country')"

def test_load_custom_id_column(self, tmpdir):
    """A custom id column set via `args.metadata_id_columns` is loaded and queryable by name."""
    header = ("custom_id_col","date","country")
    rows = [header, ("SEQ_1","2020-01-XX","A")]
    args = get_valid_args(rows, tmpdir)
    args.metadata_id_columns = ["custom_id_col"]
    filter_obj = get_filter_obj_run(args)
    query = f"\nSELECT custom_id_col FROM {METADATA_TABLE_NAME}\n"
    fetched = query_fetchall(filter_obj, query)
    assert fetched == [("SEQ_1",)]

def test_load_custom_id_column_with_spaces(self, tmpdir):
    """A custom id column whose name contains spaces is loaded and queryable when quoted."""
    header = ("strain name with spaces","date","country")
    rows = [header, ("SEQ_1","2020-01-XX","A")]
    args = get_valid_args(rows, tmpdir)
    args.metadata_id_columns = ["strain name with spaces"]
    filter_obj = get_filter_obj_run(args)
    # The column name must be double-quoted in SQL because it contains spaces.
    query = f'\nSELECT "strain name with spaces" FROM {METADATA_TABLE_NAME}\n'
    fetched = query_fetchall(filter_obj, query)
    assert fetched == [("SEQ_1",)]

def test_load_priority_scores_extra_column(self, tmpdir):
    """Loading a priority score file that has an extra column raises a ValueError."""
    # Three tab-separated fields on the single line of the file.
    filter_obj = get_filter_obj_with_priority_loaded(tmpdir, "strain1\t5\tbad_col\n")
    with pytest.raises(ValueError) as excinfo:
        filter_obj.db_load_priorities_table()
    message = str(excinfo.value)
    assert message == f"Failed to parse priority file {filter_obj.args.priority}."

def test_load_priority_scores_missing_column(self, tmpdir):
    """Loading a priority score file that lacks a column raises a ValueError."""
    # Only one field on the single line of the file.
    filter_obj = get_filter_obj_with_priority_loaded(tmpdir, "strain1\n")
    with pytest.raises(ValueError) as excinfo:
        filter_obj.db_load_priorities_table()
    message = str(excinfo.value)
    assert message == f"Failed to parse priority file {filter_obj.args.priority}."
165 changes: 165 additions & 0 deletions tests/test_filter_date_parsing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
import pytest
from treetime.utils import numeric_date
from datetime import date

from augur.filter_support.db.sqlite import (
DATE_MIN_COL,
DATE_MAX_COL,
DATE_TABLE_NAME,
)

from tests.test_filter import get_filter_obj_run, get_valid_args, query_fetchall


def get_parsed_date_min_max(date:str, tmpdir):
    """Run the filter on one metadata record and return its parsed date bounds.

    A single record ``SEQ_1`` with the given ``date`` string is written as
    metadata, the filter is run, and the first row selected from the date
    table (min column, max column) is returned.
    """
    # NOTE: inside this function the `date` parameter shadows the module-level
    # `datetime.date` import; only the string value is used here.
    metadata = [
        ("strain","date"),
        ("SEQ_1",date),
    ]
    filter_obj = get_filter_obj_run(get_valid_args(metadata, tmpdir))
    query = f"\nSELECT {DATE_MIN_COL}, {DATE_MAX_COL} FROM {DATE_TABLE_NAME}\n"
    rows = query_fetchall(filter_obj, query)
    return rows[0]


class TestDateParsing:
    """Tests for parsing metadata date strings into numeric min/max bounds.

    Each test feeds one date string through ``get_parsed_date_min_max`` and
    checks the resulting pair. Unparseable dates are expected to yield
    ``None`` for both bounds.
    """

    def test_ambiguous_day(self, tmpdir):
        """Ambiguous day yields a certain min/max range."""
        date_min, date_max = get_parsed_date_min_max("2018-01-XX", tmpdir)
        assert date_min == pytest.approx(2018.001, abs=1e-3)
        assert date_max == pytest.approx(2018.083, abs=1e-3)

    def test_missing_day(self, tmpdir):
        """Date without day yields a range equivalent to ambiguous day."""
        date_min, date_max = get_parsed_date_min_max("2018-01", tmpdir)
        assert date_min == pytest.approx(2018.001, abs=1e-3)
        assert date_max == pytest.approx(2018.083, abs=1e-3)

    def test_ambiguous_month(self, tmpdir):
        """Ambiguous month yields a certain min/max range."""
        date_min, date_max = get_parsed_date_min_max("2018-XX-XX", tmpdir)
        assert date_min == pytest.approx(2018.001, abs=1e-3)
        assert date_max == pytest.approx(2018.999, abs=1e-3)

    def test_missing_month(self, tmpdir):
        """Date without month/day yields a range equivalent to ambiguous month/day."""
        date_min, date_max = get_parsed_date_min_max("2018", tmpdir)
        assert date_min == pytest.approx(2018.001, abs=1e-3)
        assert date_max == pytest.approx(2018.999, abs=1e-3)

    def test_numerical_exact_year(self, tmpdir):
        """Numerical year ending in .0 should be interpreted as exact."""
        date_min, date_max = get_parsed_date_min_max("2018.0", tmpdir)
        assert date_min == pytest.approx(2018.001, abs=1e-3)
        assert date_max == pytest.approx(2018.001, abs=1e-3)

    def test_ambiguous_year(self, tmpdir):
        """Ambiguous year replaces X with 0 (min) and 9 (max)."""
        date_min, date_max = get_parsed_date_min_max("201X-XX-XX", tmpdir)
        assert date_min == pytest.approx(2010.001, abs=1e-3)
        assert date_max == pytest.approx(2019.999, abs=1e-3)

    def test_ambiguous_year_incomplete_date(self, tmpdir):
        """Ambiguous year without month/day yields a range equivalent to ambiguous month/day counterpart."""
        date_min, date_max = get_parsed_date_min_max("201X", tmpdir)
        assert date_min == pytest.approx(2010.001, abs=1e-3)
        assert date_max == pytest.approx(2019.999, abs=1e-3)

    def test_ambiguous_year_decade(self, tmpdir):
        """Parse year-only ambiguous date with ambiguous decade."""
        date_min, date_max = get_parsed_date_min_max("10X1", tmpdir)
        assert date_min == pytest.approx(1001.001, abs=1e-3)
        assert date_max == pytest.approx(1091.999, abs=1e-3)

    # Previously named `test_ambiguous_year_incomplete_date`, which duplicated
    # the method above and silently replaced it, so the "201X" case never ran.
    def test_ambiguous_year_lowercase_x_error(self, tmpdir):
        """Ambiguous year without explicit X fails parsing."""
        date_min, date_max = get_parsed_date_min_max("201x", tmpdir)
        assert date_min is None
        assert date_max is None

    def test_future_year(self, tmpdir):
        """Date from the future should be converted to today."""
        date_min, date_max = get_parsed_date_min_max("3000", tmpdir)
        # Compute today's numeric date once so both bounds are compared
        # against the same value.
        today = numeric_date(date.today())
        assert date_min == pytest.approx(today, abs=1e-3)
        assert date_max == pytest.approx(today, abs=1e-3)

    # TODO: DateDisambiguator parity: assert_only_less_significant_uncertainty
    @pytest.mark.skip(reason="not implemented")
    def test_assert_only_less_significant_uncertainty(self, tmpdir):
        """Ambiguous month combined with an exact day should fail parsing."""
        date_min, date_max = get_parsed_date_min_max("2018-XX-01", tmpdir)
        assert date_min is None
        assert date_max is None

    def test_out_of_bounds_month(self, tmpdir):
        """Out-of-bounds month cannot be parsed."""
        date_min, date_max = get_parsed_date_min_max("2018-00-01", tmpdir)
        assert date_min is None
        assert date_max is None
        date_min, date_max = get_parsed_date_min_max("2018-13-01", tmpdir)
        assert date_min is None
        assert date_max is None

    def test_out_of_bounds_day(self, tmpdir):
        """Out-of-bounds day cannot be parsed."""
        date_min, date_max = get_parsed_date_min_max("2018-01-00", tmpdir)
        assert date_min is None
        assert date_max is None
        date_min, date_max = get_parsed_date_min_max("2018-02-30", tmpdir)
        assert date_min is None
        assert date_max is None

    def test_negative_iso_date_error(self, tmpdir):
        """Negative ISO dates are unsupported."""
        date_min, date_max = get_parsed_date_min_max("-2018-01-01", tmpdir)
        assert date_min is None
        assert date_max is None

    def test_negative_ambiguous_iso_date_error(self, tmpdir):
        """Negative ambiguous ISO dates are unsupported."""
        date_min, date_max = get_parsed_date_min_max("-2018-XX-XX", tmpdir)
        assert date_min is None
        assert date_max is None

    def test_negative_iso_date_missing_day_error(self, tmpdir):
        """Negative ISO dates without a day are unsupported."""
        date_min, date_max = get_parsed_date_min_max("-2018-01", tmpdir)
        assert date_min is None
        assert date_max is None

    def test_negative_iso_date_missing_month_day_error(self, tmpdir):
        """Negative ISO dates without month and day are unsupported."""
        date_min, date_max = get_parsed_date_min_max("-2018", tmpdir)
        assert date_min is None
        assert date_max is None

    def test_negative_numeric_date(self, tmpdir):
        """Parse negative numeric date."""
        date_min, date_max = get_parsed_date_min_max("-2018.0", tmpdir)
        assert date_min == pytest.approx(-2018.0, abs=1e-3)
        assert date_max == pytest.approx(-2018.0, abs=1e-3)

    def test_zero_year_error(self, tmpdir):
        """Zero year-only date is unsupported."""
        date_min, date_max = get_parsed_date_min_max("0", tmpdir)
        assert date_min is None
        assert date_max is None

    def test_zero_year(self, tmpdir):
        """Parse the date 0.0."""
        date_min, date_max = get_parsed_date_min_max("0.0", tmpdir)
        assert date_min == pytest.approx(0.0, abs=1e-3)
        assert date_max == pytest.approx(0.0, abs=1e-3)
30 changes: 30 additions & 0 deletions tests/test_filter_filtering.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import pytest
from augur.filter_support.db.sqlite import (
EXCLUDE_COL,
FILTER_REASON_COL,
INCLUDE_COL,
METADATA_FILTER_REASON_TABLE_NAME,
)
from augur.filter_support.exceptions import FilterException

from test_filter import (
get_filter_obj_run,
Expand Down Expand Up @@ -110,3 +112,31 @@ def test_filter_by_max_date(self, tmpdir):
WHERE {FILTER_REASON_COL} = 'filter_by_max_date'
""")
assert results == [("SEQ_3",)]

def test_filter_by_exclude_where(self, tmpdir):
    """Strains matching an `exclude_where` condition get the 'filter_by_exclude_where' reason."""
    data = [("strain","location","quality"),
            ("SEQ_1","colorado","good"),
            ("SEQ_2","colorado","bad"),
            ("SEQ_3","nevada","good")]
    args = get_valid_args(data, tmpdir)
    args.exclude_where = ["location=colorado"]
    filter_obj = get_filter_obj_run(args)
    # ORDER BY makes the comparison deterministic: SQL does not guarantee
    # row order for a SELECT without it, and two rows are expected here.
    results = query_fetchall(filter_obj, f"""
        SELECT strain
        FROM {METADATA_FILTER_REASON_TABLE_NAME}
        WHERE {FILTER_REASON_COL} = 'filter_by_exclude_where'
        ORDER BY strain
    """)
    assert results == [("SEQ_1",), ("SEQ_2",)]

def test_filter_by_exclude_where_missing_column_error(self, tmpdir):
    """An `exclude_where` condition on a column absent from the metadata raises FilterException."""
    header = ("strain","location","quality")
    records = [
        ("SEQ_1","colorado","good"),
        ("SEQ_2","colorado","bad"),
        ("SEQ_3","nevada","good"),
    ]
    args = get_valid_args([header, *records], tmpdir)
    # "invalid" is not one of the header columns above.
    args.exclude_where = ["invalid=colorado"]
    with pytest.raises(FilterException) as excinfo:
        get_filter_obj_run(args)
    message = str(excinfo.value)
    assert message == 'no such column: invalid'
16 changes: 16 additions & 0 deletions tests/test_filter_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,3 +221,19 @@ def test_filter_groupby_only_year_month_provided(self, tmpdir):
WHERE {FILTER_REASON_COL} IS NULL
""")
assert results == [("SEQ_1",), ("SEQ_2",), ("SEQ_3",), ("SEQ_4",), ("SEQ_5",)]

def test_all_samples_dropped(self, tmpdir):
    """Running the filter raises FilterException when every sample is dropped.

    Five samples with year-only dates are grouped by country, year and month
    with one sequence per group.
    NOTE(review): presumably all samples are dropped because a year-only date
    cannot be assigned to a month group — confirm against the filter logic.
    """
    header = ("strain","date","country")
    samples = [
        (f"SEQ_{index}", "2020", country)
        for index, country in enumerate("ABCDE", start=1)
    ]
    args = get_valid_args([header, *samples], tmpdir)
    args.group_by = ["country", "year", "month"]
    args.sequences_per_group = 1
    with pytest.raises(FilterException) as excinfo:
        get_filter_obj_run(args)
    message = str(excinfo.value)
    assert message == "All samples have been dropped! Check filter rules and metadata file format."

0 comments on commit 1513f1e

Please sign in to comment.