Skip to content

Commit

Permalink
MAINT/ENH/TST: fixes biocore#982 and expands err check testing
Browse files Browse the repository at this point in the history
  • Loading branch information
wasade committed Dec 3, 2024
1 parent 06397a9 commit 47b2cc5
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 42 deletions.
72 changes: 52 additions & 20 deletions biom/err.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@
from sys import stdout
from contextlib import contextmanager

import numpy as np

from biom.exception import TableException


Expand All @@ -73,6 +75,15 @@
SAMPDUP = "Duplicate sample IDs!"
OBSMDSIZE = "Size of observation metadata differs from matrix size!"
SAMPMDSIZE = "Size of sample metadata differs from matrix size!"
# Messages reported by the 'hasnan' and 'hasinf' checks
HASNAN = "Table contains nan values!"
HASINF = "Table contains inf values!"

# Names of the handling states an error type can be placed in
IGNORE = 'ignore'
RAISE = 'raise'
CALL = 'call'
WARN = 'warn'
PRINT = 'print'
# Key which, when present in a state update, applies its value to every
# error type currently tracked
ALL = 'all'


# _zz_ so the sort order places this test last
Expand Down Expand Up @@ -113,13 +124,25 @@ def _test_sampmdsize(t):
return t.shape[1] != len(md) if md is not None else False


def _test_hasnan(t):
    """Return True when any value in ``t._data.data`` is nan."""
    nan_mask = np.isnan(t._data.data)
    # np.any hands back a numpy bool; give callers a plain Python bool
    return bool(np.any(nan_mask))


def _test_hasinf(t):
    """Return True when any value in ``t._data.data`` is infinite."""
    inf_mask = np.isinf(t._data.data)
    # np.any hands back a numpy bool; give callers a plain Python bool
    return bool(np.any(inf_mask))


def _create_error_states(msg, callback, exception):
    """Build the handlers available for a single error type.

    Parameters
    ----------
    msg : str
        Message describing the error condition.
    callback : callable or None
        Handler stored under ``CALL``. When ``None``, a no-op returning
        ``None`` is stored instead.
    exception : callable
        Called with ``msg`` by the ``RAISE`` handler.

    Returns
    -------
    dict
        Maps each state name (``IGNORE``, ``WARN``, ``RAISE``, ``CALL``,
        ``PRINT``) to a handler. Every handler built here takes one
        argument; a supplied ``callback`` is stored as given.

    Notes
    -----
    The ``RAISE`` handler constructs and returns ``exception(msg)``; it
    does not raise the exception itself.
    """
    # A single return keyed by the module's state-name constants; the
    # previous second return statement was unreachable.
    return {IGNORE: lambda x: None,
            WARN: lambda x: warn(msg),
            RAISE: lambda x: exception(msg),
            CALL: callback if callback is not None else lambda x: None,
            PRINT: lambda x: stdout.write(msg + '\n')}


class ErrorProfile:
Expand All @@ -129,7 +152,7 @@ class ErrorProfile:
handled, how those errors are handled, and performs the handling of the
errors.
"""
_valid_states = frozenset(['raise', 'ignore', 'call', 'print', 'warn'])
_valid_states = frozenset([RAISE, IGNORE, CALL, PRINT, WARN])

def __init__(self):
self._profile = {}
Expand Down Expand Up @@ -213,8 +236,8 @@ def state(self):
@state.setter
def state(self, new_state):
"""Update current state"""
if 'all' in new_state:
to_update = [(err, new_state['all']) for err in self._state]
if ALL in new_state:
to_update = [(err, new_state[ALL]) for err in self._state]
else:
to_update = new_state.items()

Expand Down Expand Up @@ -252,7 +275,10 @@ def test(self, item, *args):
args = self._test.keys()

for errtype in sorted(args):
test = self._test.get(errtype, lambda: None)
test = self._test.get(errtype, lambda _: None)

if self._state.get(errtype) == IGNORE:
continue

if test(item):
return self._handle_error(errtype, item)
Expand Down Expand Up @@ -320,19 +346,23 @@ def getcall(self, errtype):


# Module-level error profile. Each register call names an error type, its
# message, its initial handling state, the test function, and the exception
# type used for that error.
# 'empty', 'hasnan' and 'hasinf' start out ignored; every other check starts
# out in the RAISE state.
__errprof = ErrorProfile()
__errprof.register('empty', EMPTY, IGNORE, _zz_test_empty,
                   exception=TableException)
__errprof.register('obssize', OBSSIZE, RAISE, _test_obssize,
                   exception=TableException)
__errprof.register('sampsize', SAMPSIZE, RAISE, _test_sampsize,
                   exception=TableException)
__errprof.register('obsdup', OBSDUP, RAISE, _test_obsdup,
                   exception=TableException)
__errprof.register('sampdup', SAMPDUP, RAISE, _test_sampdup,
                   exception=TableException)
__errprof.register('obsmdsize', OBSMDSIZE, RAISE, _test_obsmdsize,
                   exception=TableException)
__errprof.register('sampmdsize', SAMPMDSIZE, RAISE, _test_sampmdsize,
                   exception=TableException)
__errprof.register('hasnan', HASNAN, IGNORE, _test_hasnan,
                   exception=TableException)
__errprof.register('hasinf', HASINF, IGNORE, _test_hasinf,
                   exception=TableException)


Expand Down Expand Up @@ -384,8 +414,8 @@ def seterr(**kwargs):
"""
old_state = __errprof.state.copy()
if 'all' in kwargs:
__errprof.state = {'all': kwargs['all']}
if ALL in kwargs:
__errprof.state = {ALL: kwargs[ALL]}
else:
__errprof.state = kwargs
return old_state
Expand Down Expand Up @@ -499,5 +529,7 @@ def errstate(**kwargs):
"""
old_state = seterr(**kwargs)
yield
seterr(**old_state)
try:
yield
finally:
seterr(**old_state)
121 changes: 101 additions & 20 deletions biom/tests/test_err.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
from biom.err import (_zz_test_empty, _test_obssize, _test_sampsize,
_test_obsdup, _test_sampdup, _test_obsmdsize,
_test_sampmdsize, errstate, geterr, seterr, geterrcall,
seterrcall, errcheck, __errprof)
_test_hasnan, _test_hasinf, seterrcall, errcheck,
__errprof, IGNORE, RAISE, EMPTY, OBSSIZE, SAMPSIZE, CALL,
WARN, OBSDUP, SAMPDUP, OBSMDSIZE, SAMPMDSIZE, HASNAN,
HASINF)


runtime_ep = __errprof
Expand Down Expand Up @@ -67,6 +70,16 @@ def test_test_sampmdsize(self):
self.ex_table._sample_metadata[:-1]
self.assertTrue(_test_sampmdsize(self.ex_table))

def test_test_hasnan(self):
    """The nan check is False for the example table, True once a nan is set."""
    table = self.ex_table
    self.assertFalse(_test_hasnan(table))

    # overwrite the first entry of the table's data array with nan
    table._data.data[0] = np.nan
    self.assertTrue(_test_hasnan(table))

def test_test_hasinf(self):
    """The inf check is False for the example table, True once an inf is set."""
    table = self.ex_table
    self.assertFalse(_test_hasinf(table))

    # overwrite the first entry of the table's data array with inf
    table._data.data[0] = np.inf
    self.assertTrue(_test_hasinf(table))


class ErrorProfileTests(TestCase):
def setUp(self):
Expand Down Expand Up @@ -101,11 +114,11 @@ def test_test_evaluation_order(self):
'Duplicate observation IDs')

def test_state(self):
self.ep.state = {'all': 'ignore'}
self.ep.state = {'all': IGNORE}
self.assertEqual(set(self.ep._state.values()), {'ignore'})
self.ep.state = {'empty': 'call'}
self.assertEqual(set(self.ep._state.values()), {'ignore', 'call'})
self.assertEqual(self.ep.state['empty'], 'call')
self.ep.state = {'empty': CALL}
self.assertEqual(set(self.ep._state.values()), {'ignore', CALL})
self.assertEqual(self.ep.state['empty'], CALL)

with self.assertRaises(KeyError):
self.ep.state = {'empty': 'missing'}
Expand All @@ -124,17 +137,18 @@ def callback(foo):
self.ep.setcall('empty', callback)

self.assertTrue(isinstance(self.ep._handle_error('empty', None),
TableException))
self.ep.state = {'empty': 'call'}
TableException))

self.ep.state = {'empty': CALL}
self.assertEqual(self.ep._handle_error('empty', None), 10)

def test_setcall(self):
def callback(foo):
return 10

self.assertEqual(self.ep._profile['empty']['call'](None), None)
self.assertEqual(self.ep._profile['empty'][CALL](None), None)
self.ep.setcall('empty', callback)
self.assertEqual(self.ep._profile['empty']['call'](None), 10)
self.assertEqual(self.ep._profile['empty'][CALL](None), 10)

with self.assertRaises(KeyError):
self.ep.setcall('emptyfoo', callback)
Expand All @@ -155,16 +169,16 @@ def cb(x):
def test(x):
return x == 5

self.ep.register('foo', 'bar', 'ignore', test, callback=cb)
self.ep.register('foo', 'bar', IGNORE, test, callback=cb)
self.assertTrue('foo' in self.ep)
self.ep.state = {'foo': 'call'}
self.ep.state = {'foo': CALL}
self.assertEqual(self.ep._handle_error('foo', None), 123)

foo_prof = self.ep._profile['foo'].copy()
prof, func, state = self.ep.unregister('foo')

self.assertEqual(func, test)
self.assertEqual(state, 'call')
self.assertEqual(state, CALL)
self.assertEqual(prof, foo_prof)

with self.assertRaises(KeyError):
Expand All @@ -184,20 +198,20 @@ def setUp(self):
def test_geterr(self):
    """geterr matches the runtime profile state and reflects seterr."""
    state = geterr()
    self.assertEqual(state, runtime_ep._state)

    old = seterr(all=CALL)
    try:
        self.assertNotEqual(geterr(), state)
    finally:
        # restore the shared runtime profile even when the assertion fails,
        # so later tests do not run with every error type set to CALL
        seterr(**old)

def test_seterr(self):
    """seterr updates one error type and returns the prior state."""
    existing = seterr(empty=WARN)
    try:
        self.assertEqual(runtime_ep._state['empty'], WARN)
        # compare with the previous state of 'empty'; comparing a state
        # string with the whole returned dict could never fail
        self.assertNotEqual(runtime_ep._state['empty'], existing['empty'])
    finally:
        # put 'empty' back even if an assertion above fails
        seterr(empty=existing['empty'])

    self.assertNotEqual(runtime_ep._state['empty'], WARN)
    self.assertEqual(runtime_ep._state, existing)

def test_geterrcall(self):
exp = runtime_ep._profile['sampsize']['call']
exp = runtime_ep._profile['sampsize'][CALL]
obs = geterrcall('sampsize')
self.assertEqual(obs, exp)

Expand All @@ -224,11 +238,78 @@ def foo(item):

table = Table([], [], [])
seterrcall('empty', foo)
self.assertNotEqual(geterr()['empty'], 'call')
with errstate(empty='call'):
self.assertNotEqual(geterr()['empty'], CALL)
with errstate(empty=CALL):
result = errcheck(table)
self.assertEqual(result, "the callback called")
self.assertNotEqual(geterr()['empty'], 'call')
self.assertNotEqual(geterr()['empty'], CALL)

def _what_to_raise(errtype):
    """Return a state mapping with ``errtype`` set to RAISE, the rest IGNORE.

    The keys are those of the runtime profile's ``_state``, plus ``errtype``
    if it is not already one of them.
    """
    states = dict.fromkeys(__errprof._state, IGNORE)
    states[errtype] = RAISE
    return states


class IntegrationTests(TestCase):
    """Build tables that violate one check each and expect that error."""

    @staticmethod
    def _matrix(corner=3):
        """Return a fresh 2 x 3 array; ``corner`` fills row 0, column 2."""
        return np.array([[1, 2, corner], [4, 5, 6]])

    def _check(self, errcond, msg, table_data):
        """Expect ``Table(*table_data)`` to raise a matching TableException.

        While the table is built, ``errcond`` is the only error type set to
        RAISE; ``_what_to_raise`` sets every other one to IGNORE.
        """
        with self.assertRaisesRegex(TableException, msg), \
                errstate(**_what_to_raise(errcond)):
            Table(*table_data)

    def test_has_duplicate_samples(self):
        table_args = (self._matrix(), ['a', 'b'], ['S1', 'S1', 'S2'])
        self._check('sampdup', SAMPDUP, table_args)

    def test_has_duplicate_observations(self):
        table_args = (self._matrix(), ['x', 'x'], ['a', 'b', 'c'])
        self._check('obsdup', OBSDUP, table_args)

    def test_is_empty(self):
        self._check('empty', EMPTY, ([], [], []))

    def test_observation_size(self):
        # three observation IDs for a two-row matrix
        table_args = (self._matrix(), ['w', 'x', 'y'], ['a', 'b', 'c'])
        self._check('obssize', OBSSIZE, table_args)

    def test_sample_size(self):
        # two sample IDs for a three-column matrix
        table_args = (self._matrix(), ['w', 'x'], ['a', 'b'])
        self._check('sampsize', SAMPSIZE, table_args)

    def test_observation_metadata_size(self):
        # three metadata entries for two observations
        obs_md = [{1: 2}, {1: 3}, {1: 4}]
        table_args = (self._matrix(), ['x', 'y'], ['a', 'b', 'c'], obs_md)
        self._check('obsmdsize', OBSMDSIZE, table_args)

    def test_sample_metadata_size(self):
        # one metadata entry for three samples
        samp_md = [{1: 2}]
        table_args = (self._matrix(), ['x', 'y'], ['a', 'b', 'c'], None,
                      samp_md)
        self._check('sampmdsize', SAMPMDSIZE, table_args)

    def test_has_nan(self):
        table_args = (self._matrix(np.nan), ['x', 'y'], ['a', 'b', 'c'])
        self._check('hasnan', HASNAN, table_args)

    def test_has_inf(self):
        table_args = (self._matrix(np.inf), ['x', 'y'], ['a', 'b', 'c'])
        self._check('hasinf', HASINF, table_args)


if __name__ == '__main__':
Expand Down
6 changes: 4 additions & 2 deletions biom/tests/test_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from biom.parse import (generatedby, MetadataMap, parse_biom_table, parse_uc,
load_table, save_table)
from biom.table import Table
from biom.err import errstate, IGNORE
from biom.util import __version__
from biom.tests.long_lines import (uc_empty, uc_invalid_id, uc_minimal,
uc_lib_minimal,
Expand Down Expand Up @@ -61,8 +62,9 @@ def tearDown(self):

def test_from_tsv_bug_854(self):
    """A header-only TSV yields a table equal to one with a lone sample."""
    data = StringIO('#FeatureID\tSample1')
    # Build both tables once, with every error type set to IGNORE; the
    # earlier unguarded construction of exp/obs is dropped.
    with errstate(all=IGNORE):
        exp = Table([], [], ['Sample1'])
        obs = Table.from_tsv(data, None, None, lambda x: x)
    self.assertEqual(obs, exp)

def test_generatedby(self):
Expand Down

0 comments on commit 47b2cc5

Please sign in to comment.