diff --git a/sourmash/lca/command_gather.py b/sourmash/lca/command_gather.py
index 4e9a690afc..812c6b3b41 100644
--- a/sourmash/lca/command_gather.py
+++ b/sourmash/lca/command_gather.py
@@ -184,6 +184,10 @@ def gather_main(args):
     """
     set_quiet(args.quiet, args.debug)
 
+    notify("** WARNING: lca gather is deprecated as of sourmash 3.4, and will")
+    notify("** be removed in sourmash 4.0; use 'gather' instead.")
+    notify('')
+
     if not check_files_exist(args.query, *args.db):
         sys.exit(-1)
 
@@ -191,7 +195,8 @@ def gather_main(args):
     dblist, ksize, scaled = lca_utils.load_databases(args.db, None)
 
     # for each query, gather all the matches across databases
-    query_sig = sourmash_args.load_query_signature(args.query, ksize, 'DNA')
+    moltype = dblist[0].moltype
+    query_sig = sourmash_args.load_query_signature(args.query, ksize, moltype)
     debug('classifying', query_sig.name())
 
     # make sure we're looking at the same scaled value as database
diff --git a/sourmash/lca/lca_db.py b/sourmash/lca/lca_db.py
index fec69ddcb4..ee5a1e5f07 100644
--- a/sourmash/lca/lca_db.py
+++ b/sourmash/lca/lca_db.py
@@ -519,6 +519,7 @@ def load_databases(filenames, scaled=None, verbose=True):
     "Load multiple LCA databases; return (dblist, ksize, scaled)"
     ksize_vals = set()
     scaled_vals = set()
+    moltype_vals = set()
     dblist = []
 
     # load all the databases
@@ -537,14 +538,19 @@ def load_databases(filenames, scaled=None, verbose=True):
             lca_db.downsample_scaled(scaled)
         scaled_vals.add(lca_db.scaled)
 
+        moltype_vals.add(lca_db.moltype)
+        if len(moltype_vals) > 1:
+            raise Exception('multiple moltypes, quitting')
+
         dblist.append(lca_db)
 
     ksize = ksize_vals.pop()
     scaled = scaled_vals.pop()
+    moltype = moltype_vals.pop()
 
     if verbose:
         notify(u'\r\033[K', end=u'')
-        notify('loaded {} LCA databases. ksize={}, scaled={}', len(dblist),
-               ksize, scaled)
+        notify('loaded {} LCA databases. ksize={}, scaled={} moltype={}',
+               len(dblist), ksize, scaled, moltype)
 
     return dblist, ksize, scaled
diff --git a/sourmash/sbt.py b/sourmash/sbt.py
index 96845b9a24..2885b3044e 100644
--- a/sourmash/sbt.py
+++ b/sourmash/sbt.py
@@ -57,8 +57,6 @@ def search_transcript(node, seq, threshold):
 import sys
 from tempfile import NamedTemporaryFile
 
-from deprecation import deprecated
-
 from .exceptions import IndexNotSupported
 from .sbt_storage import FSStorage, TarStorage, IPFSStorage, RedisStorage, ZipStorage
 from .logging import error, notify, debug
diff --git a/tests/test_lca.py b/tests/test_lca.py
index 52a407828f..b38cdb22ea 100644
--- a/tests/test_lca.py
+++ b/tests/test_lca.py
@@ -1671,6 +1671,44 @@ def test_incompat_lca_db_scaled(c):
     assert 'new scaled 10000 is lower than current sample scaled 10000' in str(e.value)
 
 
+@utils.in_thisdir
+def test_lca_gather_protein(c):
+    # test lca gather on protein foo
+    testquery = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig')
+    db1 = utils.get_test_data('prot/protein.lca.json.gz')
+
+    c.run_sourmash('lca', 'gather', testquery, db1)
+
+    assert c.last_result.status == 0
+    assert 'loaded 1 LCA databases. ksize=57, scaled=100 moltype=protein' in c.last_result.err
+    assert '340.9 kbp 100.0% 100.0% s__B26-1 sp001593925 sp.' in c.last_result.out
+
+
+@utils.in_thisdir
+def test_lca_gather_deprecated_message(c):
+    # lca gather is deprecated for 4.0; check message
+    testquery = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig')
+    db1 = utils.get_test_data('prot/protein.lca.json.gz')
+
+    c.run_sourmash('lca', 'gather', testquery, db1)
+
+    assert c.last_result.status == 0
+    assert 'WARNING: lca gather is deprecated as of sourmash 3.4' in c.last_result.err
+
+
+@utils.in_thisdir
+def test_incompat_lca_db_moltype(c):
+    # test load of incompatible LCA DBs
+    testquery = utils.get_test_data('prot/protein/GCA_001593925.1_ASM159392v1_protein.faa.gz.sig')
+    db1 = utils.get_test_data('prot/protein.lca.json.gz')
+    db2 = utils.get_test_data('prot/dayhoff.lca.json.gz')
+
+    with pytest.raises(ValueError) as e:
+        c.run_sourmash('lca', 'gather', testquery, db1, db2)
+
+    assert 'Exception: multiple moltypes, quitting' in str(e.value)
+
+
 @utils.in_tempdir
 def test_incompat_lca_db_ksize(c):
     # create a database with ksize of 25