diff --git a/src/sourmash/sig/__main__.py b/src/sourmash/sig/__main__.py
index 0a9cd4bc9e..3ec03ed9e0 100644
--- a/src/sourmash/sig/__main__.py
+++ b/src/sourmash/sig/__main__.py
@@ -384,10 +384,10 @@ def overlap(args):
 
     moltype = sourmash_args.calculate_moltype(args)
 
-    sig1 = sourmash.load_one_signature(
+    sig1 = sourmash_args.load_one_signature(
         args.signature1, ksize=args.ksize, select_moltype=moltype
     )
-    sig2 = sourmash.load_one_signature(
+    sig2 = sourmash_args.load_one_signature(
         args.signature2, ksize=args.ksize, select_moltype=moltype
     )
 
@@ -573,7 +573,7 @@ def intersect(args):
     # borrow abundances from a signature?
     if args.abundances_from:
         notify(f"loading signature from {args.abundances_from}, keeping abundances")
-        abund_sig = sourmash.load_one_signature(
+        abund_sig = sourmash_args.load_one_signature(
             args.abundances_from, ksize=args.ksize, select_moltype=moltype
         )
         if not abund_sig.minhash.track_abundance:
@@ -646,9 +646,8 @@ def subtract(args):
     set_quiet(args.quiet)
     moltype = sourmash_args.calculate_moltype(args)
 
-    from_sigfile = args.signature_from
-    from_sigobj = sourmash.load_one_signature(
-        from_sigfile, ksize=args.ksize, select_moltype=moltype
+    from_sigobj = sourmash_args.load_one_signature(
+        args.signature_from, ksize=args.ksize, select_moltype=moltype
     )
 
     if args.abundances_from:  # it's ok to work with abund signatures if -A.
@@ -661,7 +660,7 @@ def subtract(args):
 
     subtract_mins = set(from_mh.hashes)
 
-    notify(f"loaded signature from {from_sigfile}...", end="\r")
+    notify(f"loaded signature from {args.signature_from}...", end="\r")
 
     progress = sourmash_args.SignatureLoadingProgress()
 
@@ -694,9 +693,10 @@ def subtract(args):
     # borrow abundances from somewhere?
     if args.abundances_from:
         notify(f"loading signature from {args.abundances_from}, keeping abundances")
-        abund_sig = sourmash.load_one_signature(
+        abund_sig = sourmash_args.load_one_signature(
             args.abundances_from, ksize=args.ksize, select_moltype=moltype
         )
+
         if not abund_sig.minhash.track_abundance:
             error("--track-abundance not set on loaded signature?! exiting.")
             sys.exit(-1)
diff --git a/src/sourmash/sourmash_args.py b/src/sourmash/sourmash_args.py
index fdbc0e4cf6..c4483cfe6c 100644
--- a/src/sourmash/sourmash_args.py
+++ b/src/sourmash/sourmash_args.py
@@ -810,3 +810,50 @@ def load_file_as_signatures(
         return progress.start_file(filename, loader)
     else:
         return loader
+
+
+def load_one_signature(
+    filename,
+    *,
+    select_moltype=None,
+    ksize=None,
+    picklist=None,
+    yield_all_files=False,
+    pattern=None,
+):
+    """Load exactly one signature from 'filename'.
+
+    'filename' is opened with _load_database(); the resulting database is
+    narrowed with select(moltype=..., ksize=...) and then with
+    apply_picklist_and_pattern().
+
+    Raises ValueError if no signature remains, or if more than one does.
+    """
+    db = _load_database(filename, yield_all_files)
+
+    db = db.select(moltype=select_moltype, ksize=ksize)
+
+    # apply pattern search & picklist
+    db = apply_picklist_and_pattern(db, picklist, pattern)
+
+    # create the iterator once: calling iter() again on a re-iterable
+    # container would restart from the first signature.
+    sig_iter = iter(db.signatures())
+
+    # load exactly one!
+    try:
+        ss = next(sig_iter)
+    except StopIteration:
+        raise ValueError(
+            f"no signatures in '{filename}'? expected exactly one."
+        ) from None
+
+    # make sure there's not a second one...
+    try:
+        next(sig_iter)
+    except StopIteration:
+        return ss
+
+    raise ValueError(
+        f"more than one signature in '{filename}'; expected exactly one"
+    )
diff --git a/tests/test-data/47.fa.sig.zip b/tests/test-data/47.fa.sig.zip
new file mode 100644
index 0000000000..d194944d88
Binary files /dev/null and b/tests/test-data/47.fa.sig.zip differ
diff --git a/tests/test-data/63.fa.sig.zip b/tests/test-data/63.fa.sig.zip
new file mode 100644
index 0000000000..3a84891a24
Binary files /dev/null and b/tests/test-data/63.fa.sig.zip differ
diff --git a/tests/test_cmd_signature.py b/tests/test_cmd_signature.py
index 8dfe8dc74a..04133a6796 100644
--- a/tests/test_cmd_signature.py
+++ b/tests/test_cmd_signature.py
@@ -769,8 +769,8 @@ def test_sig_inflate_5_bad_moltype(runtmp):
     assert "no signatures to inflate" in runtmp.last_result.err
 
 
-@utils.in_tempdir
-def test_sig_subtract_1(c):
+def test_sig_subtract_1(runtmp):
+    c = runtmp
     # subtract of 63 from 47
     sig47 = utils.get_test_data("47.fa.sig")
     sig63 = utils.get_test_data("63.fa.sig")
@@ -789,6 +789,28 @@ def test_sig_subtract_1(c):
     assert set(actual_subtract_sig.minhash.hashes.keys()) == set(mins)
 
 
+def test_sig_subtract_1_sigzip(runtmp):
+    c = runtmp
+    # subtract of 63 from 47
+    sig47 = utils.get_test_data("47.fa.sig.zip")
+    sig63 = utils.get_test_data("63.fa.sig.zip")
+    c.run_sourmash("sig", "subtract", sig47, sig63)
+
+    # stdout should be new signature
+    out = c.last_result.out
+
+    from sourmash import sourmash_args
+
+    test1_sig = sourmash_args.load_one_signature(sig47)
+    test2_sig = sourmash_args.load_one_signature(sig63)
+    actual_subtract_sig = sourmash.load_one_signature(out)
+
+    mins = set(test1_sig.minhash.hashes.keys())
+    mins -= set(test2_sig.minhash.hashes.keys())
+
+    assert set(actual_subtract_sig.minhash.hashes.keys()) == set(mins)
+
+
 def test_sig_subtract_1_abund(runtmp):
     # subtract 63 from 47, with abundances borrowed from 47
 
@@ -856,8 +878,8 @@ def test_sig_subtract_1_flatten(runtmp):
     assert set(actual_subtract_sig.minhash.hashes.keys()) == set(mins)
 
 
-@utils.in_tempdir
-def test_sig_subtract_1_multisig(c):
+def test_sig_subtract_1_multisig(runtmp):
+    c = runtmp
     # subtract of everything from 47
     sig47 = utils.get_test_data("47.fa.sig")
     multisig = utils.get_test_data("47+63-multisig.sig")
@@ -871,8 +893,8 @@ def test_sig_subtract_1_multisig(c):
     assert not set(actual_subtract_sig.minhash.hashes.keys())
 
 
-@utils.in_tempdir
-def test_sig_subtract_2(c):
+def test_sig_subtract_2(runtmp):
+    c = runtmp
     # subtract of 63 from 47 should fail if 47 has abund
     sig47 = utils.get_test_data("track_abund/47.fa.sig")
     sig63 = utils.get_test_data("63.fa.sig")
@@ -881,8 +903,8 @@ def test_sig_subtract_2(c):
         c.run_sourmash("sig", "subtract", sig47, sig63)
 
 
-@utils.in_tempdir
-def test_sig_subtract_3(c):
+def test_sig_subtract_3(runtmp):
+    c = runtmp
     # subtract of 63 from 47 should fail if 63 has abund
     sig47 = utils.get_test_data("47.fa.sig")
     sig63 = utils.get_test_data("track_abund/63.fa.sig")
@@ -891,8 +913,8 @@ def test_sig_subtract_3(c):
         c.run_sourmash("sig", "subtract", sig47, sig63)
 
 
-@utils.in_tempdir
-def test_sig_subtract_4_ksize_fail(c):
+def test_sig_subtract_4_ksize_fail(runtmp):
+    c = runtmp
     # subtract of 2 from 47 should fail without -k specified
     sig47 = utils.get_test_data("47.fa.sig")
     sig2 = utils.get_test_data("2.fa.sig")
@@ -901,8 +923,8 @@ def test_sig_subtract_4_ksize_fail(c):
         c.run_sourmash("sig", "subtract", sig47, sig2)
 
 
-@utils.in_tempdir
-def test_sig_subtract_4_ksize_succeed(c):
+def test_sig_subtract_4_ksize_succeed(runtmp):
+    c = runtmp
     # subtract of 2 from 47 should fail without -k specified
     sig47 = utils.get_test_data("47.fa.sig")
     sig2 = utils.get_test_data("2.fa.sig")
@@ -3839,8 +3861,8 @@ def test_sig_describe_3_manifest_fails_when_moved(runtmp):
         runtmp.sourmash("sig", "describe", "mf.csv")
 
 
-@utils.in_tempdir
-def test_sig_overlap(c):
+def test_sig_overlap(runtmp):
+    c = runtmp
     # get overlap details
     sig47 = utils.get_test_data("47.fa.sig")
     sig63 = utils.get_test_data("63.fa.sig")
@@ -3857,6 +3879,24 @@ def test_sig_overlap(c):
     assert "number of hashes in common: 2529" in out
 
 
+def test_sig_overlap_2(runtmp):
+    c = runtmp
+    # get overlap details
+    sig47 = utils.get_test_data("47.fa.sig.zip")
+    sig63 = utils.get_test_data("63.fa.sig.zip")
+    c.run_sourmash("sig", "overlap", sig47, sig63)
+    out = c.last_result.out
+
+    print(out)
+
+    # md5s
+    assert "09a08691ce52952152f0e866a59f6261" in out
+    assert "38729c6374925585db28916b82a6f513" in out
+
+    assert "similarity: 0.32069" in out
+    assert "number of hashes in common: 2529" in out
+
+
 @utils.in_tempdir
 def test_import_export_1(c):
     # check to make sure we can import what we've exported!
diff --git a/tests/test_sourmash_args.py b/tests/test_sourmash_args.py
index 7fcbe2511e..8f58a0a25f 100644
--- a/tests/test_sourmash_args.py
+++ b/tests/test_sourmash_args.py
@@ -841,3 +841,31 @@ def test_bug_2370(runtmp):
     # try running sourmash_args.load_file_as_index
     # runtmp.sourmash('sig', 'describe', runtmp.output('not_really_gzipped.gz'))
     sourmash_args.load_file_as_index(runtmp.output("not_really_gzipped.gz"))
+
+
+def test_load_one_signature_1(runtmp):
+    # test the sourmash_args.load_one_signature function
+    sigfile = utils.get_test_data("63.fa.sig.zip")
+
+    ss = sourmash_args.load_one_signature(sigfile, ksize=31)
+    assert ss.name.startswith("NC_011663.1 ")
+
+
+def test_load_one_signature_2_fail(runtmp):
+    # test the sourmash_args.load_one_signature function on failure - no sig
+    sigfile = utils.get_test_data("63.fa.sig.zip")
+
+    with pytest.raises(ValueError) as exc:
+        sourmash_args.load_one_signature(sigfile, ksize=21)
+
+    assert "expected exactly one." in str(exc.value)
+
+
+def test_load_one_signature_3_fail(runtmp):
+    # test the sourmash_args.load_one_signature function on failure - many sigs
+    sigfile = utils.get_test_data("prot/all.zip")
+
+    with pytest.raises(ValueError) as exc:
+        sourmash_args.load_one_signature(sigfile)
+
+    assert "more than one signature" in str(exc.value)