Skip to content

Commit

Permalink
fix gather output bug (#589)
Browse files Browse the repository at this point in the history
* fix gather output bug
* add test for gather database filename output bug
  • Loading branch information
ctb authored and luizirber committed Jan 3, 2019
1 parent bddd23f commit 29e5cbd
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 4 deletions.
8 changes: 4 additions & 4 deletions sourmash/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,29 +144,29 @@ def find_best(dblist, query, remainder):
leaf_e = leaf.data.minhash
similarity = query.minhash.containment_ignore_maxhash(leaf_e)
if similarity > 0.0:
results.append((similarity, leaf.data))
results.append((similarity, leaf.data, filename))
# or an LCA database
elif filetype == 'LCA':
lca_db = obj
for x in lca_db.find(query.minhash, 0.0,
containment=True, ignore_scaled=True):
(score, match_sig, md5, filename, name) = x
if score > 0.0:
results.append((score, match_sig))
results.append((score, match_sig, filename))

# search a signature
else:
for ss in obj:
similarity = query.minhash.containment_ignore_maxhash(ss.minhash)
if similarity > 0.0:
results.append((similarity, ss))
results.append((similarity, ss, filename))

if not results:
return None, None, None

# take the best result
results.sort(key=lambda x: (-x[0], x[1].name())) # reverse sort on similarity, and then on name
best_similarity, best_leaf = results[0]
best_similarity, best_leaf, filename = results[0]

for x in results[1:]:
remainder.add(x[1])
Expand Down
14 changes: 14 additions & 0 deletions tests/test_sourmash.py
Original file line number Diff line number Diff line change
Expand Up @@ -1178,6 +1178,20 @@ def test_gather_lca_db(c):
assert 'NC_009665.1 Shewanella baltica OS185' in str(c)


@utils.in_tempdir
def test_gather_csv_output_filename_bug(c):
    """Regression test for the database filename reported in gather's CSV.

    Runs ``sourmash gather`` with one query signature against two LCA
    databases, writing results to ``out.csv``, and checks that the
    ``filename`` column of the first CSV row is the path of the first
    database given on the command line.
    """
    query_sig = utils.get_test_data('lca/TARA_ASE_MAG_00031.sig')
    first_db = utils.get_test_data('lca/delmont-1.lca.json')
    second_db = utils.get_test_data('lca/delmont-2.lca.json')

    c.run_sourmash('gather', query_sig, first_db, second_db,
                   '-o', 'out.csv')

    csv_path = c.output('out.csv')
    with open(csv_path, 'rt') as fp:
        # Only the first result row is inspected.
        first_row = next(csv.DictReader(fp))
        assert first_row['filename'] == first_db


def test_compare_deduce_molecule():
# deduce DNA vs protein from query, if it is unique
with utils.TempDirectory() as location:
Expand Down

0 comments on commit 29e5cbd

Please sign in to comment.