Skip to content

Commit

Permalink
Moving loading and save sigs to rust
Browse files Browse the repository at this point in the history
move json parsing and init to rust
working on loading sigs

55 failing. Now it's failing because the SBT index is saving all signatures
(instead of only the ones that were used to build the tree).
This was actually a feature (see #198), but it broke the SBT code
(it wasn't ready for that!)
  • Loading branch information
luizirber committed Jan 9, 2019
1 parent 32bdb73 commit d63d1fc
Show file tree
Hide file tree
Showing 12 changed files with 228 additions and 558 deletions.
7 changes: 5 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,11 @@ def build_native(spec):
"packages": find_packages(),
"zip_safe": False,
"platforms": "any",
"entry_points": {"console_scripts": ["sourmash = sourmash.__main__:main"]},
"install_requires": ["screed>=0.9", "ijson", "khmer>=2.1", "milksnake"],
"entry_points": {'console_scripts': [
'sourmash = sourmash.__main__:main'
]
},
"install_requires": ["screed>=0.9", "khmer>=2.1", 'milksnake'],
"setup_requires": [
"setuptools>=38.6.0",
"milksnake",
Expand Down
9 changes: 9 additions & 0 deletions sourmash/_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,12 @@ def implements_to_string(cls):
itervalues = lambda x: x.values()
NUL = 0
implements_to_string = lambda x: x


def to_bytes(s):
    """Return ``s`` as a bytes object.

    Instances of ``string_types`` are encoded as UTF-8; ``bytes`` values are
    returned unchanged.

    Raises:
        TypeError: if ``s`` is neither one of ``string_types`` nor ``bytes``.
    """
    accepted_types = string_types + (bytes,)
    if not isinstance(s, accepted_types):
        raise TypeError("Requires a string-like sequence")

    # Only text needs converting; bytes input passes straight through.
    return s.encode('utf-8') if isinstance(s, string_types) else s
17 changes: 5 additions & 12 deletions sourmash/_minhash.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import math
import copy

from ._compat import string_types, range_type
from ._lowlevel import ffi, lib
from ._compat import to_bytes
from .utils import RustObject, rustcall

# default MurmurHash seed
Expand Down Expand Up @@ -40,15 +40,6 @@ def get_scaled_for_max_hash(max_hash):
return int(round(get_minhash_max_hash() / max_hash, 0))


def to_bytes(s):
    """Return ``s`` as a bytes object.

    Instances of ``string_types`` are encoded as UTF-8; ``bytes`` values are
    returned unchanged.

    Raises:
        TypeError: if ``s`` is neither one of ``string_types`` nor ``bytes``.
    """
    accepted_types = string_types + (bytes,)
    if not isinstance(s, accepted_types):
        raise TypeError("Requires a string-like sequence")

    # Only text needs converting; bytes input passes straight through.
    return s.encode("utf-8") if isinstance(s, string_types) else s


def hash_murmur(kmer, seed=MINHASH_DEFAULT_SEED):
"hash_murmur(string, [,seed])\n\n"
"Compute a hash for a string, optionally using a seed (an integer). "
Expand Down Expand Up @@ -92,8 +83,6 @@ def __init__(
mins=None,
scaled=0,
):
self.track_abundance = track_abundance

if max_hash and scaled:
raise ValueError("cannot set both max_hash and scaled")
elif scaled:
Expand Down Expand Up @@ -221,6 +210,10 @@ def subtract_mins(self, other):
b = set(other.get_mins())
return a - b

@property
def track_abundance(self):
    """Value reported by ``lib.kmerminhash_track_abundance`` for this object.

    The lookup is dispatched through ``self._methodcall`` on every access.
    """
    dispatch = self._methodcall
    return dispatch(lib.kmerminhash_track_abundance)

@property
def seed(self):
    """Value reported by ``lib.kmerminhash_seed`` for this object.

    The lookup is dispatched through ``self._methodcall`` on every access.
    """
    dispatch = self._methodcall
    return dispatch(lib.kmerminhash_seed)
Expand Down
3 changes: 2 additions & 1 deletion sourmash/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -1325,14 +1325,14 @@ def watch(args):
ksize = tree_mh.ksize

E = MinHash(ksize=ksize, n=args.num_hashes, is_protein=is_protein)
streamsig = sig.SourmashSignature(E, filename='stdin', name=args.name)

notify('Computing signature for k={}, {} from stdin', ksize, moltype)

def do_search():
search_fn = SearchMinHashesFindBest().search

results = []
streamsig = sig.SourmashSignature(E, filename='stdin', name=args.name)
for leaf in tree.find(search_fn, streamsig, args.threshold):
results.append((streamsig.similarity(leaf.data),
leaf.data))
Expand Down Expand Up @@ -1370,6 +1370,7 @@ def do_search():

if args.output:
notify('saving signature to {}', args.output.name)
streamsig = sig.SourmashSignature(E, filename='stdin', name=args.name)
sig.save_signatures([streamsig], args.output)


Expand Down
Loading

0 comments on commit d63d1fc

Please sign in to comment.