diff --git a/doc/api-example.md b/doc/api-example.md index 658497aef5..2a421a2d0d 100644 --- a/doc/api-example.md +++ b/doc/api-example.md @@ -290,7 +290,7 @@ We can downsample this to 500 by extracting the hashes and using Also note that there's a convenience function that does the same thing, faster! ``` ->>> smaller2 = larger.downsample_n(500) +>>> smaller2 = larger.downsample(num=500) >>> smaller2 == smaller True @@ -312,7 +312,7 @@ The same can be done with scaled MinHashes: And, again, there's a convenience function that you can use: ``` ->>> small_scaled2 = large_scaled.downsample_scaled(500) +>>> small_scaled2 = large_scaled.downsample(scaled=500) >>> small_scaled == small_scaled2 True diff --git a/sourmash/_minhash.py b/sourmash/_minhash.py deleted file mode 100644 index 8e41fe9149..0000000000 --- a/sourmash/_minhash.py +++ /dev/null @@ -1,6 +0,0 @@ -"Legacy / deprecated; will be removed in sourmash 4.0." -import warnings - -warnings.warn("Please import from the top level sourmash module instead of using _minhash, which will be renamed in 4.x", FutureWarning) - -from .minhash import * diff --git a/sourmash/commands.py b/sourmash/commands.py index 2701079d9a..30076916a4 100644 --- a/sourmash/commands.py +++ b/sourmash/commands.py @@ -111,7 +111,7 @@ def compare(args): if not printed_scaled_msg: notify('downsampling to scaled value of {}'.format(max_scaled)) printed_scaled_msg = True - s.minhash = s.minhash.downsample_scaled(max_scaled) + s.minhash = s.minhash.downsample(scaled=max_scaled) if len(siglist) == 0: error('no signatures!') @@ -389,7 +389,7 @@ def index(args): nums.add(ss.minhash.num) if args.scaled: - ss.minhash = ss.minhash.downsample_scaled(args.scaled) + ss.minhash = ss.minhash.downsample(scaled=args.scaled) scaleds.add(ss.minhash.scaled) tree.insert(ss) @@ -450,7 +450,7 @@ def search(args): if args.scaled != query.minhash.scaled: notify('downsampling query from scaled={} to {}', query.minhash.scaled, int(args.scaled)) - query.minhash = query.minhash.downsample_scaled(args.scaled) + query.minhash = query.minhash.downsample(scaled=args.scaled) # set up the search databases databases = sourmash_args.load_dbs_and_sigs(args.databases, query, @@ -610,7 +610,7 @@ def gather(args): if args.scaled: notify('downsampling query from scaled={} to {}', query.minhash.scaled, int(args.scaled)) - query.minhash = query.minhash.downsample_scaled(args.scaled) + query.minhash = query.minhash.downsample(scaled=args.scaled) # empty? if not len(query.minhash): @@ -762,7 +762,7 @@ def multigather(args): if args.scaled: notify('downsampling query from scaled={} to {}', query.minhash.scaled, int(args.scaled)) - query.minhash = query.minhash.downsample_scaled(args.scaled) + query.minhash = query.minhash.downsample(scaled=args.scaled) # empty? if not len(query.minhash): diff --git a/sourmash/lca/command_classify.py b/sourmash/lca/command_classify.py index 88a0425f8a..dfc3263745 100644 --- a/sourmash/lca/command_classify.py +++ b/sourmash/lca/command_classify.py @@ -135,7 +135,7 @@ def classify(args): total_count += 1 # make sure we're looking at the same scaled value as database - query_sig.minhash = query_sig.minhash.downsample_scaled(scaled) + query_sig.minhash = query_sig.minhash.downsample(scaled=scaled) # do the classification lineage, status = classify_signature(query_sig, dblist, diff --git a/sourmash/lca/command_gather.py b/sourmash/lca/command_gather.py index 9564042e41..63fa08a32b 100644 --- a/sourmash/lca/command_gather.py +++ b/sourmash/lca/command_gather.py @@ -199,7 +199,7 @@ def gather_main(args): debug('classifying', query_sig.name()) # make sure we're looking at the same scaled value as database - query_sig.minhash = query_sig.minhash.downsample_scaled(scaled) + query_sig.minhash = query_sig.minhash.downsample(scaled=scaled) # do the classification, output results found = [] diff --git a/sourmash/lca/command_summarize.py b/sourmash/lca/command_summarize.py index dc7200b40a..9cce43fed0 100644 --- a/sourmash/lca/command_summarize.py +++ b/sourmash/lca/command_summarize.py @@ -126,7 +126,7 @@ def load_singletons_and_count(filenames, ksize, scaled, with_abundance, traverse def count_signature(sig, scaled, hashvals): "Downsample sig to given scaled, count hashvalues." - mh = sig.minhash.downsample_scaled(scaled) + mh = sig.minhash.downsample(scaled=scaled) if mh.track_abundance: abunds = mh.hashes diff --git a/sourmash/lca/lca_db.py b/sourmash/lca/lca_db.py index d4a42739fc..9c607fcad3 100644 --- a/sourmash/lca/lca_db.py +++ b/sourmash/lca/lca_db.py @@ -125,7 +125,7 @@ def insert(self, sig, ident=None, lineage=None): # downsample to specified scaled; this has the side effect of # making sure they're all at the same scaled value! try: - minhash = minhash.downsample_scaled(self.scaled) + minhash = minhash.downsample(scaled=self.scaled) except ValueError: raise ValueError("cannot downsample signature; is it a scaled signature?") @@ -456,7 +456,7 @@ def _find_signatures(self, minhash, threshold, containment=False, """ # make sure we're looking at the same scaled value as database if self.scaled > minhash.scaled: - minhash = minhash.downsample_scaled(self.scaled) + minhash = minhash.downsample(scaled=self.scaled) elif self.scaled < minhash.scaled and not ignore_scaled: # note that containment can be calculated w/o matching scaled. raise ValueError("lca db scaled is {} vs query {}; must downsample".format(self.scaled, minhash.scaled)) diff --git a/sourmash/minhash.py b/sourmash/minhash.py index 915a32c996..516293213e 100644 --- a/sourmash/minhash.py +++ b/sourmash/minhash.py @@ -258,13 +258,6 @@ def add_sequence(self, sequence, force=False): self._methodcall(lib.kmerminhash_add_sequence, to_bytes(sequence), force) - @deprecated(deprecated_in="3.5", removed_in="4.0", - current_version=VERSION, - details='Use add_kmer instead.') - def add(self, kmer): - "Add a kmer into the sketch." - self.add_sequence(kmer) - def add_kmer(self, kmer): "Add a kmer into the sketch." if len(kmer) != self.ksize: @@ -286,13 +279,6 @@ def remove_many(self, hashes): "Remove many hashes at once; ``hashes`` must be an iterable." self._methodcall(lib.kmerminhash_remove_many, list(hashes), len(hashes)) - @deprecated(deprecated_in="3.5", removed_in="4.0", - current_version=VERSION, - details='Use add_many instead.') - def update(self, other): - "Update this sketch from all the hashes in the other." - self.add_many(other) - def __len__(self): "Number of hashes." return self._methodcall(lib.kmerminhash_get_mins_size) @@ -338,16 +324,6 @@ def hashes(self): d = self.get_mins() return _HashesWrapper({ k : 1 for k in d }) - @deprecated(deprecated_in="3.5", removed_in="4.0", - current_version=VERSION) - def subtract_mins(self, other): - """Get the list of mins in this MinHash, after removing the ones in - ``other``. - """ - a = set(self.get_mins()) - b = set(other.get_mins()) - return a - b - @property def seed(self): return self._methodcall(lib.kmerminhash_seed) @@ -424,17 +400,6 @@ def clear(self): "Clears all hashes and abundances." return self._methodcall(lib.kmerminhash_clear) - @deprecated(deprecated_in="3.5", removed_in="4.0", - current_version=VERSION, - details='Use translate_codon function at module level instead.') - def translate_codon(self, codon): - "Translate a codon into an amino acid." - try: - return rustcall(lib.sourmash_translate_codon, - to_bytes(codon)).decode('utf-8') - except SourmashError as e: - raise ValueError(e.message) - def count_common(self, other, downsample=False): """\ Return the number of hashes in common between ``self`` and ``other``. @@ -487,69 +452,6 @@ def downsample(self, num=None, scaled=None): return a - @deprecated(deprecated_in="3.5", removed_in="4.0", - current_version=VERSION, - details='Use downsample(num=...) instead.') - def downsample_n(self, new_num): - "Copy this object and downsample new object to num=``new_num``." - return self.downsample(num=new_num) - - @deprecated(deprecated_in="3.5", removed_in="4.0", - current_version=VERSION, - details='Use scaled instead.') - def downsample_max_hash(self, *others): - """Copy this object and downsample new object to min of ``*others``. - - Here, ``*others`` is one or more MinHash objects. - """ - max_hashes = [x.max_hash for x in others] - new_max_hash = min(self.max_hash, *max_hashes) - new_scaled = _get_scaled_for_max_hash(new_max_hash) - - return self.downsample_scaled(new_scaled) - - @deprecated(deprecated_in="3.5", removed_in="4.0", - current_version=VERSION, - details='Use downsample(scaled=...) instead.') - def downsample_scaled(self, new_scaled): - """Copy this object and downsample new object to scaled=``new_scaled``. - """ - return self.downsample(scaled=new_scaled) - - @deprecated(deprecated_in="3.3", removed_in="4.0", - current_version=VERSION, - details='Use count_common or set methods instead.') - def intersection(self, other, in_common=False): - """Calculate the intersection between ``self`` and ``other``, and - return ``(mins, size)`` where ``mins`` are the hashes in common, and - ``size`` is the number of hashes. - - if ``in_common``, return the actual hashes. Otherwise, mins will be - empty. - """ - if not isinstance(other, MinHash): - raise TypeError("Must be a MinHash!") - - if self.num != other.num: - err = "must have same num: {} != {}".format(self.num, other.num) - raise TypeError(err) - - if in_common: - # TODO: copy from buffer to Python land instead, - # this way involves more moving data around. - combined_mh = self.copy_and_clear() - combined_mh.merge(self) - combined_mh.merge(other) - - size = len(combined_mh) - common = set(self.get_mins()) - common.intersection_update(other.get_mins()) - else: - size = self._methodcall(lib.kmerminhash_intersection, other._get_objptr()) - common = set() - - return common, max(size, 1) - def flatten(self): """Return a new MinHash with track_abundance=False.""" # create new object: @@ -568,14 +470,6 @@ def jaccard(self, other, downsample=False): raise TypeError(err) return self._methodcall(lib.kmerminhash_similarity, other._get_objptr(), True, downsample) - @deprecated(deprecated_in="3.3", removed_in="4.0", - current_version=VERSION, - details="Use 'similarity' instead of compare.") - def compare(self, other, downsample=False): - "Calculate Jaccard similarity of two sketches." - return self.jaccard(other, downsample=downsample) - - def similarity(self, other, ignore_abundance=False, downsample=False): """Calculate similarity of two sketches. @@ -611,14 +505,6 @@ def contained_by(self, other, downsample=False): return self.count_common(other, downsample) / len(self) - @deprecated(deprecated_in="3.3", removed_in="4.0", - current_version=VERSION, - details="Use 'contained_by' with downsample=True instead.") - def containment_ignore_maxhash(self, other): - """Calculate contained_by, with downsampling. - """ - return self.contained_by(other, downsample=True) - def __iadd__(self, other): if not isinstance(other, MinHash): raise TypeError("Must be a MinHash!") @@ -650,19 +536,6 @@ def add_protein(self, sequence): "Add a protein sequence." self._methodcall(lib.kmerminhash_add_protein, to_bytes(sequence)) - @deprecated(deprecated_in="3.5", removed_in="4.0", - current_version=VERSION, - details='Use the moltype property instead.') - def is_molecule_type(self, molecule): - """Check if this MinHash is a particular human-readable molecule type. - - Supports 'protein', 'dayhoff', 'hp', 'DNA'. - @CTB deprecate for 4.0? - """ - if molecule.lower() not in ('protein', 'dayhoff', 'hp', 'dna'): - raise ValueError("unknown moltype in query, '{}'".format(molecule)) - return molecule == self.moltype - @property def moltype(self): # TODO: test in minhash tests if self.is_protein: diff --git a/sourmash/sbt.py b/sourmash/sbt.py index 3b7a08de6b..9914c3d00b 100644 --- a/sourmash/sbt.py +++ b/sourmash/sbt.py @@ -308,7 +308,7 @@ def search(self, query, *args, **kwargs): if tree_mh.scaled and query.minhash.scaled and \ tree_mh.scaled > query.minhash.scaled: resampled_query_mh = tree_query.minhash - resampled_query_mh = resampled_query_mh.downsample_scaled(tree_mh.scaled) + resampled_query_mh = resampled_query_mh.downsample(scaled=tree_mh.scaled) tree_query = SourmashSignature(resampled_query_mh) # define both search function and post-search calculation function diff --git a/sourmash/search.py b/sourmash/search.py index dfff80d691..363e6af938 100644 --- a/sourmash/search.py +++ b/sourmash/search.py @@ -64,7 +64,7 @@ def search_databases(query, databases, threshold, do_containment, best_only, # build a new query object, subtracting found mins and downsampling def _subtract_and_downsample(to_remove, old_query, scaled=None): mh = old_query.minhash - mh = mh.downsample_scaled(scaled) + mh = mh.downsample(scaled=scaled) mh.remove_many(to_remove) return SourmashSignature(mh) @@ -171,7 +171,7 @@ def gather_databases(query, databases, threshold_bp, ignore_abundance): float(len(orig_query_mins)) # calculate fractions wrt second denominator - metagenome size - orig_query_mh = orig_query_mh.downsample_scaled(cmp_scaled) + orig_query_mh = orig_query_mh.downsample(scaled=cmp_scaled) query_n_mins = len(orig_query_mh) f_unique_to_query = len(intersect_mins) / float(query_n_mins) diff --git a/sourmash_lib/__init__.py b/sourmash_lib/__init__.py deleted file mode 100644 index 94f09dafa1..0000000000 --- a/sourmash_lib/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -#! /usr/bin/env python -""" -An implementation of a MinHash bottom sketch, applied to k-mers in DNA. - -Legacy / deprecated; will be removed in sourmash 4.0. -""" -import sys -import warnings - -warnings.warn("Please import sourmash, instead of sourmash_lib; sourmash_lib will be removed in 4.x", FutureWarning) - -import sourmash - -sys.modules[__name__] = sys.modules['sourmash'] -#sys.modules[__name__] = __import__('sourmash') diff --git a/tests/test__minhash.py b/tests/test__minhash.py index edf7cc6fc9..46c6c4cdfb 100644 --- a/tests/test__minhash.py +++ b/tests/test__minhash.py @@ -172,20 +172,6 @@ def test_protein_hp(track_abundance, hp): assert len(mh.hashes) == 4 -def test_translate_codon_method_deprecated(track_abundance): - # Ensure that translation occurs properly - deprecated => module function - mh = MinHash(10, 6, is_protein=True) - assert mh.moltype == 'protein' - - assert "S" == mh.translate_codon('TCT') - assert "S" == mh.translate_codon('TC') - assert "X" == mh.translate_codon("T") - - with pytest.raises(ValueError): - mh.translate_codon("") - mh.translate_codon("TCTA") - - def test_module_translate_codon(track_abundance): # Ensure that translation occurs properly - module level function tests assert "S" == translate_codon('TCT') @@ -542,98 +528,6 @@ def test_similarity_1(track_abundance): assert round(b.similarity(b), 3) == 1.0 -def test_intersection_errors(track_abundance): - # CTB: remove this test in 4.0 - a = MinHash(20, 10, track_abundance=track_abundance) - b = MinHash(20, 10, track_abundance=track_abundance) - c = MinHash(30, 10, track_abundance=track_abundance) - - a.add_sequence("TGCCGCCCAGCA") - b.add_sequence("TGCCGCCCAGCA") - - common = set(a.hashes) - combined_size = 3 - - intersection, size = a.intersection(b, in_common=False) - assert intersection == set() - assert combined_size == size - - with pytest.raises(TypeError): - a.intersection(set()) - - with pytest.raises(TypeError): - a.intersection(c) - - -# this filter doesn't work, but leaving it in pour encourages les autres. -@pytest.mark.filterwarnings("ignore") -def test_intersection_1(track_abundance): - # CTB: remove this test in 4.0 - a = MinHash(20, 10, track_abundance=track_abundance) - b = MinHash(20, 10, track_abundance=track_abundance) - - a.add_sequence('TGCCGCCCAGCA') - b.add_sequence('TGCCGCCCAGCA') - - common = set(a.hashes) - combined_size = 3 - - intersection, size = a.intersection(b, in_common=True) - assert intersection == common - assert combined_size == size - - intersection, size = b.intersection(b, in_common=True) - assert intersection == common - assert combined_size == size - - intersection, size = b.intersection(a, in_common=True) - assert intersection == common - assert combined_size == size - - intersection, size = a.intersection(a, in_common=True) - assert intersection == common - assert combined_size == size - - # add same sequence again - b.add_sequence('TGCCGCCCAGCA') - - intersection, size = a.intersection(b, in_common=True) - assert intersection == common - assert combined_size == size - - intersection, size = b.intersection(b, in_common=True) - assert intersection == common - assert combined_size == size - - intersection, size = b.intersection(a, in_common=True) - assert intersection == common - assert combined_size == size - - intersection, size = a.intersection(a, in_common=True) - assert intersection == common - assert combined_size == size - - a.add_sequence('GTCCGCCCAGTGA') - b.add_sequence('GTCCGCCCAGTGG') - - new_in_common = set(a.hashes).intersection(set(b.hashes)) - new_combined_size = 8 - - intersection, size = a.intersection(b, in_common=True) - assert intersection == new_in_common - assert size == new_combined_size - - intersection, size = b.intersection(a, in_common=True) - assert intersection == new_in_common - assert size == new_combined_size - - intersection, size = a.intersection(a, in_common=True) - assert intersection == set(a.hashes) - - intersection, size = b.intersection(b, in_common=True) - assert intersection == set(b.hashes) - - def test_mh_copy(track_abundance): a = MinHash(20, 10, track_abundance=track_abundance) @@ -1317,19 +1211,6 @@ def test_scaled_property(track_abundance): assert a.scaled == scaled -def test_mh_subtract(track_abundance): - # test subtracting two identically configured minhashes - a = MinHash(20, 10, track_abundance=track_abundance) - for i in range(0, 40, 2): - a.add_hash(i) - - b = MinHash(20, 10, track_abundance=track_abundance) - for i in range(0, 80, 4): - b.add_hash(i) - - assert a.subtract_mins(b) == set(range(2, 40, 4)) - - def test_pickle_max_hash(track_abundance): a = MinHash(0, 10, track_abundance=track_abundance, scaled=_get_scaled_for_max_hash(20)) @@ -1569,24 +1450,6 @@ def test_add_kmer_too_long(track_abundance): mh1.add_kmer('ATGCGTGC') -def test_add_deprecated(track_abundance): - # test 'add' method, now deprecated - mh1 = MinHash(0, 4, scaled=1, track_abundance=track_abundance) - mh2 = MinHash(0, 4, scaled=1, track_abundance=track_abundance) - - mh1.add_sequence('ATGCGTGC') - a = mh1.hashes - - mh2.add('ATGC') - mh2.add('TGCG') - mh2.add('GCGT') - mh2.add('CGTG') - mh2.add('GTGC') - b = mh2.hashes - - assert set(a.items()) == set(b.items()) - - def test_get_mins_deprecated(track_abundance): mh = MinHash(0, 21, scaled=1, track_abundance=track_abundance) mins = (28945103950853965, 74690756200987412, 82962372765557409) @@ -1634,24 +1497,6 @@ def test_downsample_num(track_abundance): assert list(sorted(mh2.hashes)) == list(range(5)) -def test_downsample_n_deprecated(track_abundance): - # test downsample_n(...) function, now deprecated - mh = MinHash(10, 21, track_abundance=track_abundance) - for i in range(20): - mh.add_hash(i) - - assert mh.num == 10 - assert len(mh) == 10 - - assert list(sorted(mh.hashes)) == list(range(10)) - - mh2 = mh.downsample_n(5) - assert mh2.num == 5 - assert len(mh2) == 5 - - assert list(sorted(mh2.hashes)) == list(range(5)) - - def test_downsample_scaled(track_abundance): # test downsample(scaled...) method mh = MinHash(0, 21, scaled=1, track_abundance=track_abundance) @@ -1671,28 +1516,8 @@ def test_downsample_scaled(track_abundance): assert list(sorted(mh2.hashes)) == list(mins[:3]) -def test_downsample_scaled_deprecated(track_abundance): - # test downsample_scaled(...) method, now deprecated - mh = MinHash(0, 21, scaled=1, track_abundance=track_abundance) - - mins = (1, 2, 3, - 9223372036854775808 + 1, 9223372036854775808 + 2, - 9223372036854775808 + 3) - mh.add_many(mins) - - assert len(mh) == 6 - assert list(sorted(mh.hashes)) == list(mins) - - mh2 = mh.downsample_scaled(2) - print(mh.max_hash, mh2.max_hash) - - assert len(mh2) == 3 - assert list(sorted(mh2.hashes)) == list(mins[:3]) - - def test_is_molecule_type_1(track_abundance): mh = MinHash(1, 21, track_abundance=track_abundance) - assert mh.is_molecule_type('DNA') assert mh.moltype == 'DNA' assert mh.is_dna assert not mh.is_protein @@ -1702,7 +1527,6 @@ def test_is_molecule_type_1(track_abundance): def test_is_molecule_type_2(track_abundance): mh = MinHash(1, 21, track_abundance=track_abundance, is_protein=True) - assert mh.is_molecule_type('protein') assert mh.moltype == 'protein' assert not mh.is_dna assert mh.is_protein @@ -1712,7 +1536,6 @@ def test_is_molecule_type_2(track_abundance): def test_is_molecule_type_3(track_abundance): mh = MinHash(1, 21, track_abundance=track_abundance, hp=True) - assert mh.is_molecule_type('hp') assert mh.moltype == 'hp' assert not mh.is_dna assert not mh.is_protein @@ -1723,17 +1546,8 @@ def test_is_molecule_type_3(track_abundance): def test_is_molecule_type_4(track_abundance): mh = MinHash(1, 21, track_abundance=track_abundance, dayhoff=True) - assert mh.is_molecule_type('dayhoff') assert mh.moltype == 'dayhoff' assert not mh.is_dna assert not mh.is_protein assert not mh.hp assert mh.dayhoff - - -def test__minhash_import(): - from sourmash._minhash import ( - MinHash, - hash_murmur, - translate_codon - ) diff --git a/tests/test_deprecated.py b/tests/test_deprecated.py index a92dd0db8a..3e39761a0f 100644 --- a/tests/test_deprecated.py +++ b/tests/test_deprecated.py @@ -11,7 +11,3 @@ def test_load_textmode(track_abundance): siglist = list(signature.load_signatures(sigfp)) loaded_sig = siglist[0] assert loaded_sig.name() == 's10+s11' - - -def test_import_sourmash_lib(): - import sourmash_lib diff --git a/tests/test_jaccard.py b/tests/test_jaccard.py index 61e27031cf..264a895cfb 100644 --- a/tests/test_jaccard.py +++ b/tests/test_jaccard.py @@ -187,18 +187,18 @@ def test_jaccard_on_real_data(): assert mh1.similarity(mh2) == 0.0183 assert mh2.similarity(mh1) == 0.0183 - mh1 = mh1.downsample_n(1000) - mh2 = mh2.downsample_n(1000) + mh1 = mh1.downsample(num=1000) + mh2 = mh2.downsample(num=1000) assert mh1.similarity(mh2) == 0.011 assert mh2.similarity(mh1) == 0.011 - mh1 = mh1.downsample_n(100) - mh2 = mh2.downsample_n(100) + mh1 = mh1.downsample(num=100) + mh2 = mh2.downsample(num=100) assert mh1.similarity(mh2) == 0.01 assert mh2.similarity(mh1) == 0.01 - mh1 = mh1.downsample_n(10) - mh2 = mh2.downsample_n(10) + mh1 = mh1.downsample(num=10) + mh2 = mh2.downsample(num=10) assert mh1.similarity(mh2) == 0.0 assert mh2.similarity(mh1) == 0.0 @@ -219,24 +219,24 @@ def test_scaled_on_real_data(): assert round(mh1.similarity(mh2), 5) == 0.01644 assert round(mh2.similarity(mh1), 5) == 0.01644 - mh1 = mh1.downsample_n(10000) - mh2 = mh2.downsample_n(10000) + mh1 = mh1.downsample(num=10000) + mh2 = mh2.downsample(num=10000) assert mh1.similarity(mh2) == 0.0183 assert mh2.similarity(mh1) == 0.0183 - mh1 = mh1.downsample_n(1000) - mh2 = mh2.downsample_n(1000) + mh1 = mh1.downsample(num=1000) + mh2 = mh2.downsample(num=1000) assert mh1.similarity(mh2) == 0.011 assert mh2.similarity(mh1) == 0.011 - mh1 = mh1.downsample_n(100) - mh2 = mh2.downsample_n(100) + mh1 = mh1.downsample(num=100) + mh2 = mh2.downsample(num=100) assert mh1.similarity(mh2) == 0.01 assert mh2.similarity(mh1) == 0.01 - mh1 = mh1.downsample_n(10) - mh2 = mh2.downsample_n(10) + mh1 = mh1.downsample(num=10) + mh2 = mh2.downsample(num=10) assert mh1.similarity(mh2) == 0.0 assert mh2.similarity(mh1) == 0.0