Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG] Remove deprecated minhash functions #1149

Merged
merged 5 commits on Aug 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
4 changes: 2 additions & 2 deletions doc/api-example.md
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ We can downsample this to 500 by extracting the hashes and using
Also note that there's a convenience function that does the same thing,
faster!
```
>>> smaller2 = larger.downsample_n(500)
>>> smaller2 = larger.downsample(num=500)
>>> smaller2 == smaller
True

Expand All @@ -312,7 +312,7 @@ The same can be done with scaled MinHashes:

And, again, there's a convenience function that you can use:
```
>>> small_scaled2 = large_scaled.downsample_scaled(500)
>>> small_scaled2 = large_scaled.downsample(scaled=500)
>>> small_scaled == small_scaled2
True

Expand Down
6 changes: 0 additions & 6 deletions sourmash/_minhash.py

This file was deleted.

10 changes: 5 additions & 5 deletions sourmash/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def compare(args):
if not printed_scaled_msg:
notify('downsampling to scaled value of {}'.format(max_scaled))
printed_scaled_msg = True
s.minhash = s.minhash.downsample_scaled(max_scaled)
s.minhash = s.minhash.downsample(scaled=max_scaled)

if len(siglist) == 0:
error('no signatures!')
Expand Down Expand Up @@ -389,7 +389,7 @@ def index(args):
nums.add(ss.minhash.num)

if args.scaled:
ss.minhash = ss.minhash.downsample_scaled(args.scaled)
ss.minhash = ss.minhash.downsample(scaled=args.scaled)
scaleds.add(ss.minhash.scaled)

tree.insert(ss)
Expand Down Expand Up @@ -450,7 +450,7 @@ def search(args):
if args.scaled != query.minhash.scaled:
notify('downsampling query from scaled={} to {}',
query.minhash.scaled, int(args.scaled))
query.minhash = query.minhash.downsample_scaled(args.scaled)
query.minhash = query.minhash.downsample(scaled=args.scaled)

# set up the search databases
databases = sourmash_args.load_dbs_and_sigs(args.databases, query,
Expand Down Expand Up @@ -610,7 +610,7 @@ def gather(args):
if args.scaled:
notify('downsampling query from scaled={} to {}',
query.minhash.scaled, int(args.scaled))
query.minhash = query.minhash.downsample_scaled(args.scaled)
query.minhash = query.minhash.downsample(scaled=args.scaled)

# empty?
if not len(query.minhash):
Expand Down Expand Up @@ -762,7 +762,7 @@ def multigather(args):
if args.scaled:
notify('downsampling query from scaled={} to {}',
query.minhash.scaled, int(args.scaled))
query.minhash = query.minhash.downsample_scaled(args.scaled)
query.minhash = query.minhash.downsample(scaled=args.scaled)

# empty?
if not len(query.minhash):
Expand Down
2 changes: 1 addition & 1 deletion sourmash/lca/command_classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def classify(args):
total_count += 1

# make sure we're looking at the same scaled value as database
query_sig.minhash = query_sig.minhash.downsample_scaled(scaled)
query_sig.minhash = query_sig.minhash.downsample(scaled=scaled)

# do the classification
lineage, status = classify_signature(query_sig, dblist,
Expand Down
2 changes: 1 addition & 1 deletion sourmash/lca/command_gather.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def gather_main(args):
debug('classifying', query_sig.name())

# make sure we're looking at the same scaled value as database
query_sig.minhash = query_sig.minhash.downsample_scaled(scaled)
query_sig.minhash = query_sig.minhash.downsample(scaled=scaled)

# do the classification, output results
found = []
Expand Down
2 changes: 1 addition & 1 deletion sourmash/lca/command_summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def load_singletons_and_count(filenames, ksize, scaled, with_abundance, traverse

def count_signature(sig, scaled, hashvals):
"Downsample sig to given scaled, count hashvalues."
mh = sig.minhash.downsample_scaled(scaled)
mh = sig.minhash.downsample(scaled=scaled)

if mh.track_abundance:
abunds = mh.hashes
Expand Down
4 changes: 2 additions & 2 deletions sourmash/lca/lca_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def insert(self, sig, ident=None, lineage=None):
# downsample to specified scaled; this has the side effect of
# making sure they're all at the same scaled value!
try:
minhash = minhash.downsample_scaled(self.scaled)
minhash = minhash.downsample(scaled=self.scaled)
except ValueError:
raise ValueError("cannot downsample signature; is it a scaled signature?")

Expand Down Expand Up @@ -456,7 +456,7 @@ def _find_signatures(self, minhash, threshold, containment=False,
"""
# make sure we're looking at the same scaled value as database
if self.scaled > minhash.scaled:
minhash = minhash.downsample_scaled(self.scaled)
minhash = minhash.downsample(scaled=self.scaled)
elif self.scaled < minhash.scaled and not ignore_scaled:
# note that containment can be calculated w/o matching scaled.
raise ValueError("lca db scaled is {} vs query {}; must downsample".format(self.scaled, minhash.scaled))
Expand Down
127 changes: 0 additions & 127 deletions sourmash/minhash.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,13 +258,6 @@ def add_sequence(self, sequence, force=False):
self._methodcall(lib.kmerminhash_add_sequence, to_bytes(sequence),
force)

@deprecated(deprecated_in="3.5", removed_in="4.0",
current_version=VERSION,
details='Use add_kmer instead.')
def add(self, kmer):
"Add a kmer into the sketch."
self.add_sequence(kmer)

def add_kmer(self, kmer):
"Add a kmer into the sketch."
if len(kmer) != self.ksize:
Expand All @@ -286,13 +279,6 @@ def remove_many(self, hashes):
"Remove many hashes at once; ``hashes`` must be an iterable."
self._methodcall(lib.kmerminhash_remove_many, list(hashes), len(hashes))

@deprecated(deprecated_in="3.5", removed_in="4.0",
current_version=VERSION,
details='Use add_many instead.')
def update(self, other):
"Update this sketch from all the hashes in the other."
self.add_many(other)

def __len__(self):
"Number of hashes."
return self._methodcall(lib.kmerminhash_get_mins_size)
Expand Down Expand Up @@ -338,16 +324,6 @@ def hashes(self):
d = self.get_mins()
return _HashesWrapper({ k : 1 for k in d })

@deprecated(deprecated_in="3.5", removed_in="4.0",
current_version=VERSION)
def subtract_mins(self, other):
"""Get the list of mins in this MinHash, after removing the ones in
``other``.
"""
a = set(self.get_mins())
b = set(other.get_mins())
return a - b

@property
def seed(self):
return self._methodcall(lib.kmerminhash_seed)
Expand Down Expand Up @@ -424,17 +400,6 @@ def clear(self):
"Clears all hashes and abundances."
return self._methodcall(lib.kmerminhash_clear)

@deprecated(deprecated_in="3.5", removed_in="4.0",
current_version=VERSION,
details='Use translate_codon function at module level instead.')
def translate_codon(self, codon):
"Translate a codon into an amino acid."
try:
return rustcall(lib.sourmash_translate_codon,
to_bytes(codon)).decode('utf-8')
except SourmashError as e:
raise ValueError(e.message)

def count_common(self, other, downsample=False):
"""\
Return the number of hashes in common between ``self`` and ``other``.
Expand Down Expand Up @@ -487,69 +452,6 @@ def downsample(self, num=None, scaled=None):

return a

@deprecated(deprecated_in="3.5", removed_in="4.0",
current_version=VERSION,
details='Use downsample(num=...) instead.')
def downsample_n(self, new_num):
"Copy this object and downsample new object to num=``new_num``."
return self.downsample(num=new_num)

@deprecated(deprecated_in="3.5", removed_in="4.0",
current_version=VERSION,
details='Use scaled instead.')
def downsample_max_hash(self, *others):
"""Copy this object and downsample new object to min of ``*others``.

Here, ``*others`` is one or more MinHash objects.
"""
max_hashes = [x.max_hash for x in others]
new_max_hash = min(self.max_hash, *max_hashes)
new_scaled = _get_scaled_for_max_hash(new_max_hash)

return self.downsample_scaled(new_scaled)

@deprecated(deprecated_in="3.5", removed_in="4.0",
current_version=VERSION,
details='Use downsample(scaled=...) instead.')
def downsample_scaled(self, new_scaled):
"""Copy this object and downsample new object to scaled=``new_scaled``.
"""
return self.downsample(scaled=new_scaled)

@deprecated(deprecated_in="3.3", removed_in="4.0",
current_version=VERSION,
details='Use count_common or set methods instead.')
def intersection(self, other, in_common=False):
"""Calculate the intersection between ``self`` and ``other``, and
return ``(mins, size)`` where ``mins`` are the hashes in common, and
``size`` is the number of hashes.

if ``in_common``, return the actual hashes. Otherwise, mins will be
empty.
"""
if not isinstance(other, MinHash):
raise TypeError("Must be a MinHash!")

if self.num != other.num:
err = "must have same num: {} != {}".format(self.num, other.num)
raise TypeError(err)

if in_common:
# TODO: copy from buffer to Python land instead,
# this way involves more moving data around.
combined_mh = self.copy_and_clear()
combined_mh.merge(self)
combined_mh.merge(other)

size = len(combined_mh)
common = set(self.get_mins())
common.intersection_update(other.get_mins())
else:
size = self._methodcall(lib.kmerminhash_intersection, other._get_objptr())
common = set()

return common, max(size, 1)

def flatten(self):
"""Return a new MinHash with track_abundance=False."""
# create new object:
Expand All @@ -568,14 +470,6 @@ def jaccard(self, other, downsample=False):
raise TypeError(err)
return self._methodcall(lib.kmerminhash_similarity, other._get_objptr(), True, downsample)

@deprecated(deprecated_in="3.3", removed_in="4.0",
current_version=VERSION,
details="Use 'similarity' instead of compare.")
def compare(self, other, downsample=False):
"Calculate Jaccard similarity of two sketches."
return self.jaccard(other, downsample=downsample)


def similarity(self, other, ignore_abundance=False, downsample=False):
"""Calculate similarity of two sketches.

Expand Down Expand Up @@ -611,14 +505,6 @@ def contained_by(self, other, downsample=False):

return self.count_common(other, downsample) / len(self)

@deprecated(deprecated_in="3.3", removed_in="4.0",
current_version=VERSION,
details="Use 'contained_by' with downsample=True instead.")
def containment_ignore_maxhash(self, other):
"""Calculate contained_by, with downsampling.
"""
return self.contained_by(other, downsample=True)

def __iadd__(self, other):
if not isinstance(other, MinHash):
raise TypeError("Must be a MinHash!")
Expand Down Expand Up @@ -650,19 +536,6 @@ def add_protein(self, sequence):
"Add a protein sequence."
self._methodcall(lib.kmerminhash_add_protein, to_bytes(sequence))

@deprecated(deprecated_in="3.5", removed_in="4.0",
current_version=VERSION,
details='Use the moltype property instead.')
def is_molecule_type(self, molecule):
"""Check if this MinHash is a particular human-readable molecule type.

Supports 'protein', 'dayhoff', 'hp', 'DNA'.
@CTB deprecate for 4.0?
"""
if molecule.lower() not in ('protein', 'dayhoff', 'hp', 'dna'):
raise ValueError("unknown moltype in query, '{}'".format(molecule))
return molecule == self.moltype

@property
def moltype(self): # TODO: test in minhash tests
if self.is_protein:
Expand Down
2 changes: 1 addition & 1 deletion sourmash/sbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ def search(self, query, *args, **kwargs):
if tree_mh.scaled and query.minhash.scaled and \
tree_mh.scaled > query.minhash.scaled:
resampled_query_mh = tree_query.minhash
resampled_query_mh = resampled_query_mh.downsample_scaled(tree_mh.scaled)
resampled_query_mh = resampled_query_mh.downsample(scaled=tree_mh.scaled)
tree_query = SourmashSignature(resampled_query_mh)

# define both search function and post-search calculation function
Expand Down
4 changes: 2 additions & 2 deletions sourmash/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def search_databases(query, databases, threshold, do_containment, best_only,
# build a new query object, subtracting found mins and downsampling
def _subtract_and_downsample(to_remove, old_query, scaled=None):
mh = old_query.minhash
mh = mh.downsample_scaled(scaled)
mh = mh.downsample(scaled=scaled)
mh.remove_many(to_remove)

return SourmashSignature(mh)
Expand Down Expand Up @@ -171,7 +171,7 @@ def gather_databases(query, databases, threshold_bp, ignore_abundance):
float(len(orig_query_mins))

# calculate fractions wrt second denominator - metagenome size
orig_query_mh = orig_query_mh.downsample_scaled(cmp_scaled)
orig_query_mh = orig_query_mh.downsample(scaled=cmp_scaled)
query_n_mins = len(orig_query_mh)
f_unique_to_query = len(intersect_mins) / float(query_n_mins)

Expand Down
15 changes: 0 additions & 15 deletions sourmash_lib/__init__.py

This file was deleted.

Loading