Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pre-commit updates #2427

Merged
merged 5 commits into from
Feb 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 25 additions & 70 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,71 +1,26 @@
# Pre-commit configuration (post-merge state of this PR's diff).
# NOTE(review): this span contained a unified diff — both the removed old
# hooks and the added new hooks; only the merged result is kept below.
default_language_version:
  python: python3
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.4.0
    hooks:
      - id: check-ast
      - id: check-builtin-literals
      - id: check-docstring-first
      - id: check-merge-conflict
      - id: check-yaml
      - id: check-toml
      - id: debug-statements
  # lint + format with ruff (replaces the old darker/black/flake8/isort stack)
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.2.0
    hooks:
      - id: ruff-format
      - id: ruff
        args: ["--fix", "--unsafe-fixes", "--exit-non-zero-on-fix"]
  - repo: https://github.com/tox-dev/tox-ini-fmt
    rev: "0.5.2"
    hooks:
      - id: tox-ini-fmt
        args: ["-p", "fix_lint"]
  # meta hooks: sanity-check the pre-commit config itself
  - repo: meta
    hooks:
      - id: check-hooks-apply
      - id: check-useless-excludes
65 changes: 36 additions & 29 deletions benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,43 +4,44 @@
from sourmash.sbt_storage import ZipStorage
from sourmash.minhash import MinHash

# Parameters for generating random test sequences.
RANDOM_SEQ_SIZE = 3000
RANDOM_SEQ_NUMBER = 300

# MinHash construction parameters.
MINHASH_NUM = 500
MINHASH_K = 21

# Iteration counts for the individual benchmark methods.
GET_MINS_RANGE = 500
ADD_HASH_RANGE = 10_000
ADD_MANY_RANGE = 1000
SIMILARITY_TIMES = 500
COUNT_COMMON_TIMES = 500
MERGE_TIMES = 500
COPY_TIMES = 500
CONCAT_TIMES = 500
SET_ABUNDANCES_RANGE = 500
ZIP_STORAGE_WRITE = 100_000
ZIP_STORAGE_LOAD = 20


def load_sequences():
    """Return 10 random sequences of A/C/G/T characters for benchmarking.

    Each sequence is RANDOM_SEQ_NUMBER characters sampled (without
    replacement) from a pool of RANDOM_SEQ_SIZE copies of each base.
    Results are random; callers that need determinism must seed `random`.
    """
    # NOTE(review): the diff showed the same sample call twice (old and new
    # formatting); only one call per iteration is kept.
    sequences = []
    for _ in range(10):
        random_seq = random.sample(
            "A,C,G,T".split(",") * RANDOM_SEQ_SIZE, RANDOM_SEQ_NUMBER
        )
        sequences.append("".join(random_seq))
    return sequences


class TimeMinHashSuite:
def setup(self):
    """Build the MinHash fixtures used by the timing benchmarks.

    Creates an empty DNA MinHash, an empty protein MinHash, the shared
    random sequences, and a DNA MinHash pre-populated with all sequences.
    Abundance tracking is off for all of them.
    """
    # NOTE(review): duplicated old/new diff lines collapsed to the merged form.
    self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
    self.protein_mh = MinHash(
        MINHASH_NUM, MINHASH_K, is_protein=True, track_abundance=False
    )
    self.sequences = load_sequences()

    self.populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
    for seq in self.sequences:
        self.populated_mh.add_sequence(seq)

Expand Down Expand Up @@ -103,8 +104,9 @@ def time_concat(self):
class PeakmemMinHashSuite:
def setup(self):
    """Build MinHash fixtures for the peak-memory benchmarks.

    Unlike the timing suite, abundance tracking is ON for both the DNA
    and the protein MinHash.
    """
    # NOTE(review): duplicated old/new diff lines collapsed to the merged form.
    self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
    self.protein_mh = MinHash(
        MINHASH_NUM, MINHASH_K, is_protein=True, track_abundance=True
    )
    self.sequences = load_sequences()

def peakmem_add_sequence(self):
Expand Down Expand Up @@ -158,21 +160,25 @@ def time_set_abundances_noclear(self):
for i in range(SET_ABUNDANCES_RANGE):
mh.set_abundances(mins, clear=False)


class PeakmemMinAbundanceSuite(PeakmemMinHashSuite):
    # Peak-memory benchmarks that reuse the parent suite's fixtures.

    def setup(self):
        """Run the parent setup, then re-create self.mh fresh.

        Re-assigns self.mh to a new abundance-tracking MinHash after the
        parent setup has populated the other fixtures.
        """
        PeakmemMinHashSuite.setup(self)
        self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)


####################

class TimeZipStorageSuite:

class TimeZipStorageSuite:
def setup(self):
import zipfile

self.zipfile = NamedTemporaryFile()

with zipfile.ZipFile(self.zipfile, mode='w',
compression=zipfile.ZIP_STORED) as storage:
with zipfile.ZipFile(
self.zipfile, mode="w", compression=zipfile.ZIP_STORED
) as storage:
for i in range(ZIP_STORAGE_WRITE):
# just so we have lots of entries
storage.writestr(str(i), b"0")
Expand All @@ -196,17 +202,18 @@ def teardown(self):
class PeakmemZipStorageSuite:
def setup(self):
    """Create a temporary zip file fixture for ZipStorage peak-memory tests.

    Writes ZIP_STORAGE_WRITE tiny one-byte entries plus one ~1 MB entry
    ("sig1") into an uncompressed (ZIP_STORED) archive backed by a
    NamedTemporaryFile kept on self.zipfile.
    """
    # local import keeps zipfile out of module import time (original style)
    import zipfile

    # NOTE(review): duplicated old/new diff lines collapsed to the merged form.
    self.zipfile = NamedTemporaryFile()

    with zipfile.ZipFile(
        self.zipfile, mode="w", compression=zipfile.ZIP_STORED
    ) as storage:
        for i in range(ZIP_STORAGE_WRITE):
            # just so we have lots of entries
            storage.writestr(str(i), b"0")
        # one big-ish entry
        storage.writestr("sig1", b"9" * 1_000_000)


def peakmem_load_from_zipstorage(self):
with ZipStorage(self.zipfile.name) as storage:
for i in range(ZIP_STORAGE_LOAD):
Expand Down
Loading
Loading