Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pre-commit updates #2427

Merged
merged 5 commits into from
Feb 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 25 additions & 70 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,71 +1,26 @@
# Pre-commit configuration (post-merge state of this PR's diff).
# NOTE(review): this span contained a unified diff — both the removed old
# hooks and the added new hooks; only the merged result is kept below.
default_language_version:
  python: python3
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.4.0
    hooks:
      - id: check-ast
      - id: check-builtin-literals
      - id: check-docstring-first
      - id: check-merge-conflict
      - id: check-yaml
      - id: check-toml
      - id: debug-statements
  # lint + format with ruff (replaces the old darker/black/flake8/isort stack)
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.2.0
    hooks:
      - id: ruff-format
      - id: ruff
        args: ["--fix", "--unsafe-fixes", "--exit-non-zero-on-fix"]
  - repo: https://github.com/tox-dev/tox-ini-fmt
    rev: "0.5.2"
    hooks:
      - id: tox-ini-fmt
        args: ["-p", "fix_lint"]
  # meta hooks: sanity-check the pre-commit config itself
  - repo: meta
    hooks:
      - id: check-hooks-apply
      - id: check-useless-excludes
65 changes: 36 additions & 29 deletions benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,43 +4,44 @@
from sourmash.sbt_storage import ZipStorage
from sourmash.minhash import MinHash

# Parameters for generating random test sequences.
RANDOM_SEQ_SIZE = 3000
RANDOM_SEQ_NUMBER = 300

# MinHash construction parameters.
MINHASH_NUM = 500
MINHASH_K = 21

# Iteration counts for the individual benchmark methods.
GET_MINS_RANGE = 500
ADD_HASH_RANGE = 10_000
ADD_MANY_RANGE = 1000
SIMILARITY_TIMES = 500
COUNT_COMMON_TIMES = 500
MERGE_TIMES = 500
COPY_TIMES = 500
CONCAT_TIMES = 500
SET_ABUNDANCES_RANGE = 500
ZIP_STORAGE_WRITE = 100_000
ZIP_STORAGE_LOAD = 20


def load_sequences():
    """Return 10 random sequences of A/C/G/T characters for benchmarking.

    Each sequence is RANDOM_SEQ_NUMBER characters sampled (without
    replacement) from a pool of RANDOM_SEQ_SIZE copies of each base.
    Results are random; callers that need determinism must seed `random`.
    """
    # NOTE(review): the diff showed the same sample call twice (old and new
    # formatting); only one call per iteration is kept.
    sequences = []
    for _ in range(10):
        random_seq = random.sample(
            "A,C,G,T".split(",") * RANDOM_SEQ_SIZE, RANDOM_SEQ_NUMBER
        )
        sequences.append("".join(random_seq))
    return sequences


class TimeMinHashSuite:
def setup(self):
    """Build the MinHash fixtures used by the timing benchmarks.

    Creates an empty DNA MinHash, an empty protein MinHash, the shared
    random sequences, and a DNA MinHash pre-populated with all sequences.
    Abundance tracking is off for all of them.
    """
    # NOTE(review): duplicated old/new diff lines collapsed to the merged form.
    self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
    self.protein_mh = MinHash(
        MINHASH_NUM, MINHASH_K, is_protein=True, track_abundance=False
    )
    self.sequences = load_sequences()

    self.populated_mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=False)
    for seq in self.sequences:
        self.populated_mh.add_sequence(seq)

Expand Down Expand Up @@ -103,8 +104,9 @@ def time_concat(self):
class PeakmemMinHashSuite:
def setup(self):
    """Build MinHash fixtures for the peak-memory benchmarks.

    Unlike the timing suite, abundance tracking is ON for both the DNA
    and the protein MinHash.
    """
    # NOTE(review): duplicated old/new diff lines collapsed to the merged form.
    self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)
    self.protein_mh = MinHash(
        MINHASH_NUM, MINHASH_K, is_protein=True, track_abundance=True
    )
    self.sequences = load_sequences()

def peakmem_add_sequence(self):
Expand Down Expand Up @@ -158,21 +160,25 @@ def time_set_abundances_noclear(self):
for i in range(SET_ABUNDANCES_RANGE):
mh.set_abundances(mins, clear=False)


class PeakmemMinAbundanceSuite(PeakmemMinHashSuite):
    # Peak-memory benchmarks that reuse the parent suite's fixtures.

    def setup(self):
        """Run the parent setup, then re-create self.mh fresh.

        Re-assigns self.mh to a new abundance-tracking MinHash after the
        parent setup has populated the other fixtures.
        """
        PeakmemMinHashSuite.setup(self)
        self.mh = MinHash(MINHASH_NUM, MINHASH_K, track_abundance=True)


####################

class TimeZipStorageSuite:

class TimeZipStorageSuite:
def setup(self):
import zipfile

self.zipfile = NamedTemporaryFile()

with zipfile.ZipFile(self.zipfile, mode='w',
compression=zipfile.ZIP_STORED) as storage:
with zipfile.ZipFile(
self.zipfile, mode="w", compression=zipfile.ZIP_STORED
) as storage:
for i in range(ZIP_STORAGE_WRITE):
# just so we have lots of entries
storage.writestr(str(i), b"0")
Expand All @@ -196,17 +202,18 @@ def teardown(self):
class PeakmemZipStorageSuite:
def setup(self):
    """Create a temporary zip file fixture for ZipStorage peak-memory tests.

    Writes ZIP_STORAGE_WRITE tiny one-byte entries plus one ~1 MB entry
    ("sig1") into an uncompressed (ZIP_STORED) archive backed by a
    NamedTemporaryFile kept on self.zipfile.
    """
    # local import keeps zipfile out of module import time (original style)
    import zipfile

    # NOTE(review): duplicated old/new diff lines collapsed to the merged form.
    self.zipfile = NamedTemporaryFile()

    with zipfile.ZipFile(
        self.zipfile, mode="w", compression=zipfile.ZIP_STORED
    ) as storage:
        for i in range(ZIP_STORAGE_WRITE):
            # just so we have lots of entries
            storage.writestr(str(i), b"0")
        # one big-ish entry
        storage.writestr("sig1", b"9" * 1_000_000)


def peakmem_load_from_zipstorage(self):
with ZipStorage(self.zipfile.name) as storage:
for i in range(ZIP_STORAGE_LOAD):
Expand Down
Loading
Loading