Skip to content

Commit

Permalink
Integration tests, benchmark updates, subclass Buffer (#83)
Browse files Browse the repository at this point in the history
Will close #30

Add integration tests with data compressed by third party implementations
Update benchmarks for use with igzip
Allow subclassing cramjam.Buffer
  • Loading branch information
milesgranger authored Jul 15, 2022
1 parent fb8d88a commit 0d40139
Show file tree
Hide file tree
Showing 14 changed files with 130 additions and 41 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,4 @@ dmypy.json
#Added by cargo

/target
.vscode/
1 change: 1 addition & 0 deletions benchmark-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ python-snappy==0.5.4
lz4==3.1.0
brotlipy==0.7.0
zstd==1.5.0.2
isal==0.11.1
numpy
memory-profiler
79 changes: 48 additions & 31 deletions benchmarks/README.md

Large diffs are not rendered by default.

24 changes: 16 additions & 8 deletions benchmarks/test_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,27 +121,35 @@ def test_cramjam_snappy_de_compress_into(benchmark, op, file):
)


@pytest.mark.parametrize(
"use_cramjam", (True, False), ids=lambda val: "cramjam" if val else "gzip"
)
@pytest.mark.parametrize("lib", ("gzip", "cramjam", "isal"), ids=lambda val: val)
@pytest.mark.parametrize("file", FILES, ids=lambda val: val.name)
def test_gzip(benchmark, file, use_cramjam: bool):
def test_gzip(benchmark, file, lib):
from isal import igzip

data = file.read_bytes()
if use_cramjam:
if lib == "cramjam":
benchmark(
round_trip,
compress=cramjam.gzip.compress,
decompress=cramjam.gzip.decompress,
data=data,
level=9,
level=3,
)
else:
elif lib == "gzip":
benchmark(
round_trip,
compress=gzip.compress,
decompress=gzip.decompress,
data=data,
compresslevel=9,
compresslevel=3,
)
else:
benchmark(
round_trip,
compress=igzip.compress,
decompress=igzip.decompress,
data=data,
compresslevel=igzip._COMPRESS_LEVEL_BEST, # 3
)


Expand Down
2 changes: 1 addition & 1 deletion dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
black==21.5b1
black==22.3.0
maturin
numpy
pytest>=5.3.0
Expand Down
2 changes: 1 addition & 1 deletion src/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ impl RustyFile {
/// b'bytes'
/// ```
///
#[pyclass(name = "Buffer")]
#[pyclass(subclass, name = "Buffer")]
#[derive(Default)]
pub struct RustyBuffer {
pub(crate) inner: Cursor<Vec<u8>>,
Expand Down
21 changes: 21 additions & 0 deletions tests/data/integration/plaintext.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!
Binary file added tests/data/integration/plaintext.txt.br
Binary file not shown.
Binary file added tests/data/integration/plaintext.txt.bz2
Binary file not shown.
Binary file added tests/data/integration/plaintext.txt.gz
Binary file not shown.
Binary file added tests/data/integration/plaintext.txt.lz4
Binary file not shown.
Binary file added tests/data/integration/plaintext.txt.snappy
Binary file not shown.
Binary file added tests/data/integration/plaintext.txt.zst
Binary file not shown.
41 changes: 41 additions & 0 deletions tests/test_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
Test decompressing files which have been compressed by
mainstream third-party implementations, separate from this project.
"""
import sys
import pathlib
from collections import namedtuple

import pytest
import cramjam


@pytest.fixture
def integration_dir():
    """Return the ``data/integration`` directory located beside this test module."""
    tests_root = pathlib.Path(__file__).parent
    return tests_root / "data/integration"


@pytest.fixture
def plaintext(integration_dir):
    """Return the raw bytes of ``plaintext.txt`` from the integration directory."""
    reference_file = integration_dir / "plaintext.txt"
    return reference_file.read_bytes()


# Pairs a cramjam submodule name (looked up with ``getattr(cramjam, name)``)
# with the file suffix appended to ``plaintext.txt.`` for its compressed fixture.
Variant = namedtuple("Variant", ["name", "suffix"])


@pytest.mark.skipif(
    sys.platform.startswith("win"),
    reason="Bytes comparison fails on windows",
)
@pytest.mark.parametrize(
    "variant",
    (
        Variant("gzip", "gz"),
        Variant("bzip2", "bz2"),
        Variant("zstd", "zst"),
        Variant("brotli", "br"),
        Variant("lz4", "lz4"),
        Variant("snappy", "snappy"),
    ),
)
def test_variant(variant: Variant, integration_dir: pathlib.Path, plaintext: bytes):
    """Decompress ``plaintext.txt.<suffix>`` with the matching cramjam module.

    The decompressed output, converted to ``bytes``, must equal the contents
    of the uncompressed ``plaintext.txt`` supplied by the ``plaintext`` fixture.
    """
    compressed_path = integration_dir.joinpath(f"plaintext.txt.{variant.suffix}")
    # Resolve the codec by name, e.g. ``cramjam.gzip`` for ``Variant("gzip", "gz")``.
    codec = getattr(cramjam, variant.name)
    decompress = codec.decompress
    payload = compressed_path.read_bytes()
    restored = bytes(decompress(payload))
    assert restored == plaintext

0 comments on commit 0d40139

Please sign in to comment.