Skip to content

Commit

Permalink
Add De/Compressors, igzip testing
Browse files Browse the repository at this point in the history
  • Loading branch information
milesgranger committed Sep 23, 2024
1 parent 6c54636 commit fae7b1f
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 12 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ wasm32-compat = ["libcramjam/wasm32-compat"]
[dependencies]
pyo3 = { version = "^0.22", default-features = false, features = ["macros"] }
# libcramjam = { version = "0.4.6", default-features = false }
# libcramjam = { path = "../libcramjam", default-features = false }
libcramjam = { git = "https://github.com/cramjam/libcramjam.git", branch = "add-igzip" }
libcramjam = { path = "../libcramjam", default-features = false }
# libcramjam = { git = "https://github.com/cramjam/libcramjam.git", branch = "add-igzip" }

[build-dependencies]
pyo3-build-config = "^0.22"
Expand Down
45 changes: 45 additions & 0 deletions src/igzip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,49 @@ pub mod igzip {
crate::generic!(py, libcramjam::igzip::decompress[input, output]).map_err(DecompressionError::from_err)
}

/// IGZIP Compressor object for streaming compression
#[pyclass(unsendable)] // TODO: make sendable
pub struct Compressor {
inner: Option<libcramjam::igzip::isal::igzip::write::Encoder<Cursor<Vec<u8>>>>,
}

#[pymethods]
impl Compressor {
/// Initialize a new `Compressor` instance.
#[new]
#[pyo3(signature = (level=None))]
pub fn __init__(level: Option<u32>) -> PyResult<Self> {
let level = level.unwrap_or(DEFAULT_COMPRESSION_LEVEL);
let inner = libcramjam::igzip::isal::igzip::write::Encoder::new(
Cursor::new(vec![]),
libcramjam::igzip::isal::igzip::CompressionLevel::try_from(level as isize)
.map_err(CompressionError::from_err)?,
true,
);
Ok(Self { inner: Some(inner) })
}

/// Compress input into the current compressor's stream.
pub fn compress(&mut self, input: &[u8]) -> PyResult<usize> {
crate::io::stream_compress(&mut self.inner, input)
}

/// Flush and return current compressed stream
pub fn flush(&mut self) -> PyResult<RustyBuffer> {
crate::io::stream_flush(&mut self.inner, |e| e.get_ref_mut())
}

/// Consume the current compressor state and return the compressed stream
/// **NB** The compressor will not be usable after this method is called.
pub fn finish(&mut self) -> PyResult<RustyBuffer> {
crate::io::stream_finish(&mut self.inner, |inner| inner.finish().map(|c| c.into_inner()))
}
}

mod _decompressor {
use super::*;
crate::make_decompressor!(gzip);
}
#[pymodule_export]
use _decompressor::Decompressor;
}
36 changes: 26 additions & 10 deletions tests/test_variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,28 @@
import numpy as np
import cramjam
import hashlib
from datetime import timedelta
from hypothesis import strategies as st, given, settings
from hypothesis.extra import numpy as st_np


VARIANTS = ("snappy", "brotli", "bzip2", "lz4", "gzip", "deflate", "zstd", "xz")
VARIANTS = (
"snappy",
"brotli",
"bzip2",
"lz4",
"gzip",
"deflate",
"zstd",
"xz",
)

if not hasattr(cramjam, "blosc2") and hasattr(cramjam, "experimental"):
cramjam.blosc2 = cramjam.experimental.blosc2
for experimental_feat in ("blosc2", "igzip"):
if not hasattr(cramjam, experimental_feat) and hasattr(cramjam, "experimental"):
mod = getattr(cramjam.experimental, experimental_feat)
setattr(cramjam, experimental_feat, mod)

if hasattr(cramjam, 'blosc2'):
VARIANTS = (*VARIANTS, "blosc2")
if hasattr(cramjam, experimental_feat):
VARIANTS = (*VARIANTS, experimental_feat)

# Some OS can be slow or have higher variability in their runtimes on CI
settings.register_profile("local", deadline=None, max_examples=20)
Expand Down Expand Up @@ -52,7 +62,9 @@ def test_variants_different_dtypes(variant_str, arr, is_pypy):
try:
compressed = variant.compress(arr)
except:
pytest.xfail(reason="PyPy struggles w/ multidim buffer views depending on dtype ie datetime[64]")
pytest.xfail(
reason="PyPy struggles w/ multidim buffer views depending on dtype ie datetime[64]"
)
else:
compressed = variant.compress(arr)
decompressed = variant.decompress(compressed)
Expand Down Expand Up @@ -100,7 +112,7 @@ def test_variants_compress_into(
# decompress_into appears to work fine.
# Further todo is finding out why with pytest.raises(...) is delayed in
# detecting the raised error. :S
if variant_str == 'blosc2' and output_type == cramjam.File:
if variant_str == "blosc2" and output_type == cramjam.File:
pytest.skip("NotImplementedError for blosc2 into File")

variant = getattr(cramjam, variant_str)
Expand Down Expand Up @@ -137,7 +149,9 @@ def test_variants_compress_into(
output = output_type(b"0" * compressed_len)

if is_pypy and isinstance(output, (bytes, memoryview)):
pytest.xfail(reason="PyPy de/compress_into w/ bytes or memoryview is a bit flaky behavior")
pytest.xfail(
reason="PyPy de/compress_into w/ bytes or memoryview is a bit flaky behavior"
)

n_bytes = variant.compress_into(input, output)
assert n_bytes == compressed_len
Expand Down Expand Up @@ -196,7 +210,9 @@ def test_variants_decompress_into(
output = output_type(b"0" * len(raw_data))

if is_pypy and isinstance(output, (bytes, memoryview)):
pytest.xfail(reason="PyPy de/compress_into w/ bytes or memoryview is a bit flaky behavior")
pytest.xfail(
reason="PyPy de/compress_into w/ bytes or memoryview is a bit flaky behavior"
)

n_bytes = variant.decompress_into(input, output)
assert n_bytes == len(raw_data)
Expand Down

0 comments on commit fae7b1f

Please sign in to comment.