Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce allocations & accept bytes and bytearray inputs #22

Merged
merged 12 commits into from
Feb 15, 2021
Merged
6 changes: 4 additions & 2 deletions .github/workflows/master.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: [3.6, 3.7, 3.8]
python-version: [3.6, 3.7, 3.8, 3.9]
steps:
- uses: actions/checkout@v2

Expand All @@ -28,7 +28,9 @@ jobs:
override: true

- name: Python Dev Install
run: pip install -r dev-requirements.txt
run: |
pip install --upgrade pip
pip install -r dev-requirements.txt

- name: Build Wheels - Linux
if: startsWith(matrix.os, 'ubuntu')
Expand Down
6 changes: 4 additions & 2 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: [3.6]
python-version: [3.6, 3.7, 3.8, 3.9]
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
Expand All @@ -28,7 +28,9 @@ jobs:
architecture: 'x64'

- name: Install Dev requirements
run: pip install -r dev-requirements.txt
run: |
pip install --upgrade pip
pip install -r dev-requirements.txt

- name: Build Wheels - Linux
if: startsWith(matrix.os, 'ubuntu')
Expand Down
6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "cramjam"
version = "1.3.2"
version = "2.0.0-rc1"
authors = ["Miles Granger <miles59923@gmail.com>"]
edition = "2018"
license-file = "LICENSE"
Expand All @@ -11,9 +11,9 @@ description = "Thin Python bindings to de/compression algorithms in Rust"
crate-type = ["cdylib"]

[dependencies]
pyo3 = { version = "0.13.1", features = ["abi3-py36", "extension-module"] }
pyo3 = { version = "0.13.1", features = ["extension-module"] }
snap = "^1"
brotli2 = "^0.3"
lz-fear = "0.1.1"
flate2 = "^1"
zstd = "0.5.1+zstd.1.4.4"
zstd = "0.6.0+zstd.1.4.8"
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,5 @@ dev-install:
rm -rf ./wheels
maturin build --release --out wheels --interpreter $(shell which python)
pip uninstall cramjam -y
rm wheels/*.tar.gz
pip install --no-index wheels/*
18 changes: 14 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,20 @@ All available for use as:

```python
>>> import cramjam
>>> compessed = cramjam.snappy_compress(b"bytes here")
>>> cramjam.snappy_decompress(compressed)
>>> compressed = cramjam.snappy.compress(b"bytes here")
>>> cramjam.snappy.decompress(compressed)
b"bytes here"
```

Where the API is `cramjam.<compression-variant>_compress/decompress` and only accepts
python `byte` strings
Where the API is `cramjam.<compression-variant>.compress/decompress` and accepts
both `bytes` and `bytearray` objects.

**Special note!**
If you know the length of the de/compress output, you
can provide `output_len=<<some int>>` to any `de/compress`
call to get a ~1.5-3x performance increase, as this allows a single
buffer allocation.

For `snappy` with `bytearray`s, it's only a mild improvement
as we currently are able to estimate the buffer size and can
resize the resulting `bytearray` to the correct size.
320 changes: 184 additions & 136 deletions benchmarks/README.md

Large diffs are not rendered by default.

49 changes: 30 additions & 19 deletions benchmarks/test_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,19 @@ def round_trip(compress, decompress, data, **kwargs):
"use_cramjam", (True, False), ids=lambda val: "cramjam" if val else "snappy"
)
@pytest.mark.parametrize("file", FILES, ids=lambda val: val.name)
def test_snappy_raw(benchmark, file, use_cramjam: bool):
def test_snappy(benchmark, file, use_cramjam: bool):
"""
Uses the non-framed format for snappy compression
"""
import snappy

data = file.read_bytes()
data = bytearray(file.read_bytes()) # bytearray avoids double allocation in cramjam snappy by default
# Can be slightly faster if passing output_len to compress/decompress ops
if use_cramjam:
benchmark(
round_trip,
compress=cramjam.snappy_compress_raw,
decompress=cramjam.snappy_decompress_raw,
compress=cramjam.snappy.compress,
decompress=cramjam.snappy.decompress,
data=data,
)
else:
Expand All @@ -45,17 +46,27 @@ def test_snappy_raw(benchmark, file, use_cramjam: bool):
@pytest.mark.parametrize(
"use_cramjam", (True, False), ids=lambda val: "cramjam" if val else "gzip"
)
@pytest.mark.parametrize("set_output_len", (True, False), ids=lambda val: f"used-output_len={val}")
@pytest.mark.parametrize("file", FILES, ids=lambda val: val.name)
def test_gzip(benchmark, file, use_cramjam: bool):
def test_gzip(benchmark, file, use_cramjam: bool, set_output_len: bool):
data = file.read_bytes()
if use_cramjam:
benchmark(
round_trip,
compress=cramjam.gzip_compress,
decompress=cramjam.gzip_decompress,
data=data,
level=9,
)
if set_output_len:
compressed_len = len(cramjam.gzip.compress(data))
benchmark(
round_trip,
compress=lambda bytes: cramjam.gzip.compress(bytes, level=9, output_len=compressed_len),
decompress=lambda bytes: cramjam.gzip.decompress(bytes, output_len=len(data)),
data=data,
)
else:
benchmark(
round_trip,
compress=cramjam.gzip.compress,
decompress=cramjam.gzip.decompress,
data=data,
level=9,
)
else:
benchmark(
round_trip,
Expand All @@ -77,8 +88,8 @@ def test_lz4(benchmark, file, use_cramjam: bool):
if use_cramjam:
benchmark(
round_trip,
compress=cramjam.lz4_compress,
decompress=cramjam.lz4_decompress,
compress=cramjam.lz4.compress,
decompress=cramjam.lz4.decompress,
data=data,
level=4,
)
Expand All @@ -103,8 +114,8 @@ def test_brotli(benchmark, file, use_cramjam: bool):
if use_cramjam:
benchmark(
round_trip,
compress=cramjam.brotli_compress,
decompress=cramjam.brotli_decompress,
compress=cramjam.brotli.compress,
decompress=cramjam.brotli.decompress,
data=data,
)
else:
Expand All @@ -127,9 +138,9 @@ def test_zstd(benchmark, file, use_cramjam: bool):
if use_cramjam:
benchmark(
round_trip,
compress=cramjam.zstd_compress,
decompress=cramjam.zstd_decompress,
data=data,
compress=cramjam.zstd.compress,
decompress=cramjam.zstd.decompress,
data=data
)
else:
benchmark(
Expand Down
149 changes: 134 additions & 15 deletions src/brotli.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,138 @@
use brotli2::read::{BrotliDecoder, BrotliEncoder};
use std::error::Error;
use std::io::prelude::*;
use crate::exceptions::{CompressionError, DecompressionError};
use crate::{to_py_err, BytesType, Output};
use pyo3::prelude::*;
use pyo3::types::{PyByteArray, PyBytes};
use pyo3::wrap_pyfunction;
use pyo3::{PyResult, Python};

/// Decompress via Brotli
pub fn decompress(data: &[u8]) -> Result<Vec<u8>, Box<dyn Error>> {
let mut decoder = BrotliDecoder::new(data);
let mut buf = vec![];
decoder.read_to_end(&mut buf)?;
Ok(buf)
/// Register this module's `compress` and `decompress` functions on the
/// Python module `m`.
pub fn init_py_module(m: &PyModule) -> PyResult<()> {
    let compress_fn = wrap_pyfunction!(compress, m)?;
    m.add_function(compress_fn)?;

    let decompress_fn = wrap_pyfunction!(decompress, m)?;
    m.add_function(decompress_fn)?;

    Ok(())
}

/// Compress via Brotli
pub fn compress(data: &[u8], level: u32) -> Result<Vec<u8>, Box<dyn Error>> {
let mut encoder = BrotliEncoder::new(data, level);
let mut buf = vec![];
encoder.read_to_end(&mut buf)?;
Ok(buf)
/// Brotli decompression.
///
/// Accepts either `bytes` or `bytearray` and returns the same kind of object.
///
/// When `output_len` is given, the result object is allocated once at that
/// size and decoded into directly. If fewer than `output_len` bytes are
/// produced, the result is trimmed to the number of bytes actually written
/// (for `bytes` this costs one extra copy, for `bytearray` an in-place resize).
/// When `output_len` is omitted, the data is decoded into an intermediate
/// `Vec` and then copied into the Python object.
///
/// Failures from the decoder are converted through `to_py_err!` with
/// `DecompressionError`.
///
/// Python Example
/// --------------
/// ```python
/// >>> cramjam.brotli.decompress(compressed_bytes, output_len=Optional[int])
/// ```
#[pyfunction]
pub fn decompress<'a>(py: Python<'a>, data: BytesType<'a>, output_len: Option<usize>) -> PyResult<BytesType<'a>> {
    match data {
        BytesType::Bytes(input) => match output_len {
            Some(len) => {
                let mut size = 0;
                let pybytes = PyBytes::new_with(py, len, |buffer| {
                    let output = Output::Slice(buffer);
                    size = to_py_err!(DecompressionError -> self::internal::decompress(input.as_bytes(), output))?;
                    Ok(())
                })?;
                if size < len {
                    // `bytes` cannot be resized in place; copy only the part that was
                    // written so the caller never sees unwritten trailing bytes.
                    Ok(BytesType::Bytes(PyBytes::new(py, &pybytes.as_bytes()[..size])))
                } else {
                    Ok(BytesType::Bytes(pybytes))
                }
            }
            None => {
                // Initial capacity is only a starting hint; the vector grows as needed.
                let mut buffer = Vec::with_capacity(data.len() / 10);
                let output = Output::Vector(&mut buffer);
                to_py_err!(DecompressionError -> self::internal::decompress(input.as_bytes(), output))?;
                Ok(BytesType::Bytes(PyBytes::new(py, &buffer)))
            }
        },
        BytesType::ByteArray(input) => match output_len {
            Some(len) => {
                let mut size = 0;
                let pybytes = PyByteArray::new_with(py, len, |buffer| {
                    let output = Output::Slice(buffer);
                    // SAFETY: NOTE(review) relies on `input` not being mutated or resized
                    // while this slice is alive; nothing in this closure calls back into
                    // Python code -- confirm the GIL is held for the whole call.
                    let source = unsafe { input.as_bytes() };
                    size = to_py_err!(DecompressionError -> self::internal::decompress(source, output))?;
                    Ok(())
                })?;
                // Shrink to the number of bytes actually decoded.
                pybytes.resize(size)?;
                Ok(BytesType::ByteArray(pybytes))
            }
            None => {
                // Initial capacity is only a starting hint; the vector grows as needed.
                let mut buffer = Vec::with_capacity(data.len() / 10);
                let output = Output::Vector(&mut buffer);
                // SAFETY: NOTE(review) relies on `input` not being mutated or resized
                // while this slice is alive -- confirm the GIL is held for the whole call.
                let source = unsafe { input.as_bytes() };
                to_py_err!(DecompressionError -> self::internal::decompress(source, output))?;
                Ok(BytesType::ByteArray(PyByteArray::new(py, &buffer)))
            }
        },
    }
}

/// Brotli compression.
///
/// Accepts either `bytes` or `bytearray` and returns the same kind of object.
/// `level` defaults to 11 when not supplied.
///
/// When `output_len` is given, the result object is allocated once at that
/// size and encoded into directly. If fewer than `output_len` bytes are
/// produced, the result is trimmed to the number of bytes actually written
/// (for `bytes` this costs one extra copy, for `bytearray` an in-place resize).
/// When `output_len` is omitted, the data is encoded into an intermediate
/// `Vec` and then copied into the Python object.
///
/// Failures from the encoder are converted through `to_py_err!` with
/// `CompressionError`.
///
/// Python Example
/// --------------
/// ```python
/// >>> cramjam.brotli.compress(b'some bytes here', level=9, output_len=Optional[int])  # level defaults to 11
/// ```
#[pyfunction]
pub fn compress<'a>(
    py: Python<'a>,
    data: BytesType<'a>,
    level: Option<u32>,
    output_len: Option<usize>,
) -> PyResult<BytesType<'a>> {
    let level = level.unwrap_or(11);
    match data {
        BytesType::Bytes(input) => match output_len {
            Some(len) => {
                let mut size = 0;
                let pybytes = PyBytes::new_with(py, len, |buffer| {
                    let output = Output::Slice(buffer);
                    size = to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), output, level))?;
                    Ok(())
                })?;
                if size < len {
                    // `bytes` cannot be resized in place; copy only the part that was
                    // written so the caller never sees unwritten trailing bytes.
                    Ok(BytesType::Bytes(PyBytes::new(py, &pybytes.as_bytes()[..size])))
                } else {
                    Ok(BytesType::Bytes(pybytes))
                }
            }
            None => {
                // Initial capacity is only a starting hint; the vector grows as needed.
                let mut buffer = Vec::with_capacity(data.len() / 10);
                let output = Output::Vector(&mut buffer);
                to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), output, level))?;
                Ok(BytesType::Bytes(PyBytes::new(py, &buffer)))
            }
        },
        BytesType::ByteArray(input) => match output_len {
            Some(len) => {
                let mut size = 0;
                let pybytes = PyByteArray::new_with(py, len, |buffer| {
                    let output = Output::Slice(buffer);
                    // SAFETY: NOTE(review) relies on `input` not being mutated or resized
                    // while this slice is alive; nothing in this closure calls back into
                    // Python code -- confirm the GIL is held for the whole call.
                    let source = unsafe { input.as_bytes() };
                    size = to_py_err!(CompressionError -> self::internal::compress(source, output, level))?;
                    Ok(())
                })?;
                // Shrink to the number of bytes actually encoded.
                pybytes.resize(size)?;
                Ok(BytesType::ByteArray(pybytes))
            }
            None => {
                // Initial capacity is only a starting hint; the vector grows as needed.
                let mut buffer = Vec::with_capacity(data.len() / 10);
                let output = Output::Vector(&mut buffer);
                // SAFETY: NOTE(review) relies on `input` not being mutated or resized
                // while this slice is alive -- confirm the GIL is held for the whole call.
                let source = unsafe { input.as_bytes() };
                to_py_err!(CompressionError -> self::internal::compress(source, output, level))?;
                Ok(BytesType::ByteArray(PyByteArray::new(py, &buffer)))
            }
        },
    }
}

mod internal {

    use crate::Output;
    use brotli2::read::{BrotliDecoder, BrotliEncoder};
    use std::io::prelude::*;
    use std::io::{Error, ErrorKind};

    /// Read from `reader` into `buf` until the buffer is full or the reader
    /// reports end-of-stream, returning the number of bytes written.
    ///
    /// A single `Read::read` call is allowed to return fewer bytes than are
    /// available, so one call alone could silently truncate the output.
    fn read_until_full<R: Read>(reader: &mut R, buf: &mut [u8]) -> Result<usize, Error> {
        let mut filled = 0;
        while filled < buf.len() {
            match reader.read(&mut buf[filled..]) {
                Ok(0) => break, // end of stream
                Ok(n) => filled += n,
                // Interrupted reads are retryable by the `Read` contract.
                Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
        Ok(filled)
    }

    /// Decompress via Brotli
    ///
    /// Decodes `data` into `output` and returns the number of bytes written.
    /// For `Output::Slice` at most `slice.len()` bytes are produced; for
    /// `Output::Vector` the whole stream is appended to the vector.
    pub fn decompress<'a>(data: &[u8], output: Output<'a>) -> Result<usize, Error> {
        let mut decoder = BrotliDecoder::new(data);
        match output {
            Output::Slice(slice) => read_until_full(&mut decoder, slice),
            Output::Vector(v) => decoder.read_to_end(v),
        }
    }

    /// Compress via Brotli
    ///
    /// Encodes `data` at `level` into `output` and returns the number of bytes
    /// written. For `Output::Slice` at most `slice.len()` bytes are produced;
    /// for `Output::Vector` the whole stream is appended to the vector.
    pub fn compress<'a>(data: &'a [u8], output: Output<'a>, level: u32) -> Result<usize, Error> {
        let mut encoder = BrotliEncoder::new(data, level);
        match output {
            Output::Slice(slice) => read_until_full(&mut encoder, slice),
            Output::Vector(v) => encoder.read_to_end(v),
        }
    }
}
Loading