diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 0dd50d41..f1ffbe7e 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -121,6 +121,8 @@ jobs: - uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} + - name: Audit + run: cargo install cargo-audit && cargo audit - name: Build Wheels uses: messense/maturin-action@v1 with: diff --git a/Cargo.lock b/Cargo.lock index cff3645a..4f00f05b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,21 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + [[package]] name = "autocfg" version = "1.1.0" @@ -21,23 +36,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] -name = "brotli-sys" -version = "0.3.2" +name = "brotli" +version = "3.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4445dea95f4c2b41cde57cc9fee236ae4dbae88d8fcbdb4750fc1bb5d86aaecd" +checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68" dependencies = [ - "cc", - "libc", + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", ] [[package]] -name = "brotli2" -version = "0.3.2" +name = "brotli-decompressor" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cb036c3eade309815c15ddbacec5b22c4d1f3983a774ab2eac2e3e9ea85568e" +checksum = "59ad2d4653bf5ca36ae797b1f4bb4dbddb60ce49ca4aed8a2ce4829f60425b80" dependencies = [ - "brotli-sys", - "libc", + "alloc-no-stdlib", + "alloc-stdlib", ] [[package]] @@ -78,9 +94,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "cramjam" -version = "2.6.0" +version = "2.6.1" dependencies = [ - "brotli2", + "brotli", "bzip2", "flate2", "lz4", diff --git a/Cargo.toml b/Cargo.toml index fde609f0..b9125e43 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,8 @@ [package] name = "cramjam" -version = "2.6.0" +version = "2.6.1" authors = ["Miles Granger "] edition = "2018" -license = "MIT" license-file = "LICENSE" description = "Thin Python bindings to de/compression algorithms in Rust" readme = "README.md" @@ -25,7 +24,7 @@ opt-level = 3 [dependencies] pyo3 = { version = "0.16", default-features = false, features = ["macros"] } snap = "^1" -brotli2 = "^0.3" +brotli = { version = "^3", default-features = false, features = ["std"] } bzip2 = "^0.4" lz4 = "^1" flate2 = "^1" diff --git a/benchmark-requirements.txt b/benchmark-requirements.txt index 6f994b48..06dcbbd6 100644 --- a/benchmark-requirements.txt +++ b/benchmark-requirements.txt @@ -1,4 +1,4 @@ -pytest-benchmark==3.2.3 +pytest-benchmark==4.0.0 python-snappy==0.5.4 lz4==3.1.0 brotlipy==0.7.0 diff --git a/benchmarks/README.md b/benchmarks/README.md index bc79abdd..a07c85f4 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -293,34 +293,34 @@ test_lz4_block[urls.10K-python-lz4] 2,001.9611 (66.88) `make bench-brotli` ```bash ------------------------------------------------------------------------------------------------------ benchmark: 24 tests ------------------------------------------------------------------------------------------------------ -Name (time in ms) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -test_brotli[Mark.Twain-Tom.Sawyer.txt-brotli] 17.8897 (1.0) 21.5322 (1.05) 18.4333 (1.0) 0.6208 (1.44) 18.2443 (1.0) 0.4319 (1.0) 5;5 54.2498 (1.0) 50 1 -test_brotli[Mark.Twain-Tom.Sawyer.txt-cramjam] 18.1533 (1.01) 20.4369 (1.0) 18.7759 (1.02) 0.5898 (1.37) 18.6078 (1.02) 0.8347 (1.93) 10;1 53.2598 (0.98) 50 1 -test_brotli[alice29.txt-brotli] 220.3170 (12.32) 230.1574 (11.26) 224.9658 (12.20) 3.8282 (8.88) 223.9629 (12.28) 5.6323 (13.04) 2;0 4.4451 (0.08) 5 1 -test_brotli[alice29.txt-cramjam] 226.4241 (12.66) 228.6188 (11.19) 227.6423 (12.35) 0.9194 (2.13) 227.9714 (12.50) 1.5160 (3.51) 2;0 4.3929 (0.08) 5 1 -test_brotli[asyoulik.txt-brotli] 176.8532 (9.89) 178.5816 (8.74) 177.9033 (9.65) 0.6775 (1.57) 177.9574 (9.75) 0.9254 (2.14) 2;0 5.6210 (0.10) 6 1 -test_brotli[asyoulik.txt-cramjam] 181.3138 (10.14) 185.9610 (9.10) 183.0713 (9.93) 1.8366 (4.26) 182.5439 (10.01) 2.9700 (6.88) 1;0 5.4624 (0.10) 6 1 -test_brotli[fireworks.jpeg-brotli] 70.7069 (3.95) 75.6588 (3.70) 71.9962 (3.91) 1.1901 (2.76) 71.7863 (3.93) 0.6675 (1.55) 2;1 13.8896 (0.26) 14 1 -test_brotli[fireworks.jpeg-cramjam] 71.3536 (3.99) 73.7503 (3.61) 71.8630 (3.90) 0.6269 (1.45) 71.7001 (3.93) 0.5713 (1.32) 1;1 13.9154 (0.26) 14 1 -test_brotli[geo.protodata-brotli] 124.7704 (6.97) 126.1844 (6.17) 125.5772 (6.81) 0.4313 (1.0) 125.5776 (6.88) 0.4719 (1.09) 2;0 7.9632 (0.15) 8 1 -test_brotli[geo.protodata-cramjam] 128.6971 (7.19) 130.4920 (6.39) 129.6153 (7.03) 0.6275 (1.45) 129.5793 (7.10) 1.0149 (2.35) 4;0 7.7151 (0.14) 8 1 -test_brotli[html-brotli] 133.0865 (7.44) 137.1792 (6.71) 134.8985 (7.32) 1.1569 (2.68) 134.9101 (7.39) 0.7996 (1.85) 2;2 7.4130 (0.14) 8 1 -test_brotli[html-cramjam] 136.2971 (7.62) 141.6241 (6.93) 138.3340 (7.50) 2.2464 (5.21) 137.6318 (7.54) 3.8429 (8.90) 2;0 7.2289 (0.13) 8 1 -test_brotli[html_x_4-brotli] 162.2292 (9.07) 164.1336 (8.03) 163.3756 (8.86) 0.7556 (1.75) 163.5865 (8.97) 1.3218 (3.06) 2;0 6.1209 (0.11) 6 1 -test_brotli[html_x_4-cramjam] 166.7431 (9.32) 168.3913 (8.24) 167.3817 (9.08) 0.6918 (1.60) 167.2347 (9.17) 1.1951 (2.77) 1;0 5.9744 (0.11) 6 1 -test_brotli[kppkn.gtb-brotli] 416.0493 (23.26) 420.2574 (20.56) 417.5143 (22.65) 1.6582 (3.84) 417.0724 (22.86) 1.9902 (4.61) 1;0 2.3951 (0.04) 5 1 -test_brotli[kppkn.gtb-cramjam] 432.7377 (24.19) 438.8289 (21.47) 434.3502 (23.56) 2.5329 (5.87) 433.5837 (23.77) 2.0103 (4.65) 1;1 2.3023 (0.04) 5 1 -test_brotli[lcet10.txt-brotli] 689.3127 (38.53) 695.1439 (34.01) 691.5692 (37.52) 2.2512 (5.22) 691.0756 (37.88) 2.8880 (6.69) 2;0 1.4460 (0.03) 5 1 -test_brotli[lcet10.txt-cramjam] 706.3514 (39.48) 730.3781 (35.74) 713.2318 (38.69) 9.7223 (22.54) 709.3515 (38.88) 7.2208 (16.72) 1;1 1.4021 (0.03) 5 1 -test_brotli[paper-100k.pdf-brotli] 397.2979 (22.21) 399.6303 (19.55) 398.6496 (21.63) 0.9994 (2.32) 398.5795 (21.85) 1.6975 (3.93) 1;0 2.5085 (0.05) 5 1 -test_brotli[paper-100k.pdf-cramjam] 397.1881 (22.20) 487.4731 (23.85) 418.1530 (22.68) 38.8914 (90.17) 401.4564 (22.00) 27.9225 (64.65) 1;1 2.3915 (0.04) 5 1 -test_brotli[plrabn12.txt-brotli] 766.4343 (42.84) 965.1578 (47.23) 822.5129 (44.62) 81.3593 (188.62) 785.6867 (43.06) 70.6752 (163.63) 1;1 1.2158 (0.02) 5 1 -test_brotli[plrabn12.txt-cramjam] 776.7893 (43.42) 791.7569 (38.74) 780.8096 (42.36) 6.4344 (14.92) 777.1387 (42.60) 7.2310 (16.74) 1;0 1.2807 (0.02) 5 1 -test_brotli[urls.10K-brotli] 1,232.4364 (68.89) 1,281.6436 (62.71) 1,251.4702 (67.89) 21.9635 (50.92) 1,239.3781 (67.93) 36.2594 (83.95) 1;0 0.7991 (0.01) 5 1 -test_brotli[urls.10K-cramjam] 1,256.7332 (70.25) 1,348.1630 (65.97) 1,284.4820 (69.68) 36.9337 (85.63) 1,274.7599 (69.87) 37.5966 (87.05) 1;0 0.7785 (0.01) 5 1 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------------- benchmark: 24 tests -------------------------------------------------------------------------------------------------- +Name (time in ms) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +test_brotli[Mark.Twain-Tom.Sawyer.txt-brotli] 10.9184 (1.0) 14.6035 (1.0) 12.3503 (1.0) 1.1312 (1.74) 11.8966 (1.0) 2.1789 (5.62) 30;0 80.9697 (1.0) 82 1 +test_brotli[Mark.Twain-Tom.Sawyer.txt-cramjam] 14.0977 (1.29) 17.0087 (1.16) 14.5718 (1.18) 0.6504 (1.0) 14.3074 (1.20) 0.3876 (1.0) 5;5 68.6259 (0.85) 47 1 +test_brotli[alice29.txt-brotli] 141.2160 (12.93) 145.2801 (9.95) 142.9074 (11.57) 1.4637 (2.25) 142.5277 (11.98) 2.1899 (5.65) 2;0 6.9975 (0.09) 7 1 +test_brotli[alice29.txt-cramjam] 152.6566 (13.98) 158.3756 (10.85) 154.4223 (12.50) 1.9262 (2.96) 154.1212 (12.96) 1.7533 (4.52) 1;1 6.4757 (0.08) 7 1 +test_brotli[asyoulik.txt-brotli] 110.8312 (10.15) 116.3864 (7.97) 113.1018 (9.16) 1.9668 (3.02) 112.1098 (9.42) 3.2754 (8.45) 2;0 8.8416 (0.11) 9 1 +test_brotli[asyoulik.txt-cramjam] 120.2650 (11.01) 142.7060 (9.77) 130.4795 (10.56) 9.0843 (13.97) 129.5092 (10.89) 16.0443 (41.40) 3;0 7.6640 (0.09) 9 1 +test_brotli[fireworks.jpeg-brotli] 35.0424 (3.21) 52.9552 (3.63) 42.1037 (3.41) 6.8887 (10.59) 37.3856 (3.14) 12.9743 (33.48) 8;0 23.7509 (0.29) 23 1 +test_brotli[fireworks.jpeg-cramjam] 45.6972 (4.19) 51.4216 (3.52) 47.9267 (3.88) 1.4601 (2.25) 48.0562 (4.04) 2.0533 (5.30) 7;0 20.8652 (0.26) 22 1 +test_brotli[geo.protodata-brotli] 70.8891 (6.49) 73.5486 (5.04) 72.2200 (5.85) 0.8960 (1.38) 72.7037 (6.11) 1.5739 (4.06) 5;0 13.8466 (0.17) 14 1 +test_brotli[geo.protodata-cramjam] 91.4206 (8.37) 94.7283 (6.49) 93.2170 (7.55) 0.9418 (1.45) 93.4534 (7.86) 1.1083 (2.86) 4;0 10.7277 (0.13) 11 1 +test_brotli[html-brotli] 74.1237 (6.79) 76.8266 (5.26) 75.6515 (6.13) 0.9074 (1.40) 75.8785 (6.38) 1.3270 (3.42) 5;0 13.2185 (0.16) 13 1 +test_brotli[html-cramjam] 102.4915 (9.39) 125.3486 (8.58) 112.2085 (9.09) 9.9984 (15.37) 106.6630 (8.97) 19.7193 (50.88) 3;0 8.9120 (0.11) 10 1 +test_brotli[html_x_4-brotli] 87.5360 (8.02) 114.3479 (7.83) 97.7994 (7.92) 11.4060 (17.54) 90.7073 (7.62) 21.9963 (56.75) 3;0 10.2250 (0.13) 10 1 +test_brotli[html_x_4-cramjam] 105.3706 (9.65) 111.3189 (7.62) 107.3742 (8.69) 1.9368 (2.98) 107.6139 (9.05) 2.9738 (7.67) 3;0 9.3132 (0.12) 10 1 +test_brotli[kppkn.gtb-brotli] 271.9317 (24.91) 322.6200 (22.09) 284.3380 (23.02) 21.5044 (33.07) 276.2475 (23.22) 15.4194 (39.78) 1;1 3.5169 (0.04) 5 1 +test_brotli[kppkn.gtb-cramjam] 323.5000 (29.63) 328.4267 (22.49) 324.7345 (26.29) 2.0891 (3.21) 324.0014 (27.23) 1.7883 (4.61) 1;1 3.0794 (0.04) 5 1 +test_brotli[lcet10.txt-brotli] 442.0427 (40.49) 504.8650 (34.57) 462.6317 (37.46) 27.2407 (41.89) 446.6555 (37.54) 39.1690 (101.06) 1;0 2.1615 (0.03) 5 1 +test_brotli[lcet10.txt-cramjam] 444.3975 (40.70) 511.8355 (35.05) 463.9231 (37.56) 27.3992 (42.13) 453.0632 (38.08) 24.6888 (63.70) 1;1 2.1555 (0.03) 5 1 +test_brotli[paper-100k.pdf-brotli] 262.9858 (24.09) 307.7572 (21.07) 276.0070 (22.35) 18.9312 (29.11) 265.8558 (22.35) 22.6216 (58.37) 1;0 3.6231 (0.04) 5 1 +test_brotli[paper-100k.pdf-cramjam] 91.9629 (8.42) 95.6179 (6.55) 93.8155 (7.60) 1.3233 (2.03) 93.5711 (7.87) 2.4300 (6.27) 5;0 10.6592 (0.13) 11 1 +test_brotli[plrabn12.txt-brotli] 483.2054 (44.26) 497.8085 (34.09) 487.2513 (39.45) 6.0998 (9.38) 484.6980 (40.74) 6.3864 (16.48) 1;0 2.0523 (0.03) 5 1 +test_brotli[plrabn12.txt-cramjam] 502.6484 (46.04) 530.4874 (36.33) 511.1903 (41.39) 11.2501 (17.30) 506.6601 (42.59) 11.8233 (30.51) 1;0 1.9562 (0.02) 5 1 +test_brotli[urls.10K-brotli] 731.0115 (66.95) 808.1545 (55.34) 753.5258 (61.01) 32.1086 (49.37) 736.5432 (61.91) 35.3638 (91.24) 1;0 1.3271 (0.02) 5 1 +test_brotli[urls.10K-cramjam] 722.6436 (66.19) 823.0498 (56.36) 743.6850 (60.22) 44.3756 (68.23) 724.4152 (60.89) 26.3356 (67.95) 1;1 1.3447 (0.02) 5 1 +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ ``` #### Bzip2 diff --git a/src/brotli.rs b/src/brotli.rs index c903b274..fd90833b 100644 --- a/src/brotli.rs +++ b/src/brotli.rs @@ -5,9 +5,11 @@ use crate::{to_py_err, BytesType}; use pyo3::prelude::*; use pyo3::wrap_pyfunction; use pyo3::PyResult; -use std::io::Cursor; +use std::io::{Cursor, Write}; const DEFAULT_COMPRESSION_LEVEL: u32 = 11; +const BUF_SIZE: usize = 1 << 17; // Taken from brotli kCompressFragementTwoPassBlockSize +const LGWIN: u32 = 22; pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(compress, m)?)?; @@ -59,7 +61,7 @@ pub fn decompress_into(input: BytesType, mut output: BytesType) -> PyResult>>>, + inner: Option>>>, } #[pymethods] @@ -68,7 +70,7 @@ impl Compressor { #[new] pub fn __init__(level: Option) -> PyResult { let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); - let inner = brotli2::write::BrotliEncoder::new(Cursor::new(vec![]), level); + let inner = brotli::CompressorWriter::new(Cursor::new(vec![]), BUF_SIZE, level, LGWIN); Ok(Self { inner: Some(inner) }) } @@ -85,20 +87,21 @@ impl Compressor { /// Consume the current compressor state and return the compressed stream /// **NB** The compressor will not be usable after this method is called. pub fn finish(&mut self) -> PyResult { - crate::io::stream_finish(&mut self.inner, |inner| inner.finish().map(|c| c.into_inner())) + crate::io::stream_finish(&mut self.inner, |mut inner| { + inner.flush().map(|_| inner.into_inner().into_inner()) + }) } } pub(crate) mod internal { - use crate::brotli::DEFAULT_COMPRESSION_LEVEL; - use brotli2::read::{BrotliDecoder, BrotliEncoder}; + use crate::brotli::{BUF_SIZE, DEFAULT_COMPRESSION_LEVEL, LGWIN}; use std::io::prelude::*; use std::io::Error; /// Decompress via Brotli pub fn decompress(input: R, output: &mut W) -> Result { - let mut decoder = BrotliDecoder::new(input); + let mut decoder = brotli::Decompressor::new(input, BUF_SIZE); let n_bytes = std::io::copy(&mut decoder, output)?; Ok(n_bytes as usize) } @@ -106,7 +109,7 @@ pub(crate) mod internal { /// Compress via Brotli pub fn compress(input: R, output: &mut W, level: Option) -> Result { let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); - let mut encoder = BrotliEncoder::new(input, level); + let mut encoder = brotli::CompressorReader::new(input, BUF_SIZE, level, LGWIN); let n_bytes = std::io::copy(&mut encoder, output)?; Ok(n_bytes as usize) } diff --git a/src/lib.rs b/src/lib.rs index ceb62c24..13b53647 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -311,7 +311,7 @@ mod tests { test_variant!(snappy, compressed_len = 2572398,); test_variant!(gzip, compressed_len = 157192, level = None); - test_variant!(brotli, compressed_len = 729, level = None); + test_variant!(brotli, compressed_len = 128, level = None); test_variant!(bzip2, compressed_len = 14207, level = None); test_variant!(deflate, compressed_len = 157174, level = None); test_variant!(zstd, compressed_len = 4990, level = None);