diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 80c6215e..cec842f2 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python-version: [3.6, 3.7, 3.8] + python-version: [3.6, 3.7, 3.8, 3.9] steps: - uses: actions/checkout@v2 @@ -28,7 +28,9 @@ jobs: override: true - name: Python Dev Install - run: pip install -r dev-requirements.txt + run: | + pip install --upgrade pip + pip install -r dev-requirements.txt - name: Build Wheels - Linux if: startsWith(matrix.os, 'ubuntu') diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3cd0e4b5..33a0a617 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -13,7 +13,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python-version: [3.6] + python-version: [3.6, 3.7, 3.8, 3.9] steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 @@ -28,7 +28,9 @@ jobs: architecture: 'x64' - name: Install Dev requirements - run: pip install -r dev-requirements.txt + run: | + pip install --upgrade pip + pip install -r dev-requirements.txt - name: Build Wheels - Linux if: startsWith(matrix.os, 'ubuntu') diff --git a/Cargo.toml b/Cargo.toml index fc0d9891..aa83bb60 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cramjam" -version = "1.3.2" +version = "2.0.0-rc1" authors = ["Miles Granger "] edition = "2018" license-file = "LICENSE" @@ -11,9 +11,9 @@ description = "Thin Python bindings to de/compression algorithms in Rust" crate-type = ["cdylib"] [dependencies] -pyo3 = { version = "0.13.1", features = ["abi3-py36", "extension-module"] } +pyo3 = { version = "0.13.1", features = ["extension-module"] } snap = "^1" brotli2 = "^0.3" lz-fear = "0.1.1" flate2 = "^1" -zstd = "0.5.1+zstd.1.4.4" +zstd = "0.6.0+zstd.1.4.8" diff --git a/Makefile b/Makefile index 27ffda90..d83bf286 100644 --- 
a/Makefile +++ b/Makefile @@ -25,4 +25,5 @@ dev-install: rm -rf ./wheels maturin build --release --out wheels --interpreter $(shell which python) pip uninstall cramjam -y + rm wheels/*.tar.gz pip install --no-index wheels/* diff --git a/README.md b/README.md index 2f3bb8b1..3584d2c6 100644 --- a/README.md +++ b/README.md @@ -39,10 +39,20 @@ All available for use as: ```python >>> import cramjam ->>> compessed = cramjam.snappy_compress(b"bytes here") ->>> cramjam.snappy_decompress(compressed) +>>> compressed = cramjam.snappy.compress(b"bytes here") +>>> cramjam.snappy.decompress(compressed) b"bytes here" ``` -Where the API is `cramjam._compress/decompress` and only accepts -python `byte` strings +Where the API is `cramjam.<compression-variant>.compress/decompress` and accepts +both `bytes` and `bytearray` objects. + +**Special note!** +If you know the length of the de/compress output, you +can provide `output_len=<<some int>>` to any `de/compress` +to get ~1.5-3x performance increase as this allows single +buffer allocation. + +For `snappy` with `bytearray`s, it's only a mild improvement +as we currently are able to estimate the buffer size and can +resize the resulting `bytearray` to the correct size. diff --git a/benchmarks/README.md b/benchmarks/README.md index bdd663e4..3df23a71 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -14,41 +14,89 @@ lib installed. Can install all via `pip install -r benchmark-requirements.txt`. The resulting output from benchmarks indicate what file and lib was used, (`cramjam` vs `gzip`) `test_gzip[urls.10K-cramjam]` indicates `cramjam` and `benchmarks/data/urls.10K` file was used during the gzip benchmark. + +Special performance notes: +--- +In general, if the length of the output from de/compression is known, for any variant, +you can provide `output_len=<<some int>>` to get a 1-4x performance increase. This +allows us to avoid double allocation. 
+ +Furthermore, you can provide `output_len` equal to the "at most" length when +de/compressing `bytearray` objects; as these can be resized after the de/compression +is finished and actual size is known for basically the same performance. This is not +true for `bytes` objects, as we cannot resize after de/compression and the result +will have trailing null bytes. + +For snappy used in conjunction with `bytearray`s, this is only mildly helpful as +we automatically estimate the buffer size and can resize the `bytearray` after. + + --- #### Gzip `make bench-gzip` + +Notes +--- +This benchmark is parameterized over setting `output_len=True/False` as an example +of the performance benefit that can be expected from various files/sizes. + + ```bash ---------------------------------------------------------------------------------------------------------- benchmark: 24 tests --------------------------------------------------------------------------------------------------------- -Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -test_gzip[Mark.Twain-Tom.Sawyer.txt-cramjam] 497.7920 (1.0) 927.0370 (1.0) 512.6448 (1.0) 38.7377 (1.03) 501.2350 (1.0) 4.6807 (1.0) 89;331 1,950.6684 (1.0) 1533 1 -test_gzip[Mark.Twain-Tom.Sawyer.txt-gzip] 573.7430 (1.15) 986.6440 (1.06) 591.0132 (1.15) 37.5584 (1.0) 578.2010 (1.15) 14.7650 (3.15) 97;163 1,692.0095 (0.87) 1470 1 -test_gzip[alice29.txt-cramjam] 10,788.0000 (21.67) 13,869.3350 (14.96) 11,136.2440 (21.72) 522.4084 (13.91) 11,004.6575 (21.96) 145.8700 (31.16) 5;8 89.7969 (0.05) 90 1 -test_gzip[alice29.txt-gzip] 11,869.4080 (23.84) 14,129.1380 (15.24) 12,164.6976 (23.73) 403.3387 (10.74) 12,050.8860 (24.04) 168.4588 (35.99) 5;6 82.2051 (0.04) 83 1 -test_gzip[asyoulik.txt-cramjam] 
8,285.3550 (16.64) 11,234.8790 (12.12) 8,510.5130 (16.60) 423.3875 (11.27) 8,410.6555 (16.78) 114.5175 (24.47) 6;13 117.5017 (0.06) 120 1 -test_gzip[asyoulik.txt-gzip] 8,952.9650 (17.99) 11,291.9470 (12.18) 9,346.9739 (18.23) 415.8719 (11.07) 9,241.8710 (18.44) 145.0840 (31.00) 7;10 106.9865 (0.05) 90 1 -test_gzip[fireworks.jpeg-cramjam] 3,822.6550 (7.68) 6,172.2910 (6.66) 3,983.1931 (7.77) 326.3236 (8.69) 3,891.8400 (7.76) 123.2523 (26.33) 8;11 251.0549 (0.13) 147 1 -test_gzip[fireworks.jpeg-gzip] 3,197.8900 (6.42) 5,114.0700 (5.52) 3,306.4640 (6.45) 208.9637 (5.56) 3,260.7100 (6.51) 101.4152 (21.67) 9;10 302.4379 (0.16) 285 1 -test_gzip[geo.protodata-cramjam] 1,577.1590 (3.17) 2,818.3670 (3.04) 1,654.2466 (3.23) 144.8015 (3.86) 1,614.0270 (3.22) 63.4920 (13.56) 31;37 604.5048 (0.31) 574 1 -test_gzip[geo.protodata-gzip] 2,113.4380 (4.25) 3,371.9360 (3.64) 2,211.3350 (4.31) 127.9896 (3.41) 2,169.3200 (4.33) 84.6785 (18.09) 26;24 452.2155 (0.23) 429 1 -test_gzip[html-cramjam] 1,956.7170 (3.93) 3,061.7890 (3.30) 2,028.4278 (3.96) 121.2626 (3.23) 1,990.7950 (3.97) 75.0007 (16.02) 29;28 492.9926 (0.25) 439 1 -test_gzip[html-gzip] 2,458.9250 (4.94) 3,761.2100 (4.06) 2,540.1700 (4.96) 135.4718 (3.61) 2,504.6555 (5.00) 89.0160 (19.02) 21;16 393.6744 (0.20) 390 1 -test_gzip[html_x_4-cramjam] 9,322.9810 (18.73) 11,875.0880 (12.81) 9,564.6007 (18.66) 366.2080 (9.75) 9,482.4370 (18.92) 138.2243 (29.53) 6;8 104.5522 (0.05) 107 1 -test_gzip[html_x_4-gzip] 10,878.6620 (21.85) 13,747.6080 (14.83) 11,147.0642 (21.74) 395.4767 (10.53) 11,072.2015 (22.09) 140.1490 (29.94) 4;5 89.7097 (0.05) 90 1 -test_gzip[kppkn.gtb-cramjam] 39,047.1010 (78.44) 41,982.6430 (45.29) 39,376.7768 (76.81) 574.0887 (15.29) 39,231.4520 (78.27) 179.2995 (38.31) 1;1 25.3957 (0.01) 24 1 -test_gzip[kppkn.gtb-gzip] 62,117.8520 (124.79) 64,566.2150 (69.65) 62,617.8781 (122.15) 730.3686 (19.45) 62,403.7350 (124.50) 133.9970 (28.63) 2;2 15.9699 (0.01) 16 1 -test_gzip[lcet10.txt-cramjam] 26,369.5180 (52.97) 
29,075.0250 (31.36) 26,686.8723 (52.06) 447.7174 (11.92) 26,645.4015 (53.16) 185.7090 (39.68) 1;2 37.4716 (0.02) 34 1 -test_gzip[lcet10.txt-gzip] 29,200.5550 (58.66) 32,278.3550 (34.82) 29,595.5600 (57.73) 546.9773 (14.56) 29,446.1480 (58.75) 150.3415 (32.12) 3;4 33.7889 (0.02) 31 1 -test_gzip[paper-100k.pdf-cramjam] 2,770.0760 (5.56) 4,955.2750 (5.35) 2,878.5382 (5.62) 246.0345 (6.55) 2,814.7090 (5.62) 100.5495 (21.48) 14;15 347.3985 (0.18) 355 1 -test_gzip[paper-100k.pdf-gzip] 3,214.4760 (6.46) 5,073.3990 (5.47) 3,339.4041 (6.51) 196.4699 (5.23) 3,277.1590 (6.54) 117.5240 (25.11) 18;18 299.4546 (0.15) 295 1 -test_gzip[plrabn12.txt-cramjam] 48,263.9040 (96.96) 50,260.6310 (54.22) 48,724.9557 (95.05) 550.9577 (14.67) 48,620.2370 (97.00) 470.5795 (100.54) 2;2 20.5234 (0.01) 20 1 -test_gzip[plrabn12.txt-gzip] 52,464.0520 (105.39) 54,261.8340 (58.53) 52,956.9974 (103.30) 505.1568 (13.45) 52,805.4230 (105.35) 561.5632 (119.97) 3;1 18.8832 (0.01) 19 1 -test_gzip[urls.10K-cramjam] 35,862.0130 (72.04) 38,657.0320 (41.70) 36,375.8052 (70.96) 524.4842 (13.96) 36,286.5185 (72.39) 441.9370 (94.42) 1;1 27.4908 (0.01) 26 1 -test_gzip[urls.10K-gzip] 39,871.9070 (80.10) 41,759.3410 (45.05) 40,245.7284 (78.51) 479.4244 (12.76) 40,118.9995 (80.04) 261.3920 (55.84) 2;3 24.8474 (0.01) 24 1 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +---------------------------------------------------------------------------------------------------------------------- benchmark: 48 tests ---------------------------------------------------------------------------------------------------------------------- +Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations 
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +test_gzip[Mark.Twain-Tom.Sawyer.txt-used-output_len=False-cramjam] 519.7970 (1.00) 1,039.8630 (1.10) 554.8131 (1.0) 57.9148 (1.11) 539.2260 (1.0) 35.0980 (1.0) 116;117 1,802.4088 (1.0) 1548 1 +test_gzip[Mark.Twain-Tom.Sawyer.txt-used-output_len=False-gzip] 574.1360 (1.11) 949.8510 (1.00) 612.0804 (1.10) 53.3911 (1.02) 597.0000 (1.11) 40.9002 (1.17) 157;125 1,633.7722 (0.91) 1529 1 +test_gzip[Mark.Twain-Tom.Sawyer.txt-used-output_len=True-cramjam] 518.2490 (1.0) 1,060.6960 (1.12) 557.3019 (1.00) 58.1539 (1.11) 540.5990 (1.00) 38.5972 (1.10) 173;161 1,794.3595 (1.00) 1733 1 +test_gzip[Mark.Twain-Tom.Sawyer.txt-used-output_len=True-gzip] 573.6970 (1.11) 948.0410 (1.0) 610.1559 (1.10) 52.2425 (1.0) 595.9855 (1.11) 39.6890 (1.13) 137;112 1,638.9253 (0.91) 1402 1 +test_gzip[alice29.txt-used-output_len=False-cramjam] 11,126.7610 (21.47) 12,438.6120 (13.12) 11,511.1064 (20.75) 218.1752 (4.18) 11,467.2385 (21.27) 284.9400 (8.12) 19;2 86.8726 (0.05) 84 1 +test_gzip[alice29.txt-used-output_len=False-gzip] 11,987.2370 (23.13) 13,140.4910 (13.86) 12,407.8334 (22.36) 229.7621 (4.40) 12,373.9645 (22.95) 245.7390 (7.00) 25;3 80.5942 (0.04) 78 1 +test_gzip[alice29.txt-used-output_len=True-cramjam] 10,691.9930 (20.63) 12,840.6640 (13.54) 11,162.0962 (20.12) 263.0858 (5.04) 11,151.9600 (20.68) 216.9515 (6.18) 17;3 89.5889 (0.05) 85 1 +test_gzip[alice29.txt-used-output_len=True-gzip] 12,080.5280 (23.31) 14,159.6150 (14.94) 12,455.4363 (22.45) 258.2839 (4.94) 12,409.5640 (23.01) 197.6640 (5.63) 14;4 80.2862 (0.04) 80 1 +test_gzip[asyoulik.txt-used-output_len=False-cramjam] 8,554.8190 (16.51) 11,292.3600 (11.91) 8,928.3944 (16.09) 373.9090 (7.16) 8,848.9265 (16.41) 243.9890 (6.95) 4;4 112.0022 (0.06) 110 1 
+test_gzip[asyoulik.txt-used-output_len=False-gzip] 9,105.9290 (17.57) 11,139.4500 (11.75) 9,525.5454 (17.17) 300.7739 (5.76) 9,455.9970 (17.54) 232.7680 (6.63) 14;5 104.9809 (0.06) 103 1 +test_gzip[asyoulik.txt-used-output_len=True-cramjam] 8,229.7350 (15.88) 10,414.6030 (10.99) 8,601.9106 (15.50) 305.7064 (5.85) 8,556.6525 (15.87) 263.3710 (7.50) 11;3 116.2532 (0.06) 116 1 +test_gzip[asyoulik.txt-used-output_len=True-gzip] 9,188.1850 (17.73) 11,324.9780 (11.95) 9,595.0928 (17.29) 296.8139 (5.68) 9,532.5355 (17.68) 308.6825 (8.79) 21;2 104.2199 (0.06) 104 1 +test_gzip[fireworks.jpeg-used-output_len=False-cramjam] 4,112.5220 (7.94) 4,831.0910 (5.10) 4,302.3705 (7.75) 135.0648 (2.59) 4,285.4420 (7.95) 177.0450 (5.04) 69;7 232.4300 (0.13) 233 1 +test_gzip[fireworks.jpeg-used-output_len=False-gzip] 3,199.6390 (6.17) 5,390.3660 (5.69) 3,390.9647 (6.11) 209.0182 (4.00) 3,358.5240 (6.23) 161.0110 (4.59) 19;14 294.9013 (0.16) 291 1 +test_gzip[fireworks.jpeg-used-output_len=True-cramjam] 949.3340 (1.83) 1,899.1360 (2.00) 1,005.6629 (1.81) 86.4962 (1.66) 986.2460 (1.83) 62.0280 (1.77) 72;50 994.3690 (0.55) 1031 1 +test_gzip[fireworks.jpeg-used-output_len=True-gzip] 3,200.2920 (6.18) 4,227.5530 (4.46) 3,387.2603 (6.11) 136.9476 (2.62) 3,354.4090 (6.22) 182.4480 (5.20) 77;4 295.2238 (0.16) 291 1 +test_gzip[geo.protodata-used-output_len=False-cramjam] 1,738.4310 (3.35) 2,604.3500 (2.75) 1,849.7277 (3.33) 106.3325 (2.04) 1,827.7090 (3.39) 90.1718 (2.57) 62;33 540.6201 (0.30) 555 1 +test_gzip[geo.protodata-used-output_len=False-gzip] 2,147.9430 (4.14) 2,577.6970 (2.72) 2,257.4456 (4.07) 84.2493 (1.61) 2,242.1480 (4.16) 99.5392 (2.84) 114;18 442.9786 (0.25) 453 1 +test_gzip[geo.protodata-used-output_len=True-cramjam] 1,725.6690 (3.33) 2,718.6700 (2.87) 1,836.1617 (3.31) 89.9981 (1.72) 1,815.8690 (3.37) 85.5890 (2.44) 101;31 544.6144 (0.30) 510 1 +test_gzip[geo.protodata-used-output_len=True-gzip] 2,127.9530 (4.11) 3,138.8670 (3.31) 2,272.0031 (4.10) 122.4131 (2.34) 2,247.9540 
(4.17) 115.8465 (3.30) 47;18 440.1402 (0.24) 429 1 +test_gzip[html-used-output_len=False-cramjam] 2,104.5090 (4.06) 3,175.1630 (3.35) 2,225.5431 (4.01) 115.0906 (2.20) 2,201.1520 (4.08) 104.5572 (2.98) 53;20 449.3285 (0.25) 443 1 +test_gzip[html-used-output_len=False-gzip] 2,500.7590 (4.83) 3,616.7810 (3.82) 2,641.0550 (4.76) 123.6254 (2.37) 2,620.8410 (4.86) 111.6068 (3.18) 56;20 378.6366 (0.21) 359 1 +test_gzip[html-used-output_len=True-cramjam] 2,079.2420 (4.01) 3,400.4040 (3.59) 2,222.3146 (4.01) 138.7664 (2.66) 2,188.7960 (4.06) 103.3685 (2.95) 44;30 449.9813 (0.25) 453 1 +test_gzip[html-used-output_len=True-gzip] 2,478.4130 (4.78) 3,809.0550 (4.02) 2,638.6508 (4.76) 126.2851 (2.42) 2,616.6670 (4.85) 128.5782 (3.66) 50;9 378.9816 (0.21) 333 1 +test_gzip[html_x_4-used-output_len=False-cramjam] 10,130.5030 (19.55) 11,372.8290 (12.00) 10,590.8331 (19.09) 273.9986 (5.24) 10,554.1310 (19.57) 426.2085 (12.14) 26;0 94.4213 (0.05) 91 1 +test_gzip[html_x_4-used-output_len=False-gzip] 11,047.6520 (21.32) 14,428.7200 (15.22) 11,484.8105 (20.70) 479.1162 (9.17) 11,389.2690 (21.12) 283.3758 (8.07) 5;5 87.0715 (0.05) 89 1 +test_gzip[html_x_4-used-output_len=True-cramjam] 9,458.3850 (18.25) 11,396.0560 (12.02) 9,943.5637 (17.92) 322.5896 (6.17) 9,892.4310 (18.35) 408.9162 (11.65) 28;3 100.5676 (0.06) 101 1 +test_gzip[html_x_4-used-output_len=True-gzip] 10,967.9200 (21.16) 12,620.0610 (13.31) 11,653.9219 (21.01) 420.0944 (8.04) 11,559.7885 (21.44) 674.0520 (19.20) 24;0 85.8080 (0.05) 76 1 +test_gzip[kppkn.gtb-used-output_len=False-cramjam] 40,256.6710 (77.68) 43,371.8420 (45.75) 41,135.3838 (74.14) 763.5835 (14.62) 41,052.4730 (76.13) 553.2935 (15.76) 4;3 24.3100 (0.01) 25 1 +test_gzip[kppkn.gtb-used-output_len=False-gzip] 64,079.5300 (123.65) 66,277.4240 (69.91) 64,924.9089 (117.02) 565.3003 (10.82) 64,802.9350 (120.18) 815.0115 (23.22) 3;0 15.4024 (0.01) 16 1 +test_gzip[kppkn.gtb-used-output_len=True-cramjam] 40,419.5090 (77.99) 47,952.8860 (50.58) 42,200.0288 (76.06) 
1,794.2808 (34.35) 41,631.6520 (77.21) 2,457.0025 (70.00) 3;1 23.6967 (0.01) 24 1 +test_gzip[kppkn.gtb-used-output_len=True-gzip] 64,210.5460 (123.90) 66,613.3220 (70.26) 65,246.4269 (117.60) 741.0967 (14.19) 65,081.6270 (120.69) 1,015.2845 (28.93) 6;0 15.3265 (0.01) 16 1 +test_gzip[lcet10.txt-used-output_len=False-cramjam] 27,573.1010 (53.20) 34,978.2250 (36.90) 29,197.9029 (52.63) 1,652.9253 (31.64) 28,802.4685 (53.41) 736.2490 (20.98) 3;3 34.2490 (0.02) 36 1 +test_gzip[lcet10.txt-used-output_len=False-gzip] 30,280.9470 (58.43) 32,562.8860 (34.35) 31,243.7204 (56.31) 571.5751 (10.94) 31,290.7620 (58.03) 880.7153 (25.09) 12;0 32.0064 (0.02) 33 1 +test_gzip[lcet10.txt-used-output_len=True-cramjam] 9,555.1300 (18.44) 10,782.8600 (11.37) 9,912.5000 (17.87) 228.0582 (4.37) 9,872.4590 (18.31) 251.0783 (7.15) 26;4 100.8827 (0.06) 101 1 +test_gzip[lcet10.txt-used-output_len=True-gzip] 29,901.7910 (57.70) 32,865.9390 (34.67) 30,719.4480 (55.37) 747.3969 (14.31) 30,474.8960 (56.52) 705.1667 (20.09) 4;3 32.5527 (0.02) 33 1 +test_gzip[paper-100k.pdf-used-output_len=False-cramjam] 2,967.7560 (5.73) 4,924.8240 (5.19) 3,123.3734 (5.63) 150.6200 (2.88) 3,098.6220 (5.75) 129.4330 (3.69) 28;14 320.1667 (0.18) 317 1 +test_gzip[paper-100k.pdf-used-output_len=False-gzip] 3,213.1740 (6.20) 4,502.4180 (4.75) 3,398.0615 (6.12) 153.2343 (2.93) 3,375.8890 (6.26) 153.5790 (4.38) 51;12 294.2854 (0.16) 286 1 +test_gzip[paper-100k.pdf-used-output_len=True-cramjam] 2,516.1480 (4.86) 3,818.5730 (4.03) 2,644.5456 (4.77) 114.8152 (2.20) 2,623.0940 (4.86) 130.8360 (3.73) 69;8 378.1368 (0.21) 346 1 +test_gzip[paper-100k.pdf-used-output_len=True-gzip] 3,219.8630 (6.21) 4,964.1200 (5.24) 3,397.6387 (6.12) 181.2687 (3.47) 3,368.3955 (6.25) 150.5255 (4.29) 25;11 294.3221 (0.16) 280 1 +test_gzip[plrabn12.txt-used-output_len=False-cramjam] 49,762.2040 (96.02) 52,859.5690 (55.76) 50,738.7668 (91.45) 738.6969 (14.14) 50,563.2525 (93.77) 699.4550 (19.93) 4;2 19.7088 (0.01) 20 1 
+test_gzip[plrabn12.txt-used-output_len=False-gzip] 53,138.4560 (102.53) 57,021.3700 (60.15) 54,463.9891 (98.17) 935.8547 (17.91) 54,264.2380 (100.63) 1,299.5680 (37.03) 5;0 18.3608 (0.01) 19 1 +test_gzip[plrabn12.txt-used-output_len=True-cramjam] 11,388.8090 (21.98) 15,003.8590 (15.83) 11,920.0316 (21.48) 539.3715 (10.32) 11,808.2690 (21.90) 325.8090 (9.28) 7;7 83.8924 (0.05) 83 1 +test_gzip[plrabn12.txt-used-output_len=True-gzip] 53,538.2260 (103.31) 56,180.2010 (59.26) 54,724.8323 (98.64) 721.1164 (13.80) 54,695.3400 (101.43) 949.2280 (27.05) 7;0 18.2732 (0.01) 19 1 +test_gzip[urls.10K-used-output_len=False-cramjam] 38,097.4640 (73.51) 45,489.4830 (47.98) 39,572.1519 (71.33) 1,431.1925 (27.40) 39,344.5560 (72.96) 1,227.8805 (34.98) 2;1 25.2703 (0.01) 24 1 +test_gzip[urls.10K-used-output_len=False-gzip] 41,281.0130 (79.65) 43,838.5800 (46.24) 42,369.0851 (76.37) 765.5144 (14.65) 42,351.6570 (78.54) 1,138.2070 (32.43) 9;0 23.6021 (0.01) 23 1 +test_gzip[urls.10K-used-output_len=True-cramjam] 7,313.0000 (14.11) 9,913.0270 (10.46) 7,718.4330 (13.91) 313.5716 (6.00) 7,633.9700 (14.16) 345.0630 (9.83) 17;5 129.5600 (0.07) 130 1 +test_gzip[urls.10K-used-output_len=True-gzip] 41,650.3010 (80.37) 44,860.9970 (47.32) 42,876.6647 (77.28) 703.7033 (13.47) 42,893.8560 (79.55) 851.1110 (24.25) 6;1 23.3227 (0.01) 24 1 +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ``` @@ -57,34 +105,34 @@ test_gzip[urls.10K-gzip] 39,871.9070 (80.10) 41,759.3 `make bench-snappy` ```bash ---------------------------------------------------------------------------------------------------------- benchmark: 24 tests ---------------------------------------------------------------------------------------------------------- -Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds 
Iterations ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -test_snappy_raw[Mark.Twain-Tom.Sawyer.txt-cramjam] 52.3690 (3.40) 115.8890 (2.22) 55.2396 (3.43) 5.9556 (3.52) 53.4210 (3.39) 1.1660 (4.29) 1048;1401 18,102.9494 (0.29) 11589 1 -test_snappy_raw[Mark.Twain-Tom.Sawyer.txt-snappy] 52.6630 (3.41) 108.0780 (2.07) 54.2757 (3.37) 3.3904 (2.00) 53.5820 (3.40) 0.3935 (1.45) 488;1355 18,424.4486 (0.30) 12256 1 -test_snappy_raw[alice29.txt-cramjam] 614.1800 (39.82) 951.0760 (18.25) 632.5591 (39.32) 25.7672 (15.23) 621.4280 (39.41) 18.8097 (69.15) 173;112 1,580.8800 (0.03) 1565 1 -test_snappy_raw[alice29.txt-snappy] 600.9050 (38.96) 857.3690 (16.45) 616.9804 (38.35) 24.8116 (14.66) 604.3860 (38.33) 19.9527 (73.36) 164;90 1,620.7971 (0.03) 1391 1 -test_snappy_raw[asyoulik.txt-cramjam] 548.1430 (35.54) 820.2290 (15.74) 562.4624 (34.96) 21.9863 (12.99) 553.0150 (35.07) 15.2803 (56.18) 217;168 1,777.8965 (0.03) 1767 1 -test_snappy_raw[asyoulik.txt-snappy] 533.1530 (34.56) 712.1160 (13.66) 547.3970 (34.03) 23.3257 (13.78) 536.1730 (34.01) 16.1405 (59.34) 195;158 1,826.8277 (0.03) 1836 1 -test_snappy_raw[fireworks.jpeg-cramjam] 36.8510 (2.39) 80.3290 (1.54) 38.4149 (2.39) 3.3261 (1.97) 37.5030 (2.38) 0.9852 (3.62) 769;1143 26,031.5717 (0.42) 18833 1 -test_snappy_raw[fireworks.jpeg-snappy] 15.4250 (1.0) 52.1130 (1.0) 16.0880 (1.0) 1.6923 (1.0) 15.7670 (1.0) 0.2720 (1.0) 843;3054 62,158.2882 (1.0) 36288 1 -test_snappy_raw[geo.protodata-cramjam] 159.0690 (10.31) 277.0210 (5.32) 164.3288 (10.21) 10.5643 (6.24) 161.3640 (10.23) 1.3160 (4.84) 394;1085 6,085.3621 (0.10) 5532 1 -test_snappy_raw[geo.protodata-snappy] 143.0880 (9.28) 427.6350 (8.21) 149.2621 (9.28) 10.3206 (6.10) 146.8510 (9.31) 2.2190 (8.16) 423;863 6,699.6256 (0.11) 6144 1 -test_snappy_raw[html-cramjam] 168.1440 (10.90) 467.2520 
(8.97) 174.6428 (10.86) 14.7798 (8.73) 170.3380 (10.80) 2.6872 (9.88) 364;676 5,725.9735 (0.09) 5397 1 -test_snappy_raw[html-snappy] 156.3440 (10.14) 280.2920 (5.38) 160.8994 (10.00) 9.7030 (5.73) 158.0980 (10.03) 1.1595 (4.26) 373;1257 6,215.0630 (0.10) 5811 1 -test_snappy_raw[html_x_4-cramjam] 694.2620 (45.01) 900.5190 (17.28) 711.4322 (44.22) 27.8857 (16.48) 698.9225 (44.33) 20.0220 (73.61) 116;84 1,405.6153 (0.02) 1034 1 -test_snappy_raw[html_x_4-snappy] 633.0930 (41.04) 864.6650 (16.59) 649.1615 (40.35) 27.1781 (16.06) 637.7115 (40.45) 17.9000 (65.81) 160;114 1,540.4487 (0.02) 1538 1 -test_snappy_raw[kppkn.gtb-cramjam] 516.7710 (33.50) 760.5490 (14.59) 530.4332 (32.97) 23.1890 (13.70) 520.5000 (33.01) 14.7900 (54.37) 184;165 1,885.2514 (0.03) 1811 1 -test_snappy_raw[kppkn.gtb-snappy] 503.7630 (32.66) 702.6630 (13.48) 516.1728 (32.08) 21.9923 (13.00) 505.8520 (32.08) 14.5700 (53.57) 214;193 1,937.3358 (0.03) 1944 1 -test_snappy_raw[lcet10.txt-cramjam] 1,641.4740 (106.42) 2,062.8950 (39.59) 1,676.7033 (104.22) 48.0968 (28.42) 1,655.1000 (104.97) 40.8570 (150.21) 75;57 596.4084 (0.01) 546 1 -test_snappy_raw[lcet10.txt-snappy] 1,590.3140 (103.10) 1,956.3550 (37.54) 1,626.9634 (101.13) 47.5634 (28.11) 1,600.7900 (101.53) 41.9140 (154.10) 97;60 614.6420 (0.01) 621 1 -test_snappy_raw[paper-100k.pdf-cramjam] 36.2280 (2.35) 80.2130 (1.54) 37.8268 (2.35) 3.4155 (2.02) 37.0410 (2.35) 0.5730 (2.11) 813;2607 26,436.3044 (0.43) 17202 1 -test_snappy_raw[paper-100k.pdf-snappy] 20.5150 (1.33) 213.3630 (4.09) 21.5959 (1.34) 2.5731 (1.52) 21.1570 (1.34) 0.3500 (1.29) 828;2983 46,304.9918 (0.74) 26395 1 -test_snappy_raw[plrabn12.txt-cramjam] 2,411.8190 (156.36) 3,147.9140 (60.41) 2,612.8756 (162.41) 67.9935 (40.18) 2,593.1220 (164.47) 85.0065 (312.52) 51;9 382.7201 (0.01) 364 1 -test_snappy_raw[plrabn12.txt-snappy] 2,186.4340 (141.75) 2,516.2830 (48.29) 2,231.8268 (138.73) 50.8453 (30.05) 2,214.3780 (140.44) 60.5810 (222.72) 68;13 448.0634 (0.01) 402 1 
-test_snappy_raw[urls.10K-cramjam] 2,368.2700 (153.53) 2,713.3270 (52.07) 2,421.8313 (150.54) 58.8285 (34.76) 2,396.8985 (152.02) 66.5005 (244.49) 66;15 412.9107 (0.01) 376 1 -test_snappy_raw[urls.10K-snappy] 1,813.1410 (117.55) 2,218.1260 (42.56) 1,850.6340 (115.03) 49.7677 (29.41) 1,834.3030 (116.34) 43.2400 (158.97) 76;49 540.3553 (0.01) 464 1 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +-------------------------------------------------------------------------------------------------------- benchmark: 24 tests --------------------------------------------------------------------------------------------------------- +Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +test_snappy[Mark.Twain-Tom.Sawyer.txt-cramjam] 68.0490 (4.44) 236.4670 (3.23) 74.1624 (4.38) 12.2258 (2.78) 70.6645 (4.42) 3.8065 (6.30) 393;654 13,483.9307 (0.23) 4220 1 +test_snappy[Mark.Twain-Tom.Sawyer.txt-snappy] 52.8250 (3.45) 209.7710 (2.86) 57.8734 (3.42) 10.0498 (2.29) 55.1665 (3.45) 2.0325 (3.37) 1153;1990 17,279.1036 (0.29) 13972 1 +test_snappy[alice29.txt-cramjam] 296.4050 (19.34) 513.2750 (7.00) 319.8824 (18.88) 26.2029 (5.96) 314.6850 (19.70) 20.1032 (33.30) 257;192 3,126.1485 (0.05) 2463 1 +test_snappy[alice29.txt-snappy] 601.3360 (39.24) 896.2440 (12.23) 642.0986 (37.90) 43.3138 (9.85) 637.0050 (39.87) 44.1420 (73.11) 176;101 1,557.3933 (0.03) 1583 1 +test_snappy[asyoulik.txt-cramjam] 314.2370 (20.51) 638.6460 (8.72) 337.8341 (19.94) 34.3076 (7.80) 325.6750 (20.38) 22.5742 (37.39) 273;269 2,960.0329 (0.05) 2917 1 
+test_snappy[asyoulik.txt-snappy] 532.4610 (34.75) 881.1000 (12.02) 567.8900 (33.52) 43.3191 (9.85) 555.2595 (34.75) 41.0570 (68.00) 214;144 1,760.9043 (0.03) 1830 1 +test_snappy[fireworks.jpeg-cramjam] 41.9580 (2.74) 401.9790 (5.49) 46.1660 (2.73) 9.2259 (2.10) 44.0280 (2.76) 2.2282 (3.69) 1134;2184 21,660.9429 (0.37) 16493 1 +test_snappy[fireworks.jpeg-snappy] 15.3240 (1.0) 125.1180 (1.71) 16.9414 (1.0) 4.3968 (1.0) 15.9770 (1.0) 0.6038 (1.0) 1364;1766 59,027.0559 (1.0) 21891 1 +test_snappy[geo.protodata-cramjam] 109.6360 (7.15) 224.8130 (3.07) 119.6756 (7.06) 13.9476 (3.17) 114.2810 (7.15) 7.8245 (12.96) 765;871 8,355.9254 (0.14) 6107 1 +test_snappy[geo.protodata-snappy] 142.9090 (9.33) 335.9500 (4.58) 157.0690 (9.27) 18.1315 (4.12) 150.7170 (9.43) 10.6040 (17.56) 789;833 6,366.6276 (0.11) 5992 1 +test_snappy[html-cramjam] 148.6780 (9.70) 305.4290 (4.17) 159.0390 (9.39) 15.5382 (3.53) 154.0180 (9.64) 9.1190 (15.10) 702;779 6,287.7660 (0.11) 5853 1 +test_snappy[html-snappy] 156.2010 (10.19) 305.7490 (4.17) 168.3579 (9.94) 17.6330 (4.01) 161.9390 (10.14) 10.3047 (17.07) 563;600 5,939.7280 (0.10) 4239 1 +test_snappy[html_x_4-cramjam] 159.5320 (10.41) 504.7690 (6.89) 168.8123 (9.96) 18.3322 (4.17) 160.9430 (10.07) 9.5325 (15.79) 254;297 5,923.7385 (0.10) 2973 1 +test_snappy[html_x_4-snappy] 633.7830 (41.36) 1,055.7020 (14.41) 683.7402 (40.36) 56.0549 (12.75) 667.5120 (41.78) 45.0975 (74.70) 180;105 1,462.5438 (0.02) 1408 1 +test_snappy[kppkn.gtb-cramjam] 205.8880 (13.44) 391.8670 (5.35) 219.9534 (12.98) 21.6411 (4.92) 212.6530 (13.31) 14.3255 (23.73) 316;301 4,546.4180 (0.08) 2705 1 +test_snappy[kppkn.gtb-snappy] 504.1990 (32.90) 879.5870 (12.00) 546.0574 (32.23) 51.4486 (11.70) 531.9175 (33.29) 42.8270 (70.93) 237;198 1,831.3095 (0.03) 1774 1 +test_snappy[lcet10.txt-cramjam] 286.5250 (18.70) 559.8080 (7.64) 311.1426 (18.37) 33.3866 (7.59) 297.2815 (18.61) 28.1930 (46.70) 356;283 3,213.9606 (0.05) 3010 1 +test_snappy[lcet10.txt-snappy] 1,591.0940 (103.83) 2,195.9160 
(29.97) 1,711.3473 (101.02) 106.9863 (24.33) 1,676.0640 (104.90) 107.2650 (177.66) 126;32 584.3349 (0.01) 586 1 +test_snappy[paper-100k.pdf-cramjam] 48.8270 (3.19) 107.3140 (1.46) 53.8996 (3.18) 7.4700 (1.70) 50.9340 (3.19) 3.0750 (5.09) 1894;2399 18,553.0061 (0.31) 12112 1 +test_snappy[paper-100k.pdf-snappy] 19.9350 (1.30) 73.2760 (1.0) 22.8507 (1.35) 4.4256 (1.01) 21.3490 (1.34) 1.7780 (2.94) 4126;4357 43,762.4046 (0.74) 26118 1 +test_snappy[plrabn12.txt-cramjam] 524.0590 (34.20) 988.9050 (13.50) 558.4253 (32.96) 50.0324 (11.38) 542.4060 (33.95) 38.8713 (64.38) 171;133 1,790.7497 (0.03) 1629 1 +test_snappy[plrabn12.txt-snappy] 2,316.0010 (151.14) 3,066.4880 (41.85) 2,449.0969 (144.56) 122.0491 (27.76) 2,421.2130 (151.54) 113.4473 (187.90) 74;30 408.3138 (0.01) 401 1 +test_snappy[urls.10K-cramjam] 480.5240 (31.36) 960.4570 (13.11) 516.9837 (30.52) 53.2631 (12.11) 497.5085 (31.14) 38.3690 (63.55) 178;165 1,934.2969 (0.03) 1622 1 +test_snappy[urls.10K-snappy] 1,934.9160 (126.27) 2,981.1910 (40.68) 2,050.5994 (121.04) 125.9153 (28.64) 2,022.6760 (126.60) 100.3135 (166.15) 48;29 487.6623 (0.01) 501 1 +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ``` #### Lz4 @@ -92,34 +140,34 @@ test_snappy_raw[urls.10K-snappy] 1,813.1410 (117.55) 2,2 `make bench-lz4` ```bash ----------------------------------------------------------------------------------------------------------- benchmark: 24 tests ---------------------------------------------------------------------------------------------------------- -Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ -test_lz4[Mark.Twain-Tom.Sawyer.txt-cramjam] 203.4740 (1.0) 444.8800 (1.38) 224.6050 (1.06) 26.0675 (1.88) 216.5980 (1.05) 8.5017 (8.80) 365;607 4,452.2600 (0.94) 4369 1 -test_lz4[Mark.Twain-Tom.Sawyer.txt-python-lz4] 205.5300 (1.01) 323.3270 (1.0) 212.0422 (1.0) 13.9019 (1.0) 206.8320 (1.0) 3.3853 (3.50) 157;196 4,716.0415 (1.0) 1343 1 -test_lz4[alice29.txt-cramjam] 1,923.4740 (9.45) 2,223.4950 (6.88) 1,965.3029 (9.27) 47.7473 (3.43) 1,949.8210 (9.43) 52.7460 (54.60) 75;32 508.8274 (0.11) 522 1 -test_lz4[alice29.txt-python-lz4] 3,041.9810 (14.95) 3,775.3870 (11.68) 3,101.6142 (14.63) 75.2325 (5.41) 3,070.2935 (14.84) 86.3640 (89.40) 41;6 322.4128 (0.07) 284 1 -test_lz4[asyoulik.txt-cramjam] 1,513.2880 (7.44) 1,909.5500 (5.91) 1,547.3341 (7.30) 44.6617 (3.21) 1,522.1690 (7.36) 42.7083 (44.21) 93;45 646.2728 (0.14) 625 1 -test_lz4[asyoulik.txt-python-lz4] 2,721.3430 (13.37) 3,034.5050 (9.39) 2,775.2225 (13.09) 59.9928 (4.32) 2,750.2920 (13.30) 75.6480 (78.31) 60;9 360.3315 (0.08) 351 1 -test_lz4[fireworks.jpeg-cramjam] 302.6000 (1.49) 428.0990 (1.32) 309.0598 (1.46) 15.2582 (1.10) 303.5465 (1.47) 0.9660 (1.0) 187;514 3,235.6199 (0.69) 2132 1 -test_lz4[fireworks.jpeg-python-lz4] 2,648.8040 (13.02) 3,357.4280 (10.38) 2,728.6830 (12.87) 71.7825 (5.16) 2,706.7730 (13.09) 78.2895 (81.05) 53;11 366.4772 (0.08) 359 1 -test_lz4[geo.protodata-cramjam] 508.2790 (2.50) 646.2610 (2.00) 521.5591 (2.46) 20.3970 (1.47) 512.9110 (2.48) 9.2445 (9.57) 182;244 1,917.3284 (0.41) 1704 1 -test_lz4[geo.protodata-python-lz4] 690.1580 (3.39) 1,039.7180 (3.22) 711.8781 (3.36) 28.5339 (2.05) 700.7910 (3.39) 16.3645 (16.94) 111;108 1,404.7349 (0.30) 1136 1 -test_lz4[html-cramjam] 532.0860 (2.62) 692.3440 (2.14) 544.6823 (2.57) 22.7418 (1.64) 534.2530 (2.58) 
15.3568 (15.90) 165;143 1,835.9326 (0.39) 1439 1 -test_lz4[html-python-lz4] 697.1870 (3.43) 1,348.5820 (4.17) 720.6965 (3.40) 58.3927 (4.20) 705.8270 (3.41) 19.6258 (20.32) 74;110 1,387.5467 (0.29) 1283 1 -test_lz4[html_x_4-cramjam] 2,250.3640 (11.06) 2,993.5320 (9.26) 2,294.9767 (10.82) 57.7055 (4.15) 2,278.1760 (11.01) 57.5988 (59.63) 39;9 435.7343 (0.09) 335 1 -test_lz4[html_x_4-python-lz4] 2,864.5650 (14.08) 3,763.0070 (11.64) 2,949.5807 (13.91) 102.1472 (7.35) 2,926.3090 (14.15) 86.7158 (89.77) 23;13 339.0312 (0.07) 305 1 -test_lz4[kppkn.gtb-cramjam] 1,825.6790 (8.97) 2,237.8240 (6.92) 1,862.1173 (8.78) 48.1581 (3.46) 1,849.8330 (8.94) 42.7253 (44.23) 51;29 537.0231 (0.11) 439 1 -test_lz4[kppkn.gtb-python-lz4] 3,231.7460 (15.88) 3,588.2730 (11.10) 3,316.1698 (15.64) 62.4040 (4.49) 3,294.2780 (15.93) 83.7080 (86.65) 56;9 301.5527 (0.06) 292 1 -test_lz4[lcet10.txt-cramjam] 5,081.3180 (24.97) 8,123.0820 (25.12) 5,204.8228 (24.55) 251.5433 (18.09) 5,151.7120 (24.91) 113.3202 (117.31) 7;11 192.1295 (0.04) 195 1 -test_lz4[lcet10.txt-python-lz4] 8,138.7970 (40.00) 10,127.4950 (31.32) 8,333.2891 (39.30) 218.4185 (15.71) 8,300.2250 (40.13) 161.9515 (167.65) 9;6 120.0006 (0.03) 117 1 -test_lz4[paper-100k.pdf-cramjam] 418.5770 (2.06) 689.9690 (2.13) 428.1164 (2.02) 21.2000 (1.52) 420.0800 (2.03) 4.1125 (4.26) 191;476 2,335.8133 (0.50) 2133 1 -test_lz4[paper-100k.pdf-python-lz4] 1,711.3010 (8.41) 2,172.5910 (6.72) 1,749.2066 (8.25) 45.5884 (3.28) 1,728.4740 (8.36) 41.2590 (42.71) 66;44 571.6878 (0.12) 538 1 -test_lz4[plrabn12.txt-cramjam] 6,176.6160 (30.36) 6,769.9650 (20.94) 6,291.9010 (29.67) 104.2639 (7.50) 6,270.1985 (30.32) 123.0515 (127.38) 36;6 158.9345 (0.03) 156 1 -test_lz4[plrabn12.txt-python-lz4] 11,555.2180 (56.79) 12,696.5330 (39.27) 11,776.3184 (55.54) 164.0822 (11.80) 11,730.5170 (56.72) 145.7985 (150.93) 15;3 84.9162 (0.02) 84 1 -test_lz4[urls.10K-cramjam] 6,161.1970 (30.28) 6,947.9270 (21.49) 6,294.6157 (29.69) 118.5858 (8.53) 6,283.8170 (30.38) 157.5450 
(163.09) 36;4 158.8659 (0.03) 166 1 -test_lz4[urls.10K-python-lz4] 9,186.5620 (45.15) 9,727.7520 (30.09) 9,330.3774 (44.00) 97.1408 (6.99) 9,324.5380 (45.08) 114.7825 (118.82) 29;4 107.1768 (0.02) 105 1 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +------------------------------------------------------------------------------------------------------------ benchmark: 24 tests ------------------------------------------------------------------------------------------------------------ +Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +test_lz4[Mark.Twain-Tom.Sawyer.txt-cramjam] 205.9030 (1.0) 487.6750 (1.16) 235.1050 (1.0) 42.5729 (1.46) 218.6220 (1.0) 21.7648 (1.13) 482;537 4,253.4186 (1.0) 3949 1 +test_lz4[Mark.Twain-Tom.Sawyer.txt-python-lz4] 211.8470 (1.03) 421.1400 (1.0) 258.1943 (1.10) 61.8064 (2.11) 223.0840 (1.02) 60.6820 (3.16) 453;175 3,873.0528 (0.91) 1935 1 +test_lz4[alice29.txt-cramjam] 1,829.0490 (8.88) 2,390.3910 (5.68) 1,939.9762 (8.25) 101.4219 (3.47) 1,913.4190 (8.75) 93.0095 (4.84) 103;34 515.4702 (0.12) 501 1 +test_lz4[alice29.txt-python-lz4] 3,035.4070 (14.74) 3,972.2100 (9.43) 3,263.6920 (13.88) 156.1966 (5.34) 3,215.2680 (14.71) 166.8995 (8.68) 73;16 306.4015 (0.07) 309 1 +test_lz4[asyoulik.txt-cramjam] 1,440.7250 (7.00) 2,176.8530 (5.17) 1,530.6210 (6.51) 86.4176 (2.95) 1,508.6560 (6.90) 75.1295 (3.91) 133;45 653.3296 (0.15) 604 1 +test_lz4[asyoulik.txt-python-lz4] 2,627.0850 (12.76) 4,329.6350 (10.28) 2,802.7557 (11.92) 209.5091 (7.16) 2,757.2175 (12.61) 113.6400 (5.91) 24;26 356.7917 (0.08) 
322 1 +test_lz4[fireworks.jpeg-cramjam] 298.4680 (1.45) 534.6860 (1.27) 321.2755 (1.37) 29.2533 (1.0) 307.8030 (1.41) 19.2315 (1.0) 214;195 3,112.5937 (0.73) 1987 1 +test_lz4[fireworks.jpeg-python-lz4] 2,646.1140 (12.85) 3,782.5490 (8.98) 2,811.4612 (11.96) 141.2595 (4.83) 2,785.4975 (12.74) 133.7265 (6.95) 25;9 355.6869 (0.08) 204 1 +test_lz4[geo.protodata-cramjam] 488.7870 (2.37) 840.0170 (1.99) 522.8667 (2.22) 49.3122 (1.69) 508.8640 (2.33) 35.3760 (1.84) 169;150 1,912.5334 (0.45) 1686 1 +test_lz4[geo.protodata-python-lz4] 671.5480 (3.26) 1,211.8090 (2.88) 719.1932 (3.06) 47.4541 (1.62) 706.5320 (3.23) 35.7863 (1.86) 115;89 1,390.4469 (0.33) 1137 1 +test_lz4[html-cramjam] 508.0540 (2.47) 870.8210 (2.07) 556.4302 (2.37) 55.0444 (1.88) 539.5200 (2.47) 40.0038 (2.08) 162;130 1,797.1706 (0.42) 1387 1 +test_lz4[html-python-lz4] 696.9610 (3.38) 1,266.9380 (3.01) 769.8121 (3.27) 89.4869 (3.06) 740.7540 (3.39) 54.1665 (2.82) 94;98 1,299.0183 (0.31) 1085 1 +test_lz4[html_x_4-cramjam] 2,149.2940 (10.44) 4,065.1930 (9.65) 2,412.1607 (10.26) 305.1893 (10.43) 2,325.4890 (10.64) 184.0815 (9.57) 38;39 414.5661 (0.10) 348 1 +test_lz4[html_x_4-python-lz4] 2,932.0430 (14.24) 3,806.8100 (9.04) 3,143.4955 (13.37) 165.2213 (5.65) 3,102.5030 (14.19) 111.0107 (5.77) 40;22 318.1172 (0.07) 231 1 +test_lz4[kppkn.gtb-cramjam] 1,679.8610 (8.16) 2,542.5370 (6.04) 1,789.8313 (7.61) 106.6801 (3.65) 1,768.8005 (8.09) 87.1930 (4.53) 59;27 558.7119 (0.13) 462 1 +test_lz4[kppkn.gtb-python-lz4] 3,209.9940 (15.59) 4,404.1140 (10.46) 3,393.7427 (14.44) 157.5778 (5.39) 3,351.8800 (15.33) 150.1042 (7.81) 31;12 294.6599 (0.07) 275 1 +test_lz4[lcet10.txt-cramjam] 4,852.5360 (23.57) 7,953.2970 (18.89) 5,275.6863 (22.44) 474.4112 (16.22) 5,182.7910 (23.71) 278.4967 (14.48) 9;9 189.5488 (0.04) 199 1 +test_lz4[lcet10.txt-python-lz4] 8,251.3110 (40.07) 12,421.4480 (29.49) 8,792.2057 (37.40) 554.4787 (18.95) 8,662.1010 (39.62) 373.2330 (19.41) 7;7 113.7371 (0.03) 114 1 +test_lz4[paper-100k.pdf-cramjam] 
405.3170 (1.97) 673.3370 (1.60) 429.1101 (1.83) 34.4027 (1.18) 419.5350 (1.92) 25.4660 (1.32) 165;148 2,330.4041 (0.55) 2110 1 +test_lz4[paper-100k.pdf-python-lz4] 1,752.0420 (8.51) 2,357.7380 (5.60) 1,847.5694 (7.86) 86.1250 (2.94) 1,821.4480 (8.33) 86.6703 (4.51) 125;34 541.2517 (0.13) 517 1 +test_lz4[plrabn12.txt-cramjam] 5,994.5400 (29.11) 10,160.5840 (24.13) 6,528.1635 (27.77) 479.6425 (16.40) 6,415.1230 (29.34) 244.5412 (12.72) 11;11 153.1824 (0.04) 127 1 +test_lz4[plrabn12.txt-python-lz4] 11,911.5420 (57.85) 19,488.4880 (46.28) 13,580.5948 (57.76) 1,975.2401 (67.52) 12,662.1220 (57.92) 1,491.3778 (77.55) 13;13 73.6345 (0.02) 81 1 +test_lz4[urls.10K-cramjam] 6,125.3900 (29.75) 10,022.6590 (23.80) 6,906.8542 (29.38) 800.5537 (27.37) 6,660.1280 (30.46) 327.7380 (17.04) 15;16 144.7837 (0.03) 153 1 +test_lz4[urls.10K-python-lz4] 9,350.5950 (45.41) 13,417.3250 (31.86) 10,231.2214 (43.52) 722.7599 (24.71) 10,112.5745 (46.26) 622.8635 (32.39) 11;8 97.7400 (0.02) 96 1 +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ``` #### Brotli @@ -130,30 +178,30 @@ test_lz4[urls.10K-python-lz4] 9,186.5620 (45.15) 9,727 ----------------------------------------------------------------------------------------------------- benchmark: 24 tests ------------------------------------------------------------------------------------------------------ Name (time in ms) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -test_brotli[Mark.Twain-Tom.Sawyer.txt-brotli] 17.9007 (1.0) 18.5634 (1.0) 18.0894 (1.0) 0.1631 (1.0) 18.0193 (1.0) 0.1939 (1.0) 10;1 55.2809 (1.0) 43 1 
-test_brotli[Mark.Twain-Tom.Sawyer.txt-cramjam] 19.0021 (1.06) 19.8885 (1.07) 19.2310 (1.06) 0.1936 (1.19) 19.1951 (1.07) 0.2085 (1.07) 15;2 51.9994 (0.94) 51 1 -test_brotli[alice29.txt-brotli] 220.5389 (12.32) 221.1390 (11.91) 220.7948 (12.21) 0.2183 (1.34) 220.7497 (12.25) 0.2151 (1.11) 2;0 4.5291 (0.08) 5 1 -test_brotli[alice29.txt-cramjam] 232.1295 (12.97) 233.8130 (12.60) 232.9461 (12.88) 0.7112 (4.36) 233.1537 (12.94) 1.1918 (6.15) 2;0 4.2928 (0.08) 5 1 -test_brotli[asyoulik.txt-brotli] 174.3166 (9.74) 175.6382 (9.46) 175.1187 (9.68) 0.5167 (3.17) 175.3066 (9.73) 0.8275 (4.27) 2;0 5.7104 (0.10) 6 1 -test_brotli[asyoulik.txt-cramjam] 184.5938 (10.31) 187.0546 (10.08) 185.4668 (10.25) 0.9035 (5.54) 185.3526 (10.29) 1.1405 (5.88) 1;0 5.3918 (0.10) 6 1 -test_brotli[fireworks.jpeg-brotli] 70.5355 (3.94) 82.0225 (4.42) 71.7829 (3.97) 2.9634 (18.17) 70.9972 (3.94) 0.5353 (2.76) 1;1 13.9309 (0.25) 14 1 -test_brotli[fireworks.jpeg-cramjam] 76.9092 (4.30) 81.3156 (4.38) 77.7946 (4.30) 1.2098 (7.42) 77.4907 (4.30) 0.8702 (4.49) 1;1 12.8544 (0.23) 13 1 -test_brotli[geo.protodata-brotli] 123.9892 (6.93) 126.4977 (6.81) 124.9959 (6.91) 1.0019 (6.14) 124.7437 (6.92) 1.6731 (8.63) 3;0 8.0003 (0.14) 8 1 -test_brotli[geo.protodata-cramjam] 127.8089 (7.14) 129.7215 (6.99) 128.3528 (7.10) 0.6489 (3.98) 128.1646 (7.11) 0.6682 (3.45) 1;1 7.7910 (0.14) 8 1 -test_brotli[html-brotli] 132.3877 (7.40) 139.4814 (7.51) 134.3966 (7.43) 2.8777 (17.64) 132.9534 (7.38) 3.0888 (15.93) 2;0 7.4407 (0.13) 8 1 -test_brotli[html-cramjam] 134.8224 (7.53) 140.8846 (7.59) 136.1176 (7.52) 1.9671 (12.06) 135.6059 (7.53) 0.7402 (3.82) 1;1 7.3466 (0.13) 8 1 -test_brotli[html_x_4-brotli] 161.5212 (9.02) 163.8326 (8.83) 162.3997 (8.98) 0.8996 (5.51) 161.8780 (8.98) 1.4229 (7.34) 1;0 6.1576 (0.11) 7 1 -test_brotli[html_x_4-cramjam] 169.5740 (9.47) 171.0375 (9.21) 170.2433 (9.41) 0.5199 (3.19) 170.1756 (9.44) 0.7178 (3.70) 2;0 5.8739 (0.11) 6 1 -test_brotli[kppkn.gtb-brotli] 417.5436 (23.33) 419.3681 (22.59) 
418.4432 (23.13) 0.8296 (5.09) 418.6743 (23.23) 1.5177 (7.83) 3;0 2.3898 (0.04) 5 1 -test_brotli[kppkn.gtb-cramjam] 440.9632 (24.63) 443.1299 (23.87) 442.1863 (24.44) 1.1046 (6.77) 442.8029 (24.57) 2.0629 (10.64) 2;0 2.2615 (0.04) 5 1 -test_brotli[lcet10.txt-brotli] 689.7913 (38.53) 743.3864 (40.05) 717.3455 (39.66) 24.4643 (149.97) 729.2844 (40.47) 42.6345 (219.83) 3;0 1.3940 (0.03) 5 1 -test_brotli[lcet10.txt-cramjam] 721.1161 (40.28) 732.6687 (39.47) 724.4550 (40.05) 4.6831 (28.71) 723.2143 (40.14) 3.9332 (20.28) 1;1 1.3803 (0.02) 5 1 -test_brotli[paper-100k.pdf-brotli] 394.1195 (22.02) 396.6882 (21.37) 395.3076 (21.85) 1.1796 (7.23) 394.7834 (21.91) 2.1113 (10.89) 2;0 2.5297 (0.05) 5 1 -test_brotli[paper-100k.pdf-cramjam] 400.0918 (22.35) 403.0363 (21.71) 401.5485 (22.20) 1.2038 (7.38) 401.7684 (22.30) 1.9826 (10.22) 2;0 2.4904 (0.05) 5 1 -test_brotli[plrabn12.txt-brotli] 753.8846 (42.11) 758.9727 (40.89) 756.4733 (41.82) 2.1144 (12.96) 756.1968 (41.97) 3.5936 (18.53) 2;0 1.3219 (0.02) 5 1 -test_brotli[plrabn12.txt-cramjam] 794.5884 (44.39) 802.1071 (43.21) 796.4616 (44.03) 3.1963 (19.59) 795.1971 (44.13) 2.7926 (14.40) 1;1 1.2556 (0.02) 5 1 -test_brotli[urls.10K-brotli] 1,219.2465 (68.11) 1,227.4113 (66.12) 1,224.0802 (67.67) 3.1470 (19.29) 1,224.5238 (67.96) 4.2651 (21.99) 2;0 0.8169 (0.01) 5 1 -test_brotli[urls.10K-cramjam] 1,272.6277 (71.09) 1,285.8821 (69.27) 1,279.8358 (70.75) 5.4317 (33.30) 1,278.5339 (70.95) 8.5926 (44.30) 2;0 0.7814 (0.01) 5 1 +test_brotli[Mark.Twain-Tom.Sawyer.txt-brotli] 18.2539 (1.0) 22.8656 (1.14) 18.9863 (1.0) 0.7931 (1.94) 18.8713 (1.0) 0.3620 (1.0) 3;3 52.6695 (1.0) 51 1 +test_brotli[Mark.Twain-Tom.Sawyer.txt-cramjam] 18.4573 (1.01) 19.9932 (1.0) 19.0165 (1.00) 0.4094 (1.0) 18.9207 (1.00) 0.5462 (1.51) 18;0 52.5860 (1.00) 51 1 +test_brotli[alice29.txt-brotli] 225.5098 (12.35) 233.2325 (11.67) 227.9498 (12.01) 3.2069 (7.83) 226.3776 (12.00) 4.0514 (11.19) 1;0 4.3869 (0.08) 5 1 +test_brotli[alice29.txt-cramjam] 233.5171 (12.79) 
235.4350 (11.78) 234.6430 (12.36) 0.7237 (1.77) 234.8783 (12.45) 0.8757 (2.42) 2;0 4.2618 (0.08) 5 1 +test_brotli[asyoulik.txt-brotli] 187.4845 (10.27) 197.4504 (9.88) 193.1492 (10.17) 3.7175 (9.08) 194.0205 (10.28) 4.5618 (12.60) 2;0 5.1773 (0.10) 5 1 +test_brotli[asyoulik.txt-cramjam] 185.8101 (10.18) 192.5796 (9.63) 189.0552 (9.96) 2.4759 (6.05) 189.2976 (10.03) 3.5369 (9.77) 2;0 5.2895 (0.10) 6 1 +test_brotli[fireworks.jpeg-brotli] 74.7665 (4.10) 107.0830 (5.36) 82.2891 (4.33) 8.4381 (20.61) 80.2819 (4.25) 8.3648 (23.11) 1;1 12.1523 (0.23) 13 1 +test_brotli[fireworks.jpeg-cramjam] 74.2872 (4.07) 80.4223 (4.02) 76.5993 (4.03) 2.0518 (5.01) 75.4147 (4.00) 3.2772 (9.05) 4;0 13.0549 (0.25) 13 1 +test_brotli[geo.protodata-brotli] 128.6399 (7.05) 130.4648 (6.53) 129.3095 (6.81) 0.6225 (1.52) 129.3085 (6.85) 0.8858 (2.45) 2;0 7.7334 (0.15) 8 1 +test_brotli[geo.protodata-cramjam] 132.1978 (7.24) 135.9461 (6.80) 133.4739 (7.03) 1.3324 (3.25) 132.8753 (7.04) 1.8923 (5.23) 2;0 7.4921 (0.14) 8 1 +test_brotli[html-brotli] 134.8012 (7.38) 145.0839 (7.26) 136.9357 (7.21) 3.3821 (8.26) 135.6421 (7.19) 1.6070 (4.44) 1;1 7.3027 (0.14) 8 1 +test_brotli[html-cramjam] 138.4083 (7.58) 142.2173 (7.11) 139.8572 (7.37) 1.4800 (3.62) 139.2130 (7.38) 2.5172 (6.95) 2;0 7.1501 (0.14) 8 1 +test_brotli[html_x_4-brotli] 171.6389 (9.40) 180.3112 (9.02) 176.3479 (9.29) 3.4427 (8.41) 176.0761 (9.33) 6.0443 (16.70) 3;0 5.6706 (0.11) 6 1 +test_brotli[html_x_4-cramjam] 180.0620 (9.86) 196.8542 (9.85) 188.7382 (9.94) 7.2089 (17.61) 189.4560 (10.04) 15.2504 (42.13) 3;0 5.2983 (0.10) 6 1 +test_brotli[kppkn.gtb-brotli] 447.6251 (24.52) 475.9302 (23.80) 463.3429 (24.40) 10.8179 (26.43) 464.2027 (24.60) 15.2439 (42.12) 2;0 2.1582 (0.04) 5 1 +test_brotli[kppkn.gtb-cramjam] 446.2874 (24.45) 456.8182 (22.85) 450.9967 (23.75) 4.4023 (10.75) 452.4705 (23.98) 6.8669 (18.97) 2;0 2.2173 (0.04) 5 1 +test_brotli[lcet10.txt-brotli] 714.6673 (39.15) 734.2637 (36.73) 723.5614 (38.11) 8.6142 (21.04) 720.8160 (38.20) 
15.2477 (42.13) 2;0 1.3821 (0.03) 5 1 +test_brotli[lcet10.txt-cramjam] 736.3283 (40.34) 811.1870 (40.57) 764.1775 (40.25) 30.8189 (75.28) 754.4741 (39.98) 46.2688 (127.83) 1;0 1.3086 (0.02) 5 1 +test_brotli[paper-100k.pdf-brotli] 406.3972 (22.26) 408.4076 (20.43) 407.4876 (21.46) 0.8200 (2.00) 407.7548 (21.61) 1.3093 (3.62) 2;0 2.4541 (0.05) 5 1 +test_brotli[paper-100k.pdf-cramjam] 409.8010 (22.45) 434.1073 (21.71) 421.4300 (22.20) 9.7730 (23.87) 417.9097 (22.15) 14.9918 (41.42) 2;0 2.3729 (0.05) 5 1 +test_brotli[plrabn12.txt-brotli] 794.1873 (43.51) 819.0397 (40.97) 806.7543 (42.49) 10.6155 (25.93) 803.9709 (42.60) 18.2808 (50.51) 2;0 1.2395 (0.02) 5 1 +test_brotli[plrabn12.txt-cramjam] 802.7903 (43.98) 818.8850 (40.96) 811.7899 (42.76) 7.6594 (18.71) 814.7583 (43.17) 14.3045 (39.52) 1;0 1.2318 (0.02) 5 1 +test_brotli[urls.10K-brotli] 1,282.8863 (70.28) 1,329.7179 (66.51) 1,303.8467 (68.67) 19.9188 (48.66) 1,294.4292 (68.59) 32.3713 (89.43) 2;0 0.7670 (0.01) 5 1 +test_brotli[urls.10K-cramjam] 1,300.4646 (71.24) 1,344.7246 (67.26) 1,317.4961 (69.39) 20.5248 (50.14) 1,305.3920 (69.17) 34.9071 (96.44) 1;0 0.7590 (0.01) 5 1 -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ``` @@ -162,34 +210,34 @@ test_brotli[urls.10K-cramjam] 1,272.6277 (71.09) 1,285.8 `make bench-zstd` ```bash -------------------------------------------------------------------------------------------------------- benchmark: 24 tests -------------------------------------------------------------------------------------------------------- -Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -test_zstd[Mark.Twain-Tom.Sawyer.txt-cramjam] 161.1050 (1.0) 433.4360 (1.45) 165.1466 (1.0) 12.3976 (1.0) 162.1550 (1.0) 0.6950 (1.0) 100;276 6,055.2258 (1.0) 1585 1 -test_zstd[Mark.Twain-Tom.Sawyer.txt-zstd] 220.8000 (1.37) 299.4960 (1.0) 249.8003 (1.51) 17.7341 (1.43) 256.9255 (1.58) 30.2230 (43.49) 17;0 4,003.1981 (0.66) 50 1 -test_zstd[alice29.txt-cramjam] 1,421.3080 (8.82) 2,037.1450 (6.80) 1,472.4503 (8.92) 66.0363 (5.33) 1,451.2480 (8.95) 61.9850 (89.19) 74;21 679.1401 (0.11) 615 1 -test_zstd[alice29.txt-zstd] 1,317.3690 (8.18) 2,371.9740 (7.92) 1,365.1289 (8.27) 95.6726 (7.72) 1,343.0765 (8.28) 44.4860 (64.01) 35;57 732.5316 (0.12) 684 1 -test_zstd[asyoulik.txt-cramjam] 1,256.8620 (7.80) 1,715.7540 (5.73) 1,294.9080 (7.84) 49.4700 (3.99) 1,280.9885 (7.90) 41.7850 (60.12) 82;51 772.2556 (0.13) 698 1 -test_zstd[asyoulik.txt-zstd] 1,156.5710 (7.18) 1,736.3310 (5.80) 1,189.7247 (7.20) 45.2945 (3.65) 1,178.9070 (7.27) 42.4503 (61.08) 103;47 840.5306 (0.14) 837 1 -test_zstd[fireworks.jpeg-cramjam] 283.7220 (1.76) 502.0290 (1.68) 300.4003 (1.82) 24.4473 (1.97) 293.7890 (1.81) 19.9285 (28.67) 301;184 3,328.8915 (0.55) 2620 1 -test_zstd[fireworks.jpeg-zstd] 216.5220 (1.34) 586.8800 (1.96) 235.8188 (1.43) 28.7429 (2.32) 223.4890 (1.38) 19.2512 (27.70) 310;265 4,240.5432 (0.70) 3205 1 -test_zstd[geo.protodata-cramjam] 330.2580 (2.05) 523.4440 (1.75) 341.2278 (2.07) 20.4295 (1.65) 332.8170 (2.05) 10.0517 (14.46) 196;208 2,930.5937 (0.48) 2083 1 -test_zstd[geo.protodata-zstd] 280.5380 (1.74) 512.8140 (1.71) 292.1102 (1.77) 18.6813 (1.51) 285.3240 (1.76) 7.8345 (11.27) 257;405 3,423.3652 (0.57) 3007 1 -test_zstd[html-cramjam] 357.8320 (2.22) 648.3850 (2.16) 369.4639 (2.24) 24.1993 (1.95) 359.8050 (2.22) 10.8335 (15.59) 155;207 2,706.6244 (0.45) 
2036 1 -test_zstd[html-zstd] 311.9870 (1.94) 561.2720 (1.87) 319.6523 (1.94) 15.3747 (1.24) 314.0850 (1.94) 3.5158 (5.06) 269;608 3,128.3989 (0.52) 2879 1 -test_zstd[html_x_4-cramjam] 465.3780 (2.89) 16,841.4000 (56.23) 555.1090 (3.36) 450.9177 (36.37) 505.6665 (3.12) 60.2160 (86.64) 16;185 1,801.4481 (0.30) 1388 1 -test_zstd[html_x_4-zstd] 365.6660 (2.27) 625.7230 (2.09) 384.4546 (2.33) 28.6141 (2.31) 369.0950 (2.28) 23.1068 (33.25) 200;138 2,601.0872 (0.43) 1765 1 -test_zstd[kppkn.gtb-cramjam] 1,124.4530 (6.98) 2,338.1150 (7.81) 1,277.8409 (7.74) 165.0912 (13.32) 1,223.1610 (7.54) 105.8960 (152.37) 71;69 782.5700 (0.13) 659 1 -test_zstd[kppkn.gtb-zstd] 1,045.2590 (6.49) 2,456.0800 (8.20) 1,159.7191 (7.02) 211.4021 (17.05) 1,098.6080 (6.78) 76.0943 (109.49) 66;87 862.2778 (0.14) 893 1 -test_zstd[lcet10.txt-cramjam] 3,681.4250 (22.85) 7,913.6370 (26.42) 4,061.2618 (24.59) 464.6470 (37.48) 3,955.3625 (24.39) 318.0175 (457.58) 13;10 246.2289 (0.04) 244 1 -test_zstd[lcet10.txt-zstd] 3,409.9820 (21.17) 6,680.4900 (22.31) 3,815.8187 (23.11) 479.5487 (38.68) 3,653.2820 (22.53) 351.3137 (505.49) 23;20 262.0670 (0.04) 255 1 -test_zstd[paper-100k.pdf-cramjam] 493.9780 (3.07) 1,139.2420 (3.80) 508.1456 (3.08) 31.2199 (2.52) 496.2745 (3.06) 19.9030 (28.64) 71;58 1,967.9400 (0.32) 1026 1 -test_zstd[paper-100k.pdf-zstd] 479.7970 (2.98) 716.8680 (2.39) 495.2747 (3.00) 27.0047 (2.18) 483.4690 (2.98) 19.1000 (27.48) 171;115 2,019.0817 (0.33) 1639 1 -test_zstd[plrabn12.txt-cramjam] 4,967.6940 (30.84) 5,992.0030 (20.01) 5,086.0596 (30.80) 130.3956 (10.52) 5,055.3775 (31.18) 132.6160 (190.81) 12;6 196.6159 (0.03) 166 1 -test_zstd[plrabn12.txt-zstd] 4,617.7570 (28.66) 5,760.2680 (19.23) 4,726.4604 (28.62) 116.1931 (9.37) 4,694.4440 (28.95) 120.5897 (173.51) 20;9 211.5748 (0.03) 211 1 -test_zstd[urls.10K-cramjam] 4,553.1040 (28.26) 5,873.6000 (19.61) 4,724.6278 (28.61) 150.2981 (12.12) 4,694.5230 (28.95) 180.9015 (260.29) 39;3 211.6569 (0.03) 197 1 -test_zstd[urls.10K-zstd] 4,198.9830 
(26.06) 6,017.4010 (20.09) 4,444.2599 (26.91) 258.4360 (20.85) 4,379.3710 (27.01) 191.9520 (276.19) 18;17 225.0093 (0.04) 231 1 ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +------------------------------------------------------------------------------------------------------- benchmark: 24 tests ------------------------------------------------------------------------------------------------------- +Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +test_zstd[Mark.Twain-Tom.Sawyer.txt-cramjam] 154.4260 (1.0) 429.8220 (1.61) 165.7084 (1.0) 18.0132 (1.0) 159.4600 (1.0) 11.8590 (1.0) 211;207 6,034.6953 (1.0) 1685 1 +test_zstd[Mark.Twain-Tom.Sawyer.txt-zstd] 191.9860 (1.24) 267.6270 (1.0) 219.1654 (1.32) 28.9777 (1.61) 197.9910 (1.24) 56.6957 (4.78) 22;0 4,562.7633 (0.76) 53 1 +test_zstd[alice29.txt-cramjam] 1,362.2300 (8.82) 2,102.8580 (7.86) 1,439.4675 (8.69) 73.2922 (4.07) 1,422.7575 (8.92) 70.1800 (5.92) 134;36 694.7014 (0.12) 684 1 +test_zstd[alice29.txt-zstd] 1,315.1700 (8.52) 2,365.9580 (8.84) 1,403.9396 (8.47) 120.7466 (6.70) 1,372.8835 (8.61) 70.7280 (5.96) 72;76 712.2813 (0.12) 744 1 +test_zstd[asyoulik.txt-cramjam] 1,211.2850 (7.84) 1,841.2200 (6.88) 1,284.1431 (7.75) 77.5367 (4.30) 1,270.1210 (7.97) 59.2710 (5.00) 78;53 778.7294 (0.13) 757 1 +test_zstd[asyoulik.txt-zstd] 1,157.6700 (7.50) 1,653.1200 (6.18) 1,220.0159 (7.36) 65.1603 (3.62) 1,204.0080 (7.55) 55.9547 (4.72) 92;49 819.6615 (0.14) 805 1 +test_zstd[fireworks.jpeg-cramjam] 292.3470 (1.89) 602.9980 (2.25) 313.0014 (1.89) 33.5971 (1.87) 302.4510 (1.90) 20.7312 
(1.75) 287;295 3,194.8734 (0.53) 3057 1 +test_zstd[fireworks.jpeg-zstd] 216.5420 (1.40) 452.4620 (1.69) 229.1745 (1.38) 22.7349 (1.26) 222.9190 (1.40) 12.9210 (1.09) 378;420 4,363.4868 (0.72) 3623 1 +test_zstd[geo.protodata-cramjam] 324.0560 (2.10) 672.1640 (2.51) 354.0909 (2.14) 42.7156 (2.37) 340.2680 (2.13) 27.8100 (2.35) 216;209 2,824.1338 (0.47) 2621 1 +test_zstd[geo.protodata-zstd] 281.7450 (1.82) 535.1640 (2.00) 300.6179 (1.81) 29.3579 (1.63) 291.3200 (1.83) 18.6490 (1.57) 247;242 3,326.4816 (0.55) 2894 1 +test_zstd[html-cramjam] 349.8810 (2.27) 769.5230 (2.88) 376.6673 (2.27) 43.1311 (2.39) 362.2320 (2.27) 27.6060 (2.33) 188;186 2,654.8631 (0.44) 2332 1 +test_zstd[html-zstd] 312.4120 (2.02) 580.8570 (2.17) 333.4397 (2.01) 32.0637 (1.78) 323.5910 (2.03) 21.6778 (1.83) 253;222 2,999.0433 (0.50) 2851 1 +test_zstd[html_x_4-cramjam] 476.8870 (3.09) 887.5280 (3.32) 537.3586 (3.24) 76.9432 (4.27) 509.1320 (3.19) 46.7175 (3.94) 115;122 1,860.9548 (0.31) 909 1 +test_zstd[html_x_4-zstd] 364.0010 (2.36) 797.9640 (2.98) 389.8506 (2.35) 37.9180 (2.11) 377.5145 (2.37) 28.5240 (2.41) 215;177 2,565.0855 (0.43) 2332 1 +test_zstd[kppkn.gtb-cramjam] 1,082.0570 (7.01) 1,696.9090 (6.34) 1,148.6073 (6.93) 72.4650 (4.02) 1,131.7090 (7.10) 57.8217 (4.88) 85;62 870.6196 (0.14) 749 1 +test_zstd[kppkn.gtb-zstd] 1,043.9460 (6.76) 1,771.5120 (6.62) 1,110.4534 (6.70) 81.5983 (4.53) 1,090.0220 (6.84) 54.9060 (4.63) 95;82 900.5331 (0.15) 879 1 +test_zstd[lcet10.txt-cramjam] 3,509.2670 (22.72) 5,409.0240 (20.21) 3,734.3465 (22.54) 230.2715 (12.78) 3,684.9145 (23.11) 163.8595 (13.82) 19;14 267.7845 (0.04) 268 1 +test_zstd[lcet10.txt-zstd] 3,387.3740 (21.94) 5,608.0490 (20.95) 3,595.1312 (21.70) 246.2549 (13.67) 3,542.5960 (22.22) 180.9382 (15.26) 19;12 278.1540 (0.05) 283 1 +test_zstd[paper-100k.pdf-cramjam] 488.0500 (3.16) 942.0480 (3.52) 520.3597 (3.14) 44.4527 (2.47) 508.0725 (3.19) 36.1535 (3.05) 166;116 1,921.7476 (0.32) 1900 1 +test_zstd[paper-100k.pdf-zstd] 479.8510 (3.11) 868.9010 
(3.25) 520.1514 (3.14) 46.2761 (2.57) 509.5520 (3.20) 28.6118 (2.41) 197;190 1,922.5171 (0.32) 1977 1 +test_zstd[plrabn12.txt-cramjam] 4,742.2590 (30.71) 6,472.1400 (24.18) 5,019.8933 (30.29) 234.2066 (13.00) 4,958.0030 (31.09) 198.3953 (16.73) 26;12 199.2074 (0.03) 167 1 +test_zstd[plrabn12.txt-zstd] 4,586.0060 (29.70) 6,362.5020 (23.77) 4,846.4730 (29.25) 238.6297 (13.25) 4,803.3360 (30.12) 206.9840 (17.45) 26;9 206.3356 (0.03) 203 1 +test_zstd[urls.10K-cramjam] 4,455.0140 (28.85) 8,134.9550 (30.40) 4,870.4095 (29.39) 491.9910 (27.31) 4,727.1960 (29.65) 212.0393 (17.88) 23;28 205.3215 (0.03) 201 1 +test_zstd[urls.10K-zstd] 4,177.0250 (27.05) 5,455.3670 (20.38) 4,437.3242 (26.78) 210.9385 (11.71) 4,382.1610 (27.48) 231.2377 (19.50) 44;11 225.3610 (0.04) 227 1 +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ``` diff --git a/benchmarks/test_bench.py b/benchmarks/test_bench.py index 5683c649..f3475313 100644 --- a/benchmarks/test_bench.py +++ b/benchmarks/test_bench.py @@ -19,18 +19,19 @@ def round_trip(compress, decompress, data, **kwargs): "use_cramjam", (True, False), ids=lambda val: "cramjam" if val else "snappy" ) @pytest.mark.parametrize("file", FILES, ids=lambda val: val.name) -def test_snappy_raw(benchmark, file, use_cramjam: bool): +def test_snappy(benchmark, file, use_cramjam: bool): """ Uses the non-framed format for snappy compression """ import snappy - data = file.read_bytes() + data = bytearray(file.read_bytes()) # bytearray avoids double allocation in cramjam snappy by default + # Can be slightly faster if passing output_len to compress/decompress ops if use_cramjam: benchmark( round_trip, - compress=cramjam.snappy_compress_raw, - decompress=cramjam.snappy_decompress_raw, + compress=cramjam.snappy.compress, + decompress=cramjam.snappy.decompress, data=data, ) else: @@ 
-45,17 +46,27 @@ def test_snappy_raw(benchmark, file, use_cramjam: bool): @pytest.mark.parametrize( "use_cramjam", (True, False), ids=lambda val: "cramjam" if val else "gzip" ) +@pytest.mark.parametrize("set_output_len", (True, False), ids=lambda val: f"used-output_len={val}") @pytest.mark.parametrize("file", FILES, ids=lambda val: val.name) -def test_gzip(benchmark, file, use_cramjam: bool): +def test_gzip(benchmark, file, use_cramjam: bool, set_output_len: bool): data = file.read_bytes() if use_cramjam: - benchmark( - round_trip, - compress=cramjam.gzip_compress, - decompress=cramjam.gzip_decompress, - data=data, - level=9, - ) + if set_output_len: + compressed_len = len(cramjam.gzip.compress(data)) + benchmark( + round_trip, + compress=lambda bytes: cramjam.gzip.compress(bytes, level=9, output_len=compressed_len), + decompress=lambda bytes: cramjam.gzip.decompress(bytes, output_len=len(data)), + data=data, + ) + else: + benchmark( + round_trip, + compress=cramjam.gzip.compress, + decompress=cramjam.gzip.decompress, + data=data, + level=9, + ) else: benchmark( round_trip, @@ -77,8 +88,8 @@ def test_lz4(benchmark, file, use_cramjam: bool): if use_cramjam: benchmark( round_trip, - compress=cramjam.lz4_compress, - decompress=cramjam.lz4_decompress, + compress=cramjam.lz4.compress, + decompress=cramjam.lz4.decompress, data=data, level=4, ) @@ -103,8 +114,8 @@ def test_brotli(benchmark, file, use_cramjam: bool): if use_cramjam: benchmark( round_trip, - compress=cramjam.brotli_compress, - decompress=cramjam.brotli_decompress, + compress=cramjam.brotli.compress, + decompress=cramjam.brotli.decompress, data=data, ) else: @@ -127,9 +138,9 @@ def test_zstd(benchmark, file, use_cramjam: bool): if use_cramjam: benchmark( round_trip, - compress=cramjam.zstd_compress, - decompress=cramjam.zstd_decompress, - data=data, + compress=cramjam.zstd.compress, + decompress=cramjam.zstd.decompress, + data=data ) else: benchmark( diff --git a/src/brotli.rs b/src/brotli.rs index 
58bbdeac..7ec47d28 100644 --- a/src/brotli.rs +++ b/src/brotli.rs @@ -1,19 +1,138 @@ -use brotli2::read::{BrotliDecoder, BrotliEncoder}; -use std::error::Error; -use std::io::prelude::*; +use crate::exceptions::{CompressionError, DecompressionError}; +use crate::{to_py_err, BytesType, Output}; +use pyo3::prelude::*; +use pyo3::types::{PyByteArray, PyBytes}; +use pyo3::wrap_pyfunction; +use pyo3::{PyResult, Python}; -/// Decompress via Brotli -pub fn decompress(data: &[u8]) -> Result, Box> { - let mut decoder = BrotliDecoder::new(data); - let mut buf = vec![]; - decoder.read_to_end(&mut buf)?; - Ok(buf) +pub fn init_py_module(m: &PyModule) -> PyResult<()> { + m.add_function(wrap_pyfunction!(compress, m)?)?; + m.add_function(wrap_pyfunction!(decompress, m)?)?; + Ok(()) } -/// Compress via Brotli -pub fn compress(data: &[u8], level: u32) -> Result, Box> { - let mut encoder = BrotliEncoder::new(data, level); - let mut buf = vec![]; - encoder.read_to_end(&mut buf)?; - Ok(buf) +/// Brotli decompression. 
+/// +/// Python Example +/// -------------- +/// ```python +/// >>> cramjam.brotli.decompress(compressed_bytes, output_len=Optional[int]) +/// ``` +#[pyfunction] +pub fn decompress<'a>(py: Python<'a>, data: BytesType<'a>, output_len: Option) -> PyResult> { + match data { + BytesType::Bytes(input) => match output_len { + Some(len) => { + let pybytes = PyBytes::new_with(py, len, |buffer| { + let output = Output::Slice(buffer); + to_py_err!(DecompressionError -> self::internal::decompress(input.as_bytes(), output))?; + Ok(()) + })?; + Ok(BytesType::Bytes(pybytes)) + } + None => { + let mut buffer = Vec::with_capacity(data.len() / 10); + let output = Output::Vector(&mut buffer); + to_py_err!(DecompressionError -> self::internal::decompress(input.as_bytes(), output))?; + Ok(BytesType::Bytes(PyBytes::new(py, &buffer))) + } + }, + BytesType::ByteArray(input) => match output_len { + Some(len) => { + let mut size = 0; + let pybytes = PyByteArray::new_with(py, len, |buffer| { + let output = Output::Slice(buffer); + size = to_py_err!(DecompressionError -> self::internal::decompress(unsafe { input.as_bytes() }, output))?; + Ok(()) + })?; + pybytes.resize(size)?; + Ok(BytesType::ByteArray(pybytes)) + } + None => { + let mut buffer = Vec::with_capacity(data.len() / 10); + let output = Output::Vector(&mut buffer); + to_py_err!(DecompressionError -> self::internal::decompress(unsafe { input.as_bytes() }, output))?; + Ok(BytesType::ByteArray(PyByteArray::new(py, &buffer))) + } + }, + } +} + +/// Brotli compression. 
+/// +/// Python Example +/// -------------- +/// ```python +/// >>> cramjam.brotli.compress(b'some bytes here', level=9, output_len=Option[int]) # level defaults to 11 +/// ``` +#[pyfunction] +pub fn compress<'a>( + py: Python<'a>, + data: BytesType<'a>, + level: Option, + output_len: Option, +) -> PyResult> { + let level = level.unwrap_or_else(|| 11); + match data { + BytesType::Bytes(input) => match output_len { + Some(len) => { + let pybytes = PyBytes::new_with(py, len, |buffer| { + let output = Output::Slice(buffer); + to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), output, level))?; + Ok(()) + })?; + Ok(BytesType::Bytes(pybytes)) + } + None => { + let mut buffer = Vec::with_capacity(data.len() / 10); + let output = Output::Vector(&mut buffer); + to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), output, level))?; + Ok(BytesType::Bytes(PyBytes::new(py, &buffer))) + } + }, + BytesType::ByteArray(input) => match output_len { + Some(len) => { + let mut size = 0; + let pybytes = PyByteArray::new_with(py, len, |buffer| { + let output = Output::Slice(buffer); + size = to_py_err!(CompressionError -> self::internal::compress(unsafe { input.as_bytes() }, output, level))?; + Ok(()) + })?; + pybytes.resize(size)?; + Ok(BytesType::ByteArray(pybytes)) + } + None => { + let mut buffer = Vec::with_capacity(data.len() / 10); + let output = Output::Vector(&mut buffer); + to_py_err!(CompressionError -> self::internal::compress(unsafe { input.as_bytes() }, output, level))?; + Ok(BytesType::ByteArray(PyByteArray::new(py, &buffer))) + } + }, + } +} + +mod internal { + + use crate::Output; + use brotli2::read::{BrotliDecoder, BrotliEncoder}; + use std::io::prelude::*; + use std::io::Error; + + /// Decompress via Brotli + pub fn decompress<'a>(data: &[u8], output: Output<'a>) -> Result { + let mut decoder = BrotliDecoder::new(data); + match output { + Output::Slice(slice) => decoder.read(slice), + Output::Vector(v) => 
decoder.read_to_end(v), + } + } + + /// Compress via Brotli + pub fn compress<'a>(data: &'a [u8], output: Output<'a>, level: u32) -> Result { + let mut encoder = BrotliEncoder::new(data, level); + match output { + Output::Slice(slice) => encoder.read(slice), + Output::Vector(v) => encoder.read_to_end(v), + } + } } diff --git a/src/deflate.rs b/src/deflate.rs index 12421b15..3b3761d7 100644 --- a/src/deflate.rs +++ b/src/deflate.rs @@ -1,20 +1,139 @@ -use flate2::read::{DeflateDecoder, DeflateEncoder}; -use flate2::Compression; -use std::error::Error; -use std::io::prelude::*; +use crate::exceptions::{CompressionError, DecompressionError}; +use crate::{to_py_err, BytesType, Output}; +use pyo3::prelude::*; +use pyo3::types::{PyByteArray, PyBytes}; +use pyo3::wrap_pyfunction; +use pyo3::{PyResult, Python}; -/// Decompress gzip data -pub fn decompress(data: &[u8]) -> Result, Box> { - let mut decoder = DeflateDecoder::new(data); - let mut buf = vec![]; - decoder.read_to_end(&mut buf)?; - Ok(buf) +pub fn init_py_module(m: &PyModule) -> PyResult<()> { + m.add_function(wrap_pyfunction!(compress, m)?)?; + m.add_function(wrap_pyfunction!(decompress, m)?)?; + Ok(()) } -/// Compress gzip data -pub fn compress(data: &[u8], level: u32) -> Result, Box> { - let mut buf = vec![]; - let mut encoder = DeflateEncoder::new(data, Compression::new(level)); - encoder.read_to_end(&mut buf)?; - Ok(buf) +/// Deflate decompression. 
+/// +/// Python Example +/// -------------- +/// ```python +/// >>> cramjam.deflate.decompress(compressed_bytes, output_len=Optional[int]) +/// ``` +#[pyfunction] +pub fn decompress<'a>(py: Python<'a>, data: BytesType<'a>, output_len: Option) -> PyResult> { + match data { + BytesType::Bytes(input) => match output_len { + Some(len) => { + let pybytes = PyBytes::new_with(py, len, |buffer| { + let output = Output::Slice(buffer); + to_py_err!(DecompressionError -> self::internal::decompress(input.as_bytes(), output))?; + Ok(()) + })?; + Ok(BytesType::Bytes(pybytes)) + } + None => { + let mut buffer = Vec::with_capacity(data.len() / 10); + let output = Output::Vector(&mut buffer); + to_py_err!(DecompressionError -> self::internal::decompress(input.as_bytes(), output))?; + Ok(BytesType::Bytes(PyBytes::new(py, &buffer))) + } + }, + BytesType::ByteArray(input) => match output_len { + Some(len) => { + let mut size = 0; + let pybytes = PyByteArray::new_with(py, len, |buffer| { + let output = Output::Slice(buffer); + size = to_py_err!(DecompressionError -> self::internal::decompress(unsafe { input.as_bytes() }, output))?; + Ok(()) + })?; + pybytes.resize(size)?; + Ok(BytesType::ByteArray(pybytes)) + } + None => { + let mut buffer = Vec::with_capacity(data.len() / 10); + let output = Output::Vector(&mut buffer); + to_py_err!(DecompressionError -> self::internal::decompress(unsafe { input.as_bytes() }, output))?; + Ok(BytesType::ByteArray(PyByteArray::new(py, &buffer))) + } + }, + } +} + +/// Deflate compression. 
+/// +/// Python Example +/// -------------- +/// ```python +/// >>> cramjam.deflate.compress(b'some bytes here', level=5, output_len=Optional[int]) # level defaults to 6 +/// ``` +#[pyfunction] +pub fn compress<'a>( + py: Python<'a>, + data: BytesType<'a>, + level: Option, + output_len: Option, +) -> PyResult> { + let level = level.unwrap_or_else(|| 6); + match data { + BytesType::Bytes(input) => match output_len { + Some(len) => { + let pybytes = PyBytes::new_with(py, len, |buffer| { + let output = Output::Slice(buffer); + to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), output, level))?; + Ok(()) + })?; + Ok(BytesType::Bytes(pybytes)) + } + None => { + let mut buffer = Vec::with_capacity(data.len() / 10); + let output = Output::Vector(&mut buffer); + to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), output, level))?; + Ok(BytesType::Bytes(PyBytes::new(py, &buffer))) + } + }, + BytesType::ByteArray(input) => match output_len { + Some(len) => { + let mut size = 0; + let pybytes = PyByteArray::new_with(py, len, |buffer| { + let output = Output::Slice(buffer); + size = to_py_err!(CompressionError -> self::internal::compress(unsafe { input.as_bytes() }, output, level))?; + Ok(()) + })?; + pybytes.resize(size)?; + Ok(BytesType::ByteArray(pybytes)) + } + None => { + let mut buffer = Vec::with_capacity(data.len() / 10); + let output = Output::Vector(&mut buffer); + to_py_err!(CompressionError -> self::internal::compress(unsafe { input.as_bytes() }, output, level))?; + Ok(BytesType::ByteArray(PyByteArray::new(py, &buffer))) + } + }, + } +} + +mod internal { + + use crate::Output; + use flate2::read::{DeflateDecoder, DeflateEncoder}; + use flate2::Compression; + use std::io::prelude::*; + use std::io::Error; + + /// Decompress gzip data + pub fn decompress<'a>(data: &[u8], output: Output<'a>) -> Result { + let mut decoder = DeflateDecoder::new(data); + match output { + Output::Slice(slice) => decoder.read(slice), + 
Output::Vector(v) => decoder.read_to_end(v), + } + } + + /// Compress gzip data + pub fn compress<'a>(data: &'a [u8], output: Output<'a>, level: u32) -> Result { + let mut encoder = DeflateEncoder::new(data, Compression::new(level)); + match output { + Output::Slice(slice) => encoder.read(slice), + Output::Vector(v) => encoder.read_to_end(v), + } + } } diff --git a/src/gzip.rs b/src/gzip.rs index 044aec43..5ee4cb4f 100644 --- a/src/gzip.rs +++ b/src/gzip.rs @@ -1,20 +1,138 @@ -use flate2::read::{GzDecoder, GzEncoder}; -use flate2::Compression; -use std::error::Error; -use std::io::prelude::*; +use crate::exceptions::{CompressionError, DecompressionError}; +use crate::{to_py_err, BytesType, Output}; +use pyo3::prelude::*; +use pyo3::types::{PyByteArray, PyBytes}; +use pyo3::wrap_pyfunction; +use pyo3::{PyResult, Python}; -/// Decompress gzip data -pub fn decompress(data: &[u8]) -> Result, Box> { - let mut decoder = GzDecoder::new(data); - let mut buf = vec![]; - decoder.read_to_end(&mut buf)?; - Ok(buf) +pub fn init_py_module(m: &PyModule) -> PyResult<()> { + m.add_function(wrap_pyfunction!(compress, m)?)?; + m.add_function(wrap_pyfunction!(decompress, m)?)?; + Ok(()) } -/// Compress gzip data -pub fn compress(data: &[u8], level: u32) -> Result, Box> { - let mut buf = vec![]; - let mut encoder = GzEncoder::new(data, Compression::new(level)); - encoder.read_to_end(&mut buf)?; - Ok(buf) +/// Gzip decompression. 
+/// +/// Python Example +/// -------------- +/// ```python +/// >>> cramjam.gzip.decompress(compressed_bytes, output_len=Optional[int]) +/// ``` +#[pyfunction] +pub fn decompress<'a>(py: Python<'a>, data: BytesType<'a>, output_len: Option) -> PyResult> { + match data { + BytesType::Bytes(input) => match output_len { + Some(len) => { + let pybytes = PyBytes::new_with(py, len, |buffer| { + let output = Output::Slice(buffer); + to_py_err!(DecompressionError -> self::internal::decompress(input.as_bytes(), output))?; + Ok(()) + })?; + Ok(BytesType::Bytes(pybytes)) + } + None => { + let mut buffer = Vec::with_capacity(data.len() / 10); + let output = Output::Vector(&mut buffer); + to_py_err!(DecompressionError -> self::internal::decompress(input.as_bytes(), output))?; + Ok(BytesType::Bytes(PyBytes::new(py, &buffer))) + } + }, + BytesType::ByteArray(input) => match output_len { + Some(len) => { + let mut size = 0; + let pybytes = PyByteArray::new_with(py, len, |buffer| { + let output = Output::Slice(buffer); + size = to_py_err!(DecompressionError -> self::internal::decompress(unsafe { input.as_bytes() }, output))?; + Ok(()) + })?; + pybytes.resize(size)?; + Ok(BytesType::ByteArray(pybytes)) + } + None => { + let mut buffer = Vec::with_capacity(data.len() / 10); + let output = Output::Vector(&mut buffer); + to_py_err!(DecompressionError -> self::internal::decompress(unsafe { input.as_bytes() }, output))?; + Ok(BytesType::ByteArray(PyByteArray::new(py, &buffer))) + } + }, + } +} + +/// Gzip compression. 
+/// +/// Python Example +/// -------------- +/// ```python +/// >>> cramjam.gzip.compress(b'some bytes here', level=2, output_len=Optional[int]) # Level defaults to 6 +/// ``` +#[pyfunction] +pub fn compress<'a>( + py: Python<'a>, + data: BytesType<'a>, + level: Option, + output_len: Option, +) -> PyResult> { + let level = level.unwrap_or_else(|| 6); + match data { + BytesType::Bytes(input) => match output_len { + Some(len) => { + let pybytes = PyBytes::new_with(py, len, |buffer| { + let output = Output::Slice(buffer); + to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), output, level))?; + Ok(()) + })?; + Ok(BytesType::Bytes(pybytes)) + } + None => { + let mut buffer = Vec::with_capacity(data.len() / 10); + let output = Output::Vector(&mut buffer); + to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), output, level))?; + Ok(BytesType::Bytes(PyBytes::new(py, &buffer))) + } + }, + BytesType::ByteArray(input) => match output_len { + Some(len) => { + let mut size = 0; + let pybytes = PyByteArray::new_with(py, len, |buffer| { + let output = Output::Slice(buffer); + size = to_py_err!(CompressionError -> self::internal::compress(unsafe { input.as_bytes() }, output, level))?; + Ok(()) + })?; + pybytes.resize(size)?; + Ok(BytesType::ByteArray(pybytes)) + } + None => { + let mut buffer = Vec::with_capacity(data.len() / 10); + let output = Output::Vector(&mut buffer); + to_py_err!(CompressionError -> self::internal::compress(unsafe { input.as_bytes() }, output, level))?; + Ok(BytesType::ByteArray(PyByteArray::new(py, &buffer))) + } + }, + } +} + +mod internal { + use crate::Output; + use flate2::read::{GzDecoder, GzEncoder}; + use flate2::Compression; + use std::io::prelude::*; + use std::io::Error; + + /// Decompress gzip data + pub fn decompress<'a>(data: &'a [u8], output: Output<'a>) -> Result { + let mut decoder = GzDecoder::new(data); + match output { + Output::Slice(slice) => decoder.read(slice), + Output::Vector(v) => 
decoder.read_to_end(v), + } + } + + /// Compress gzip data + pub fn compress<'a>(data: &'a [u8], output: Output<'a>, level: u32) -> Result { + let mut encoder = GzEncoder::new(data, Compression::new(level)); + match output { + Output::Slice(slice) => encoder.read(slice), + Output::Vector(v) => encoder.read_to_end(v), + } + } } diff --git a/src/lib.rs b/src/lib.rs index 7d0c9495..9ec2f01b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,223 +1,86 @@ //! CramJam documentation of python exported functions for (de)compression of bytes //! -//! The API follows `<>_compress` and `<>_decompress` +//! The API follows cramjam.`<>.compress` and cramjam.`<>.decompress` //! //! Python Example: //! //! ```python //! data = b'some bytes here' -//! compressed = cramjam.snappy_compress(data) -//! decompressed = cramjam.snappy_decompress(compressed) +//! compressed = cramjam.snappy.compress(data) +//! decompressed = cramjam.snappy.decompress(compressed) //! assert data == decompressed //! ``` -mod brotli; -mod deflate; -mod exceptions; -mod gzip; -mod lz4; -mod snappy; -mod zstd; +// TODO: There is a lot of very similar, but slightly different code for each variant +// time should be spent perhaps with a macro or other alternative. +// Each variant is similar, but sometimes has subtly different APIs/logic. -use pyo3::prelude::*; -use pyo3::types::PyBytes; -use pyo3::wrap_pyfunction; - -use exceptions::{CompressionError, DecompressionError}; - -macro_rules! to_py_err { - ($error:ident -> $expr:expr) => { - $expr.map_err(|err| PyErr::new::<$error, _>(err.to_string())) - }; -} - -/// Snappy decompression. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> snappy_decompress(compressed_bytes) -/// ``` -#[pyfunction] -pub fn snappy_decompress<'a>(py: Python<'a>, data: &'a [u8]) -> PyResult<&'a PyBytes> { - let decompressed = to_py_err!(DecompressionError -> snappy::decompress(data))?; - Ok(PyBytes::new(py, &decompressed)) -} - -/// Snappy compression. 
-/// -/// Python Example -/// -------------- -/// ```python -/// >>> snappy_compress(b'some bytes here') -/// ``` -#[pyfunction] -pub fn snappy_compress<'a>(py: Python<'a>, data: &'a [u8]) -> PyResult<&'a PyBytes> { - let compressed = to_py_err!(CompressionError -> snappy::compress(data))?; - Ok(PyBytes::new(py, &compressed)) -} - -/// Snappy decompression, raw -/// This does not use the snappy 'framed' encoding of compressed bytes. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> snappy_decompress_raw(compressed_raw_bytes) -/// ``` -#[pyfunction] -pub fn snappy_decompress_raw<'a>(py: Python<'a>, data: &'a [u8]) -> PyResult<&'a PyBytes> { - let decompressed = to_py_err!(DecompressionError -> snappy::decompress_raw(data))?; - Ok(PyBytes::new(py, &decompressed)) -} - -/// Snappy compression raw. -/// This does not use the snappy 'framed' encoding of compressed bytes. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> snappy_compress_raw(b'some bytes here') -/// ``` -#[pyfunction] -pub fn snappy_compress_raw<'a>(py: Python<'a>, data: &'a [u8]) -> PyResult<&'a PyBytes> { - let compressed = to_py_err!(CompressionError -> snappy::compress_raw(data))?; - Ok(PyBytes::new(py, &compressed)) -} +// TODO: Add output size estimation for each variant, now it's just snappy +// allow for resizing PyByteArray if over allocated; cannot resize PyBytes yet. -/// Brotli decompression. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> brotli_decompress(compressed_bytes) -/// ``` -#[pyfunction] -pub fn brotli_decompress<'a>(py: Python<'a>, data: &'a [u8]) -> PyResult<&'a PyBytes> { - let decompressed = to_py_err!(DecompressionError -> brotli::decompress(data))?; - Ok(PyBytes::new(py, &decompressed)) -} - -/// Brotli compression. 
-/// -/// Python Example -/// -------------- -/// ```python -/// >>> brotli_compress(b'some bytes here', level=9) # level defaults to 11 -/// ``` -#[pyfunction] -pub fn brotli_compress<'a>(py: Python<'a>, data: &'a [u8], level: Option) -> PyResult<&'a PyBytes> { - let level = level.unwrap_or_else(|| 11); - let compressed = to_py_err!(CompressionError -> brotli::compress(data, level))?; - Ok(PyBytes::new(py, &compressed)) -} +pub mod brotli; +pub mod deflate; +pub mod exceptions; +pub mod gzip; +pub mod lz4; +pub mod snappy; +pub mod zstd; -/// LZ4 compression. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> lz4_decompress(compressed_bytes) -/// ``` -#[pyfunction] -pub fn lz4_decompress<'a>(py: Python<'a>, data: &'a [u8]) -> PyResult<&'a PyBytes> { - let decompressed = to_py_err!(DecompressionError -> lz4::decompress(data))?; - Ok(PyBytes::new(py, &decompressed)) -} +use pyo3::prelude::*; +use pyo3::types::{PyByteArray, PyBytes}; -/// lZ4 compression. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> lz4_compress(b'some bytes here') -/// ``` -#[pyfunction] -pub fn lz4_compress<'a>(py: Python<'a>, data: &'a [u8], level: Option) -> PyResult<&'a PyBytes> { - let level = level.unwrap_or_else(|| 4); - let compressed = to_py_err!(CompressionError -> lz4::compress(data, level))?; - Ok(PyBytes::new(py, &compressed)) -} +use exceptions::{CompressionError, DecompressionError}; -/// Gzip decompression. 
-/// -/// Python Example -/// -------------- -/// ```python -/// >>> gzip_decompress(compressed_bytes) -/// ``` -#[pyfunction] -pub fn gzip_decompress<'a>(py: Python<'a>, data: &'a [u8]) -> PyResult<&'a PyBytes> { - let decompressed = to_py_err!(DecompressionError -> gzip::decompress(data))?; - Ok(PyBytes::new(py, &decompressed)) +#[derive(FromPyObject)] +pub enum BytesType<'a> { + #[pyo3(transparent, annotation = "bytes")] + Bytes(&'a PyBytes), + #[pyo3(transparent, annotation = "bytearray")] + ByteArray(&'a PyByteArray), } -/// Gzip compression. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> gzip_compress(b'some bytes here', level=2) # Level defaults to 6 -/// ``` -#[pyfunction] -pub fn gzip_compress<'a>(py: Python<'a>, data: &'a [u8], level: Option) -> PyResult<&'a PyBytes> { - let level = level.unwrap_or_else(|| 6); - let compressed = to_py_err!(CompressionError -> gzip::compress(data, level))?; - Ok(PyBytes::new(py, &compressed)) +impl<'a> BytesType<'a> { + fn len(&self) -> usize { + self.as_bytes().len() + } + fn as_bytes(&self) -> &'a [u8] { + match self { + Self::Bytes(b) => b.as_bytes(), + Self::ByteArray(b) => unsafe { b.as_bytes() }, + } + } } -/// Deflate decompression. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> deflate_decompress(compressed_bytes) -/// ``` -#[pyfunction] -pub fn deflate_decompress<'a>(py: Python<'a>, data: &'a [u8]) -> PyResult<&'a PyBytes> { - let decompressed = to_py_err!(DecompressionError -> deflate::decompress(data))?; - Ok(PyBytes::new(py, &decompressed)) +impl<'a> IntoPy for BytesType<'a> { + fn into_py(self, py: Python) -> PyObject { + match self { + Self::Bytes(bytes) => bytes.to_object(py), + Self::ByteArray(byte_array) => byte_array.to_object(py), + } + } } -/// Deflate compression. 
-/// -/// Python Example -/// -------------- -/// ```python -/// >>> deflate_compress(b'some bytes here', level=5) # level defaults to 6 -/// ``` -#[pyfunction] -pub fn deflate_compress<'a>(py: Python<'a>, data: &'a [u8], level: Option) -> PyResult<&'a PyBytes> { - let level = level.unwrap_or_else(|| 6); - let compressed = to_py_err!(CompressionError -> deflate::compress(data, level))?; - Ok(PyBytes::new(py, &compressed)) +/// Buffer to de/compression algorithms' output. +/// ::Vector used when the output len cannot be determined, and/or resulting +/// python object cannot be resized to what the actual bytes decoded was. +pub enum Output<'a> { + Slice(&'a mut [u8]), + Vector(&'a mut Vec), } -/// ZSTD decompression. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> zstd_decompress(compressed_bytes) -/// ``` -#[pyfunction] -pub fn zstd_decompress<'a>(py: Python<'a>, data: &'a [u8]) -> PyResult<&'a PyBytes> { - let decompressed = to_py_err!(DecompressionError -> zstd::decompress(data))?; - Ok(PyBytes::new(py, &decompressed)) +#[macro_export] +macro_rules! to_py_err { + ($error:ident -> $expr:expr) => { + $expr.map_err(|err| PyErr::new::<$error, _>(err.to_string())) + }; } -/// ZSTD compression. -/// -/// Python Example -/// -------------- -/// ```python -/// >>> zstd_compress(b'some bytes here', level=0) # level defaults to 11 -/// ``` -#[pyfunction] -pub fn zstd_compress<'a>(py: Python<'a>, data: &'a [u8], level: Option) -> PyResult<&'a PyBytes> { - let level = level.unwrap_or_else(|| 0); // 0 will use zstd's default, currently 11 - let compressed = to_py_err!(CompressionError -> zstd::compress(data, level))?; - Ok(PyBytes::new(py, &compressed)) +macro_rules! 
make_submodule { + ($py:ident -> $parent:ident -> $submodule:ident) => { + let sub_mod = PyModule::new($py, stringify!($submodule))?; + $submodule::init_py_module(sub_mod)?; + $parent.add_submodule(sub_mod)?; + }; } #[pymodule] @@ -225,25 +88,12 @@ fn cramjam(py: Python, m: &PyModule) -> PyResult<()> { m.add("CompressionError", py.get_type::())?; m.add("DecompressionError", py.get_type::())?; - m.add_wrapped(wrap_pyfunction!(snappy_compress))?; - m.add_wrapped(wrap_pyfunction!(snappy_decompress))?; - m.add_wrapped(wrap_pyfunction!(snappy_compress_raw))?; - m.add_wrapped(wrap_pyfunction!(snappy_decompress_raw))?; - - m.add_wrapped(wrap_pyfunction!(brotli_compress))?; - m.add_wrapped(wrap_pyfunction!(brotli_decompress))?; - - m.add_wrapped(wrap_pyfunction!(lz4_compress))?; - m.add_wrapped(wrap_pyfunction!(lz4_decompress))?; - - m.add_wrapped(wrap_pyfunction!(gzip_compress))?; - m.add_wrapped(wrap_pyfunction!(gzip_decompress))?; - - m.add_wrapped(wrap_pyfunction!(deflate_compress))?; - m.add_wrapped(wrap_pyfunction!(deflate_decompress))?; - - m.add_wrapped(wrap_pyfunction!(zstd_compress))?; - m.add_wrapped(wrap_pyfunction!(zstd_decompress))?; + make_submodule!(py -> m -> snappy); + make_submodule!(py -> m -> brotli); + make_submodule!(py -> m -> lz4); + make_submodule!(py -> m -> gzip); + make_submodule!(py -> m -> deflate); + make_submodule!(py -> m -> zstd); Ok(()) } diff --git a/src/lz4.rs b/src/lz4.rs index 590caabe..c68fa2e0 100644 --- a/src/lz4.rs +++ b/src/lz4.rs @@ -1,14 +1,82 @@ -use std::error::Error; +use crate::exceptions::{CompressionError, DecompressionError}; +use crate::{to_py_err, BytesType}; +use pyo3::prelude::*; +use pyo3::types::{PyByteArray, PyBytes}; +use pyo3::wrap_pyfunction; +use pyo3::{PyResult, Python}; -/// Decompress lz4 data -pub fn decompress(data: &[u8]) -> Result, Box> { - lz_fear::framed::decompress_frame(data).map_err(|err| err.into()) +pub fn init_py_module(m: &PyModule) -> PyResult<()> { + m.add_function(wrap_pyfunction!(compress, 
m)?)?; + m.add_function(wrap_pyfunction!(decompress, m)?)?; + Ok(()) } -/// Compress lz4 data -// TODO: lz-fear does not yet support level -pub fn compress(data: &[u8], _level: u32) -> Result, Box> { - let mut buf = vec![]; - lz_fear::framed::CompressionSettings::default().compress(data, &mut buf)?; - Ok(buf) +/// LZ4 compression. +/// +/// Python Example +/// -------------- +/// ```python +/// >>> # Note, output_len is currently ignored; underlying algorithm does not support reading to slice at this time +/// >>> cramjam.lz4.decompress(compressed_bytes, output_len=Optional[int]) +/// ``` +#[pyfunction] +#[allow(unused_variables)] // TODO: Make use of output_len for lz4 +pub fn decompress<'a>(py: Python<'a>, data: BytesType<'a>, output_len: Option) -> PyResult> { + match data { + BytesType::Bytes(input) => { + let out = to_py_err!(DecompressionError -> self::internal::decompress(input.as_bytes()))?; + Ok(BytesType::Bytes(PyBytes::new(py, &out))) + } + BytesType::ByteArray(input) => { + let out = to_py_err!(DecompressionError -> self::internal::decompress(unsafe { input.as_bytes() }))?; + Ok(BytesType::ByteArray(PyByteArray::new(py, &out))) + } + } +} + +/// lZ4 compression. 
+/// +/// Python Example +/// -------------- +/// ```python +/// >>> # Note, output_len is currently ignored; underlying algorithm does not support reading to slice at this time +/// >>> cramjam.lz4.compress(b'some bytes here', output_len=Optional[int]) +/// ``` +#[pyfunction] +#[allow(unused_variables)] +pub fn compress<'a>( + py: Python<'a>, + data: BytesType<'a>, + level: Option, + output_len: Option, +) -> PyResult> { + let level = level.unwrap_or_else(|| 4); + + match data { + BytesType::Bytes(input) => { + let out = to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), level))?; + Ok(BytesType::Bytes(PyBytes::new(py, &out))) + } + BytesType::ByteArray(input) => { + let out = to_py_err!(CompressionError -> self::internal::compress(unsafe { input.as_bytes() }, level))?; + Ok(BytesType::ByteArray(PyByteArray::new(py, &out))) + } + } +} + +mod internal { + use std::error::Error; + + /// Decompress lz4 data + pub fn decompress(data: &[u8]) -> Result, Box> { + lz_fear::framed::decompress_frame(data).map_err(|err| err.into()) + } + + /// Compress lz4 data + // TODO: lz-fear does not yet support level + pub fn compress(data: &[u8], _level: u32) -> Result, Box> { + let mut buf = vec![]; + lz_fear::framed::CompressionSettings::default().compress(data, &mut buf)?; + Ok(buf) + } } diff --git a/src/snappy.rs b/src/snappy.rs index b28999ff..67794fe4 100644 --- a/src/snappy.rs +++ b/src/snappy.rs @@ -1,31 +1,186 @@ -use snap::raw::{Decoder, Encoder}; -use snap::read::{FrameDecoder, FrameEncoder}; -use std::io::{Error, Read}; - -/// Decompress snappy data raw -pub fn decompress_raw(data: &[u8]) -> Result, snap::Error> { - let mut decoder = Decoder::new(); - decoder.decompress_vec(data) +use crate::exceptions::{CompressionError, DecompressionError}; +use crate::{to_py_err, BytesType, Output}; +use pyo3::prelude::*; +use pyo3::types::{PyByteArray, PyBytes}; +use pyo3::wrap_pyfunction; +use pyo3::{PyResult, Python}; +use snap::raw::{decompress_len, 
max_compress_len}; + +pub fn init_py_module(m: &PyModule) -> PyResult<()> { + m.add_function(wrap_pyfunction!(compress, m)?)?; + m.add_function(wrap_pyfunction!(decompress, m)?)?; + m.add_function(wrap_pyfunction!(compress_raw, m)?)?; + m.add_function(wrap_pyfunction!(decompress_raw, m)?)?; + Ok(()) +} + +/// Snappy decompression. +/// +/// Python Example +/// -------------- +/// ```python +/// >>> # bytes or bytearray; bytearray is faster +/// >>> cramjam.snappy.decompress(compressed_bytes, output_len=Optional[None]) +/// ``` +#[pyfunction] +pub fn decompress<'a>(py: Python<'a>, data: BytesType<'a>, output_len: Option) -> PyResult> { + let estimated_len = match output_len { + Some(len) => len, + None => to_py_err!(DecompressionError -> decompress_len(data.as_bytes()))?, + }; + let result = match data { + BytesType::Bytes(bytes) => { + let pybytes = if output_len.is_some() { + PyBytes::new_with(py, estimated_len, |buffer| { + to_py_err!(DecompressionError -> self::internal::decompress(bytes.as_bytes(), Output::Slice(buffer)))?; + Ok(()) + })? + } else { + let mut buffer = Vec::with_capacity(estimated_len); + + to_py_err!(DecompressionError -> self::internal::decompress(bytes.as_bytes(), Output::Vector(&mut buffer)))?; + PyBytes::new(py, &buffer) + }; + BytesType::Bytes(pybytes) + } + BytesType::ByteArray(bytes_array) => unsafe { + let mut actual_len = 0; + let pybytes = PyByteArray::new_with(py, estimated_len, |output| { + actual_len = to_py_err!(DecompressionError -> self::internal::decompress(bytes_array.as_bytes(), Output::Slice(output)))?; + Ok(()) + })?; + pybytes.resize(actual_len)?; + BytesType::ByteArray(pybytes) + }, + }; + Ok(result) } -/// Compress snappy data raw -pub fn compress_raw(data: &[u8]) -> Result, snap::Error> { - let mut encoder = Encoder::new(); - encoder.compress_vec(data) +/// Snappy compression. 
+/// +/// Python Example +/// -------------- +/// ```python +/// >>> _ = cramjam.snappy.compress(b'some bytes here') +/// >>> _ = cramjam.snappy.compress(bytearray(b'this avoids double allocation in rust side, and thus faster!')) # <- use bytearray where possible +/// ``` +#[pyfunction] +pub fn compress<'a>(py: Python<'a>, data: BytesType<'a>, output_len: Option) -> PyResult> { + // Prefer the user's output_len, fallback to estimate the output len + let estimated_len = output_len.unwrap_or_else(|| max_compress_len(data.len())); + + let result = match data { + BytesType::Bytes(bytes) => { + // user provided the exact output len + if output_len.is_some() { + let pybytes = PyBytes::new_with(py, estimated_len, |buffer| { + to_py_err!(CompressionError -> self::internal::compress(bytes.as_bytes(), Output::Slice(buffer)))?; + Ok(()) + })?; + BytesType::Bytes(pybytes) + + // we can use the estimated length, but need to use buffer as we don't know for sure the length + } else { + let mut buffer = Vec::with_capacity(estimated_len); + + to_py_err!(CompressionError -> self::internal::compress(bytes.as_bytes(), Output::Vector(&mut buffer)))?; + + let pybytes = PyBytes::new(py, &buffer); + BytesType::Bytes(pybytes) + } + } + BytesType::ByteArray(bytes_array) => { + let bytes = unsafe { bytes_array.as_bytes() }; + let mut actual_len = 0; + let pybytes = PyByteArray::new_with(py, estimated_len, |output| { + actual_len = to_py_err!(CompressionError -> self::internal::compress(bytes, Output::Slice(output)))?; + Ok(()) + })?; + pybytes.resize(actual_len)?; + BytesType::ByteArray(pybytes) + } + }; + Ok(result) } -/// Decompress snappy data framed -pub fn decompress(data: &[u8]) -> Result, Error> { - let mut buf = vec![]; - let mut decoder = FrameDecoder::new(data); - decoder.read_to_end(&mut buf)?; - Ok(buf) +/// Snappy decompression, raw +/// This does not use the snappy 'framed' encoding of compressed bytes. 
+/// +/// Python Example +/// -------------- +/// ```python +/// >>> cramjam.snappy.decompress_raw(compressed_raw_bytes) +/// ``` +#[pyfunction] +pub fn decompress_raw<'a>(py: Python<'a>, data: BytesType<'a>) -> PyResult> { + match data { + BytesType::Bytes(input) => { + let out = to_py_err!(DecompressionError -> self::internal::decompress_raw(input.as_bytes()))?; + Ok(BytesType::Bytes(PyBytes::new(py, &out))) + } + BytesType::ByteArray(input) => { + let out = to_py_err!(DecompressionError -> self::internal::decompress_raw(unsafe { input.as_bytes() }))?; + Ok(BytesType::ByteArray(PyByteArray::new(py, &out))) + } + } } -/// Decompress snappy data framed -pub fn compress(data: &[u8]) -> Result, Error> { - let mut buf = vec![]; - let mut encoder = FrameEncoder::new(data); - encoder.read_to_end(&mut buf)?; - Ok(buf) +/// Snappy compression raw. +/// This does not use the snappy 'framed' encoding of compressed bytes. +/// +/// Python Example +/// -------------- +/// ```python +/// >>> cramjam.snappy.compress_raw(b'some bytes here') +/// ``` +#[pyfunction] +pub fn compress_raw<'a>(py: Python<'a>, data: BytesType<'a>) -> PyResult> { + match data { + BytesType::Bytes(input) => { + let out = to_py_err!(CompressionError -> self::internal::compress_raw(input.as_bytes()))?; + Ok(BytesType::Bytes(PyBytes::new(py, &out))) + } + BytesType::ByteArray(input) => { + let out = to_py_err!(CompressionError -> self::internal::compress_raw(unsafe { input.as_bytes() }))?; + Ok(BytesType::ByteArray(PyByteArray::new(py, &out))) + } + } +} + +mod internal { + use snap::raw::{Decoder, Encoder}; + use snap::read::{FrameDecoder, FrameEncoder}; + use std::io::{Error, Read}; + + use crate::Output; + + /// Decompress snappy data raw + pub fn decompress_raw(data: &[u8]) -> Result, snap::Error> { + let mut decoder = Decoder::new(); + decoder.decompress_vec(data) + } + + /// Compress snappy data raw + pub fn compress_raw(data: &[u8]) -> Result, snap::Error> { + let mut encoder = Encoder::new(); + 
encoder.compress_vec(data) + } + + /// Decompress snappy data framed + pub fn decompress<'a>(data: &'a [u8], output: Output<'a>) -> Result { + let mut decoder = FrameDecoder::new(data); + match output { + Output::Slice(slice) => decoder.read(slice), + Output::Vector(v) => decoder.read_to_end(v), + } + } + + /// Decompress snappy data framed + pub fn compress<'a>(data: &'a [u8], output: Output<'a>) -> Result { + let mut encoder = FrameEncoder::new(data); + match output { + Output::Slice(slice) => encoder.read(slice), + Output::Vector(v) => encoder.read_to_end(v), + } + } } diff --git a/src/zstd.rs b/src/zstd.rs index d49b7f08..31773928 100644 --- a/src/zstd.rs +++ b/src/zstd.rs @@ -1,11 +1,137 @@ -use std::io::Error; +use crate::exceptions::{CompressionError, DecompressionError}; +use crate::{to_py_err, BytesType, Output}; +use pyo3::prelude::*; +use pyo3::types::{PyByteArray, PyBytes}; +use pyo3::wrap_pyfunction; +use pyo3::{PyResult, Python}; -/// Decompress gzip data -pub fn decompress(data: &[u8]) -> Result, Error> { - zstd::stream::decode_all(data) +pub fn init_py_module(m: &PyModule) -> PyResult<()> { + m.add_function(wrap_pyfunction!(compress, m)?)?; + m.add_function(wrap_pyfunction!(decompress, m)?)?; + Ok(()) } -/// Compress gzip data -pub fn compress(data: &[u8], level: i32) -> Result, Error> { - zstd::stream::encode_all(data, level) +/// ZSTD decompression. 
+/// +/// Python Example +/// -------------- +/// ```python +/// >>> cramjam.zstd.decompress(compressed_bytes, output_len=Optional[int]) +/// ``` +#[pyfunction] +pub fn decompress<'a>(py: Python<'a>, data: BytesType<'a>, output_len: Option) -> PyResult> { + match data { + BytesType::Bytes(input) => match output_len { + Some(len) => { + let pybytes = PyBytes::new_with(py, len, |buffer| { + let output = Output::Slice(buffer); + to_py_err!(DecompressionError -> self::internal::decompress(input.as_bytes(), output))?; + Ok(()) + })?; + Ok(BytesType::Bytes(pybytes)) + } + None => { + let mut buffer = Vec::with_capacity(data.len() / 10); + let output = Output::Vector(&mut buffer); + to_py_err!(DecompressionError -> self::internal::decompress(input.as_bytes(), output))?; + Ok(BytesType::Bytes(PyBytes::new(py, &buffer))) + } + }, + BytesType::ByteArray(input) => match output_len { + Some(len) => { + let mut size = 0; + let pybytes = PyByteArray::new_with(py, len, |buffer| { + let output = Output::Slice(buffer); + size = to_py_err!(DecompressionError -> self::internal::decompress(unsafe { input.as_bytes() }, output))?; + Ok(()) + })?; + pybytes.resize(size)?; + Ok(BytesType::ByteArray(pybytes)) + } + None => { + let mut buffer = Vec::with_capacity(data.len() / 10); + let output = Output::Vector(&mut buffer); + to_py_err!(DecompressionError -> self::internal::decompress(unsafe { input.as_bytes() }, output))?; + Ok(BytesType::ByteArray(PyByteArray::new(py, &buffer))) + } + }, + } +} + +/// ZSTD compression. 
+/// +/// Python Example +/// -------------- +/// ```python +/// >>> cramjam.zstd.compress(b'some bytes here', level=0, output_len=Optional[int]) # level defaults to 11 +/// ``` +#[pyfunction] +pub fn compress<'a>( + py: Python<'a>, + data: BytesType<'a>, + level: Option, + output_len: Option, +) -> PyResult> { + let level = level.unwrap_or_else(|| 0); // 0 will use zstd's default, currently 11 + + match data { + BytesType::Bytes(input) => match output_len { + Some(len) => { + let pybytes = PyBytes::new_with(py, len, |buffer| { + let output = Output::Slice(buffer); + to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), output, level))?; + Ok(()) + })?; + Ok(BytesType::Bytes(pybytes)) + } + None => { + let mut buffer = Vec::with_capacity(data.len() / 10); + let output = Output::Vector(&mut buffer); + to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), output, level))?; + Ok(BytesType::Bytes(PyBytes::new(py, &buffer))) + } + }, + BytesType::ByteArray(input) => match output_len { + Some(len) => { + let mut size = 0; + let pybytes = PyByteArray::new_with(py, len, |buffer| { + let output = Output::Slice(buffer); + size = to_py_err!(CompressionError -> self::internal::compress(unsafe { input.as_bytes() }, output, level))?; + Ok(()) + })?; + pybytes.resize(size)?; + Ok(BytesType::ByteArray(pybytes)) + } + None => { + let mut buffer = Vec::with_capacity(data.len() / 10); + let output = Output::Vector(&mut buffer); + to_py_err!(CompressionError -> self::internal::compress(unsafe { input.as_bytes() }, output, level))?; + Ok(BytesType::ByteArray(PyByteArray::new(py, &buffer))) + } + }, + } +} + +mod internal { + + use crate::Output; + use std::io::{Error, Read}; + + /// Decompress gzip data + pub fn decompress<'a>(data: &'a [u8], output: Output<'a>) -> Result { + let mut decoder = zstd::stream::read::Decoder::new(data)?; + match output { + Output::Slice(slice) => decoder.read(slice), + Output::Vector(v) => decoder.read_to_end(v), + } 
+ } + + /// Compress gzip data + pub fn compress<'a>(data: &'a [u8], output: Output<'a>, level: i32) -> Result { + let mut encoder = zstd::stream::read::Encoder::new(data, level)?; + match output { + Output::Slice(slice) => encoder.read(slice), + Output::Vector(v) => encoder.read_to_end(v), + } + } } diff --git a/tests/test_variants.py b/tests/test_variants.py index 00b9923e..dea233c0 100644 --- a/tests/test_variants.py +++ b/tests/test_variants.py @@ -1,29 +1,28 @@ import pytest import cramjam -from cramjam import DecompressionError, CompressionError @pytest.mark.parametrize( - "variant", ("snappy", "brotli", "lz4", "gzip", "deflate", "zstd") + "variant_str", ("snappy", "brotli", "lz4", "gzip", "deflate", "zstd") ) -def test_variants_simple(variant): +def test_variants_simple(variant_str): - compress = getattr(cramjam, f"{variant}_compress") - decompress = getattr(cramjam, f"{variant}_decompress") + variant = getattr(cramjam, variant_str) uncompressed = b"some bytes to compress 123" * 1000 - compressed = compress(uncompressed) + compressed = variant.compress(uncompressed) assert compressed != uncompressed - decompressed = decompress(compressed) + decompressed = variant.decompress(compressed, output_len=len(uncompressed)) assert decompressed == uncompressed @pytest.mark.parametrize( - "variant", ("snappy", "brotli", "lz4", "gzip", "deflate", "zstd") + "variant_str", ("snappy", "brotli", "lz4", "gzip", "deflate", "zstd") ) -def test_variants_raise_exception(variant): +def test_variants_raise_exception(variant_str): + variant = getattr(cramjam, variant_str) with pytest.raises(cramjam.DecompressionError): - getattr(cramjam, f"{variant}_decompress")(b'sknow') + variant.decompress(b'sknow')