From 4298f5e32849a6c8aba3b64f903b2749d0455180 Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Wed, 18 May 2022 10:47:34 +0200 Subject: [PATCH 1/7] Build bitshuffle with zstd from blosc --- setup.py | 84 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 44 insertions(+), 40 deletions(-) diff --git a/setup.py b/setup.py index fe2f2815..1a3356af 100644 --- a/setup.py +++ b/setup.py @@ -539,37 +539,6 @@ def prefix(directory, files): """Mapping plugin name to library name they depend on""" -# bitshuffle (+lz4) plugin -# Plugins from https://github.com/kiyo-masui/bitshuffle -bithsuffle_dir = 'src/bitshuffle' - -# Set compile args for both MSVC and others, list is stripped at build time -extra_compile_args = ['-O3', '-ffast-math', '-std=c99', '-fopenmp'] -extra_compile_args += ['/Ox', '/fp:fast', '/openmp'] -if platform.machine() == "ppc64le": - # Required on ppc64le - sse2_options = {'extra_compile_args': ['-DUSESSE2'] } -else: - sse2_options = {} -extra_link_args = ['-fopenmp', '/openmp'] - -bithsuffle_plugin = HDF5PluginExtension( - "hdf5plugin.plugins.libh5bshuf", - sources=prefix(bithsuffle_dir, - ["src/bshuf_h5plugin.c", "src/bshuf_h5filter.c", - "src/bitshuffle.c", "src/bitshuffle_core.c", - "src/iochain.c", "lz4/lz4.c"]), - depends=prefix(bithsuffle_dir, - ["src/bitshuffle.h", "src/bitshuffle_core.h", - "src/iochain.h", 'src/bshuf_h5filter.h', - "lz4/lz4.h"]), - include_dirs=prefix(bithsuffle_dir, ['src/', 'lz4/']), - extra_compile_args=extra_compile_args, - extra_link_args=extra_link_args, - sse2=sse2_options, - ) - - # blosc plugin # Plugin from https://github.com/Blosc/hdf5-blosc # c-blosc from https://github.com/Blosc/c-blosc @@ -633,10 +602,14 @@ def prefix(directory, files): define_macros.append(('HAVE_ZLIB', 1)) # zstd -sources += glob(blosc_dir +'internal-complibs/zstd*/*/*.c') -depends += glob(blosc_dir +'internal-complibs/zstd*/*/*.h') -include_dirs += glob(blosc_dir + 'internal-complibs/zstd*') -include_dirs += glob(blosc_dir 
+ 'internal-complibs/zstd*/common') +zstd_sources = glob(blosc_dir +'internal-complibs/zstd*/*/*.c') +zstd_depends = glob(blosc_dir +'internal-complibs/zstd*/*/*.h') +zstd_include_dirs = glob(blosc_dir + 'internal-complibs/zstd*') +zstd_include_dirs += glob(blosc_dir + 'internal-complibs/zstd*/common') + +sources += zstd_sources +depends += zstd_depends +include_dirs += zstd_include_dirs define_macros.append(('HAVE_ZSTD', 1)) extra_compile_args = ['-std=gnu99'] # Needed to build manylinux1 wheels @@ -664,19 +637,50 @@ def prefix(directory, files): # HDF5Plugin-Zstandard zstandard_dir = os.path.join("src", "HDF5Plugin-Zstandard") -zstandard_include_dirs = glob(blosc_dir + 'internal-complibs/zstd*') -zstandard_include_dirs += glob(blosc_dir + 'internal-complibs/zstd*/common') zstandard_sources = [os.path.join(zstandard_dir, 'zstd_h5plugin.c')] -zstandard_sources += glob(blosc_dir +'internal-complibs/zstd*/*/*.c') +zstandard_sources += zstd_sources zstandard_depends = [os.path.join(zstandard_dir, 'zstd_h5plugin.h')] -zstandard_depends += glob(blosc_dir +'internal-complibs/zstd*/*/*.h') +zstandard_depends += zstd_depends zstandard_plugin = HDF5PluginExtension( "hdf5plugin.plugins.libh5zstd", sources=zstandard_sources, depends=zstandard_depends, - include_dirs=zstandard_include_dirs, + include_dirs=zstd_include_dirs, ) +# bitshuffle (+lz4 or zstd) plugin +# Plugins from https://github.com/kiyo-masui/bitshuffle +bithsuffle_dir = 'src/bitshuffle' + +# Set compile args for both MSVC and others, list is stripped at build time +extra_compile_args = ['-O3', '-ffast-math', '-std=c99', '-fopenmp'] +extra_compile_args += ['/Ox', '/fp:fast', '/openmp'] +if platform.machine() == "ppc64le": + # Required on ppc64le + sse2_options = {'extra_compile_args': ['-DUSESSE2'] } +else: + sse2_options = {} +extra_link_args = ['-fopenmp', '/openmp'] +define_macros = [("ZSTD_SUPPORT", 1)] + +bithsuffle_plugin = HDF5PluginExtension( + "hdf5plugin.plugins.libh5bshuf", + 
sources=prefix(bithsuffle_dir, + ["src/bshuf_h5plugin.c", "src/bshuf_h5filter.c", + "src/bitshuffle.c", "src/bitshuffle_core.c", + "src/iochain.c", "lz4/lz4.c"]) + zstd_sources, + depends=prefix(bithsuffle_dir, + ["src/bitshuffle.h", "src/bitshuffle_core.h", + "src/iochain.h", 'src/bshuf_h5filter.h', + "lz4/lz4.h"]) + zstd_depends, + include_dirs=prefix(bithsuffle_dir, ['src/', 'lz4/']) + zstd_include_dirs, + define_macros=define_macros, + extra_compile_args=extra_compile_args, + extra_link_args=extra_link_args, + sse2=sse2_options, + ) + + # lz4 plugin # Source from https://github.com/nexusformat/HDF5-External-Filter-Plugins From a1c258abc42c4b48ce1662afbfb7044d5b9b3925 Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Wed, 18 May 2022 10:47:59 +0200 Subject: [PATCH 2/7] update doc --- doc/contribute.rst | 10 ++++++++-- doc/information.rst | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/doc/contribute.rst b/doc/contribute.rst index 0abb1538..12850c2a 100644 --- a/doc/contribute.rst +++ b/doc/contribute.rst @@ -89,12 +89,18 @@ The meaning of those integers is filter dependent and is described below. bitshuffle .......... -compression_opts: (**block_size**, **lz4 compression**) +compression_opts: (**block_size**, **compression**, **level**) - **block size**: Number of elements (not bytes) per block. It MUST be a mulitple of 8. Default: 0 for a block size of about 8 kB. -- **lz4 compression**: 0: disabled (default), 2: enabled. +- **compression**: + + * 0: No compression + * 2: LZ4 + * 3: Zstd + +- **level**: Compression level, only used with Zstd compression. By default the filter uses bitshuffle, but does NOT compress with LZ4. 
diff --git a/doc/information.rst b/doc/information.rst index 1c761868..20c13248 100644 --- a/doc/information.rst +++ b/doc/information.rst @@ -50,7 +50,7 @@ HDF5 filters and compression libraries HDF5 compression filters and compression libraries sources were obtained from: * LZ4 plugin (commit d48f960) and lz4 (v1.9.3): https://github.com/nexusformat/HDF5-External-Filter-Plugins and https://github.com/Blosc/c-blosc/tree/v1.21.1/internal-complibs/lz4-1.9.3 -* bitshuffle plugin (0.3.5): https://github.com/kiyo-masui/bitshuffle +* bitshuffle plugin (0.4.2) and zstd (v1.5.0): https://github.com/kiyo-masui/bitshuffle and https://github.com/Blosc/c-blosc/tree/v1.21.1/internal-complibs/zstd-1.5.0 * bzip2 plugin (from PyTables v3.7.0) and bzip2 (v1.0.8): https://github.com/PyTables/PyTables/, https://sourceware.org/git/bzip2.git * hdf5-blosc plugin (v1.0.0), c-blosc (v1.21.1) and snappy (v1.1.9): https://github.com/Blosc/hdf5-blosc, https://github.com/Blosc/c-blosc and https://github.com/google/snappy * FCIDECOMP plugin (v1.0.2) and CharLS (branch 1.x-master SHA1 ID: 25160a42fb62e71e4b0ce081f5cb3f8bb73938b5): From 2628fefc66872507d90edaca536fcaa451c31f76 Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Wed, 18 May 2022 10:48:25 +0200 Subject: [PATCH 3/7] update python wrapper and tests --- src/hdf5plugin/__init__.py | 44 ++++++++++++++++++++++++++++++++------ src/hdf5plugin/test.py | 21 +++++++++++++++++- 2 files changed, 57 insertions(+), 8 deletions(-) diff --git a/src/hdf5plugin/__init__.py b/src/hdf5plugin/__init__.py index a731a1fa..6f8f7938 100644 --- a/src/hdf5plugin/__init__.py +++ b/src/hdf5plugin/__init__.py @@ -141,18 +141,48 @@ class Bitshuffle(_FilterRefClass): The number of elements per block. It needs to be divisible by eight (default is 0, about 8kB per block) Default: 0 (for about 8kB per block). - :param bool lz4: - Whether to use lz4 compression or not as part of the filter. 
-        Default: True
+    :param str cname:
+        `lz4` (default), `none`, `zstd`
+    :param int clevel: Compression level, used only for `zstd` compression.
+        Can be negative, and must be below or equal to 22 (maximum compression).
+        Default: 3.
     """
     filter_id = BSHUF_ID
 
-    def __init__(self, nelems=0, lz4=True):
+    __COMPRESSIONS = {
+        'none': 0,
+        'lz4': 2,
+        'zstd': 3,
+    }
+
+    def __init__(self, nelems=0, cname=None, clevel=3, lz4=None):
         nelems = int(nelems)
         assert nelems % 8 == 0
-
-        lz4_enabled = 2 if lz4 else 0
-        self.filter_options = (nelems, lz4_enabled)
+        assert clevel <= 22
+
+        if lz4 is not None:
+            if cname is not None:
+                raise ValueError("Providing both cname and lz4 arguments is not supported")
+            _logger.warning(
+                "Deprecation: hdf5plugin.Bitshuffle's lz4 argument is deprecated, "
+                "use cname='lz4' or 'none' instead.")
+            cname = 'lz4' if lz4 else 'none'
+
+        if cname in (True, False):
+            _logger.warning(
+                "Deprecation: hdf5plugin.Bitshuffle's boolean argument is deprecated, "
+                "use cname='lz4' or 'none' instead.")
+            cname = 'lz4' if cname else 'none'
+
+        if cname is None:
+            cname = 'lz4'
+        if cname not in self.__COMPRESSIONS:
+            raise ValueError("Unsupported compression: %s" % cname)
+
+        if cname == 'zstd':
+            self.filter_options = (nelems, self.__COMPRESSIONS[cname], clevel)
+        else:
+            self.filter_options = (nelems, self.__COMPRESSIONS[cname])
 
 
 class Blosc(_FilterRefClass):
diff --git a/src/hdf5plugin/test.py b/src/hdf5plugin/test.py
index 25047b67..277aa7e1 100644
--- a/src/hdf5plugin/test.py
+++ b/src/hdf5plugin/test.py
@@ -107,7 +107,7 @@ def _test(self,
         return filters[0]
 
     @unittest.skipUnless(should_test("bshuf"), "Bitshuffle filter not available")
-    def testBitshuffle(self):
+    def testDeprecatedBitshuffle(self):
         """Write/read test with bitshuffle filter plugin"""
         self._test('bshuf')  # Default options
 
@@ -119,6 +119,25 @@ def testBitshuffle(self):
                 filter_ = self._test('bshuf', dtype, compressed=lz4, nelems=nelems, lz4=lz4)
                 self.assertEqual(filter_[2][3:], (nelems, 2 
if lz4 else 0)) + @unittest.skipUnless(should_test("bshuf"), "Bitshuffle filter not available") + def testBitshuffle(self): + """Write/read test with bitshuffle filter plugin""" + self._test('bshuf') # Default options + + compression_ids = { + 'none': 0, + 'lz4': 2, + 'zstd': 3 + } + + # Specify options + for cname in ('none', 'lz4', 'zstd'): + for dtype in (numpy.int8, numpy.int16, numpy.int32, numpy.int64): + for nelems in (1024, 2048): + with self.subTest(cname=cname, dtype=dtype, nelems=nelems): + filter_ = self._test('bshuf', dtype, compressed=cname!='none', nelems=nelems, cname=cname) + self.assertEqual(filter_[2][3:5], (nelems, compression_ids[cname])) + @unittest.skipUnless(should_test("blosc"), "Blosc filter not available") def testBlosc(self): """Write/read test with blosc filter plugin""" From 33018d6626dd90a027cf41b583c0d35f7ed8c391 Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Wed, 18 May 2022 11:30:08 +0200 Subject: [PATCH 4/7] remove src/bitshuffle --- src/bitshuffle/.gitignore | 77 - src/bitshuffle/.travis.yml | 33 - src/bitshuffle/LICENSE | 21 - src/bitshuffle/MANIFEST.in | 10 - src/bitshuffle/README.rst | 240 --- src/bitshuffle/bitshuffle/__init__.py | 21 - src/bitshuffle/bitshuffle/ext.pyx | 449 ---- src/bitshuffle/bitshuffle/h5.pyx | 205 -- src/bitshuffle/bitshuffle/tests/__init__.py | 0 .../bitshuffle/tests/data/regression_0.1.3.h5 | Bin 114447 -> 0 bytes .../bitshuffle/tests/make_regression_tdata.py | 42 - src/bitshuffle/bitshuffle/tests/test_ext.py | 588 ------ .../bitshuffle/tests/test_h5filter.py | 91 - .../bitshuffle/tests/test_h5plugin.py | 83 - .../bitshuffle/tests/test_regression.py | 40 - src/bitshuffle/conda-recipe/bld.bat | 3 - src/bitshuffle/conda-recipe/build.sh | 2 - src/bitshuffle/conda-recipe/meta.yaml | 27 - src/bitshuffle/conda-recipe/setup.py.patch | 13 - src/bitshuffle/lz4/LICENSE | 24 - src/bitshuffle/lz4/README.md | 21 - src/bitshuffle/lz4/lz4.c | 1516 -------------- src/bitshuffle/lz4/lz4.h | 360 ---- 
src/bitshuffle/lzf/LICENSE.txt | 34 - src/bitshuffle/lzf/README.txt | 84 - src/bitshuffle/lzf/README_bitshuffle.txt | 3 - src/bitshuffle/lzf/example.c | 106 - src/bitshuffle/lzf/lzf/lzf.h | 100 - src/bitshuffle/lzf/lzf/lzfP.h | 166 -- src/bitshuffle/lzf/lzf/lzf_c.c | 296 --- src/bitshuffle/lzf/lzf/lzf_d.c | 154 -- src/bitshuffle/lzf/lzf_filter.c | 261 --- src/bitshuffle/lzf/lzf_filter.h | 38 - src/bitshuffle/requirements.txt | 5 - src/bitshuffle/setup.cfg.example | 10 - src/bitshuffle/setup.py | 323 --- src/bitshuffle/src/bitshuffle.c | 165 -- src/bitshuffle/src/bitshuffle.h | 123 -- src/bitshuffle/src/bitshuffle_core.c | 1862 ----------------- src/bitshuffle/src/bitshuffle_core.h | 157 -- src/bitshuffle/src/bitshuffle_internals.h | 75 - src/bitshuffle/src/bshuf_h5filter.c | 218 -- src/bitshuffle/src/bshuf_h5filter.h | 59 - src/bitshuffle/src/bshuf_h5plugin.c | 19 - src/bitshuffle/src/iochain.c | 90 - src/bitshuffle/src/iochain.h | 94 - src/bitshuffle/src/lzf_h5plugin.c | 42 - 47 files changed, 8350 deletions(-) delete mode 100644 src/bitshuffle/.gitignore delete mode 100644 src/bitshuffle/.travis.yml delete mode 100644 src/bitshuffle/LICENSE delete mode 100644 src/bitshuffle/MANIFEST.in delete mode 100644 src/bitshuffle/README.rst delete mode 100644 src/bitshuffle/bitshuffle/__init__.py delete mode 100644 src/bitshuffle/bitshuffle/ext.pyx delete mode 100644 src/bitshuffle/bitshuffle/h5.pyx delete mode 100644 src/bitshuffle/bitshuffle/tests/__init__.py delete mode 100644 src/bitshuffle/bitshuffle/tests/data/regression_0.1.3.h5 delete mode 100644 src/bitshuffle/bitshuffle/tests/make_regression_tdata.py delete mode 100644 src/bitshuffle/bitshuffle/tests/test_ext.py delete mode 100644 src/bitshuffle/bitshuffle/tests/test_h5filter.py delete mode 100644 src/bitshuffle/bitshuffle/tests/test_h5plugin.py delete mode 100644 src/bitshuffle/bitshuffle/tests/test_regression.py delete mode 100644 src/bitshuffle/conda-recipe/bld.bat delete mode 100644 
src/bitshuffle/conda-recipe/build.sh delete mode 100644 src/bitshuffle/conda-recipe/meta.yaml delete mode 100644 src/bitshuffle/conda-recipe/setup.py.patch delete mode 100644 src/bitshuffle/lz4/LICENSE delete mode 100644 src/bitshuffle/lz4/README.md delete mode 100644 src/bitshuffle/lz4/lz4.c delete mode 100644 src/bitshuffle/lz4/lz4.h delete mode 100644 src/bitshuffle/lzf/LICENSE.txt delete mode 100644 src/bitshuffle/lzf/README.txt delete mode 100644 src/bitshuffle/lzf/README_bitshuffle.txt delete mode 100644 src/bitshuffle/lzf/example.c delete mode 100644 src/bitshuffle/lzf/lzf/lzf.h delete mode 100644 src/bitshuffle/lzf/lzf/lzfP.h delete mode 100644 src/bitshuffle/lzf/lzf/lzf_c.c delete mode 100644 src/bitshuffle/lzf/lzf/lzf_d.c delete mode 100644 src/bitshuffle/lzf/lzf_filter.c delete mode 100644 src/bitshuffle/lzf/lzf_filter.h delete mode 100644 src/bitshuffle/requirements.txt delete mode 100644 src/bitshuffle/setup.cfg.example delete mode 100644 src/bitshuffle/setup.py delete mode 100644 src/bitshuffle/src/bitshuffle.c delete mode 100644 src/bitshuffle/src/bitshuffle.h delete mode 100644 src/bitshuffle/src/bitshuffle_core.c delete mode 100644 src/bitshuffle/src/bitshuffle_core.h delete mode 100644 src/bitshuffle/src/bitshuffle_internals.h delete mode 100644 src/bitshuffle/src/bshuf_h5filter.c delete mode 100644 src/bitshuffle/src/bshuf_h5filter.h delete mode 100644 src/bitshuffle/src/bshuf_h5plugin.c delete mode 100644 src/bitshuffle/src/iochain.c delete mode 100644 src/bitshuffle/src/iochain.h delete mode 100644 src/bitshuffle/src/lzf_h5plugin.c diff --git a/src/bitshuffle/.gitignore b/src/bitshuffle/.gitignore deleted file mode 100644 index d8d6cf49..00000000 --- a/src/bitshuffle/.gitignore +++ /dev/null @@ -1,77 +0,0 @@ -## C - -# Object files -*.o -*.ko -*.obj -*.elf - -# Libraries -*.lib -*.a - -# Shared objects (inc. 
Windows DLLs) -*.dll -*.so -*.so.* -*.dylib - -# Executables -*.exe -*.out -*.app -*.i*86 -*.x86_64 -*.hex - - -## Python -*.py[cod] - -# C extensions -*.so - -# Packages -*.egg -*.egg-info -dist -build -eggs -parts -bin -var -sdist -develop-eggs -.installed.cfg -lib -lib64 -__pycache__ - -# Installer logs -pip-log.txt - -# Unit test / coverage reports -.coverage -.tox -nosetests.xml - -# Translations -*.mo - -# Mr Developer -.mr.developer.cfg -.project -.pydevproject - -# Documentation builds -doc/_build -doc/generated - -## Editor files and backups. -*.swp -*.swo - -# Generated files -bitshuffle/ext.c -bitshuffle/h5.c - diff --git a/src/bitshuffle/.travis.yml b/src/bitshuffle/.travis.yml deleted file mode 100644 index 7b5b4994..00000000 --- a/src/bitshuffle/.travis.yml +++ /dev/null @@ -1,33 +0,0 @@ -language: python -os: linux -# To test filter plugins, need hdf5 1.8.11+, present in Trusty but not Precise. -dist: trusty -# Required to get Trusty. -#sudo: true -python: - - "2.7" - - "3.4" - - "3.5" - - "3.6" -addons: - apt: - packages: - - libhdf5-serial-dev - - hdf5-tools -install: - - "pip install -U pip virtualenv" - # Ensures the system hdf5 headers/libs will be used whatever its version - - "export HDF5_DIR=/usr/lib" - - "pip install -r requirements.txt" - # Installing the plugin to arbitrary directory to check the install script. - - "python setup.py install --h5plugin --h5plugin-dir ~/hdf5/lib" - # Ensure it's installable and usable in virtualenv - - "virtualenv ~/venv" - - "travis_wait 30 ~/venv/bin/pip -v install --no-binary=h5py ." - - "~/venv/bin/pip -v install nose" -# Can't be somewhere that has a 'bitshuffle' directory as nose will use that -# copy instead of installed package. 
-script: - - "cd ~" - - "nosetests -v bitshuffle" # Test the system install - - "venv/bin/nosetests -v bitshuffle" # Test the virtualenv install diff --git a/src/bitshuffle/LICENSE b/src/bitshuffle/LICENSE deleted file mode 100644 index 1365ed69..00000000 --- a/src/bitshuffle/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -Bitshuffle - Filter for improving compression of typed binary data. - -Copyright (c) 2014 Kiyoshi Masui (kiyo@physics.ubc.ca) - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
diff --git a/src/bitshuffle/MANIFEST.in b/src/bitshuffle/MANIFEST.in deleted file mode 100644 index 00746c64..00000000 --- a/src/bitshuffle/MANIFEST.in +++ /dev/null @@ -1,10 +0,0 @@ -recursive-include src *.h *.c -recursive-include bitshuffle *.pyx -recursive-include lz4 *.h *.c -recursive-include lzf *.h *.c -include setup.cfg.example -include LICENSE -include README.rst -include requirements.txt -exclude setup.cfg - diff --git a/src/bitshuffle/README.rst b/src/bitshuffle/README.rst deleted file mode 100644 index 343b4c62..00000000 --- a/src/bitshuffle/README.rst +++ /dev/null @@ -1,240 +0,0 @@ -========== -Bitshuffle -========== - -Filter for improving compression of typed binary data. - -Bitshuffle is an algorithm that rearranges typed, binary data for improving -compression, as well as a python/C package that implements this algorithm -within the Numpy framework. - -The library can be used along side HDF5 to compress and decompress datasets and -is integrated through the `dynamically loaded filters`_ framework. Bitshuffle -is HDF5 filter number ``32008``. - -Algorithmically, Bitshuffle is closely related to HDF5's `Shuffle filter`_ -except it operates at the bit level instead of the byte level. Arranging a -typed data array in to a matrix with the elements as the rows and the bits -within the elements as the columns, Bitshuffle "transposes" the matrix, -such that all the least-significant-bits are in a row, etc. This transpose -is performed within blocks of data roughly 8kB long [1]_. - -This does not in itself compress data, only rearranges it for more efficient -compression. To perform the actual compression you will need a compression -library. Bitshuffle has been designed to be well matched Marc Lehmann's -LZF_ as well as LZ4_. 
Note that because Bitshuffle modifies the data at the bit -level, sophisticated entropy reducing compression libraries such as GZIP and -BZIP are unlikely to achieve significantly better compression than simpler and -faster duplicate-string-elimination algorithms such as LZF and LZ4. Bitshuffle -thus includes routines (and HDF5 filter options) to apply LZ4 compression to -each block after shuffling [2]_. - -The Bitshuffle algorithm relies on neighbouring elements of a dataset being -highly correlated to improve data compression. Any correlations that span at -least 24 elements of the dataset may be exploited to improve compression. - -Bitshuffle was designed with performance in mind. On most machines the -time required for Bitshuffle+LZ4 is insignificant compared to the time required -to read or write the compressed data to disk. Because it is able to exploit the -SSE and AVX instruction sets present on modern Intel and AMD processors, on -these machines compression is only marginally slower than an out-of-cache -memory copy. On modern x86 processors you can expect Bitshuffle to have a -throughput of roughly 1 byte per clock cycle, and on the Haswell generation of -Intel processors (2013) and later, you can expect up to 2 bytes per clock -cycle. In addition, Bitshuffle is parallelized using OpenMP. - -As a bonus, Bitshuffle ships with a dynamically loaded version of -`h5py`'s LZF compression filter, such that the filter can be transparently -used outside of python and in command line utilities such as ``h5dump``. - -.. [1] Chosen to fit comfortably within L1 cache as well as be well matched - window of the LZF compression library. - -.. [2] Over applying bitshuffle to the full dataset then applying LZ4 - compression, this has the tremendous advantage that the block is - already in the L1 cache. - -.. _`dynamically loaded filters`: http://www.hdfgroup.org/HDF5/doc/Advanced/DynamicallyLoadedFilters/HDF5DynamicallyLoadedFilters.pdf - -.. 
_`Shuffle filter`: http://www.hdfgroup.org/HDF5/doc_resource/H5Shuffle_Perf.pdf - -.. _LZF: http://oldhome.schmorp.de/marc/liblzf.html - -.. _LZ4: https://code.google.com/p/lz4/ - - -Applications ------------- - -Bitshuffle might be right for your application if: - -- You need to compress typed binary data. -- Your data is arranged such that adjacent elements over the fastest varying - index of your dataset are similar (highly correlated). -- A special case of the previous point is if you are only exercising a subset - of the bits in your data-type, as is often true of integer data. -- You need both high compression ratios and high performance. - - -Comparing Bitshuffle to other compression algorithms and HDF5 filters: - -- Bitshuffle is less general than many other compression algorithms. - To achieve good compression ratios, consecutive elements of your data must - be highly correlated. -- For the right datasets, Bitshuffle is one of the few compression - algorithms that promises both high throughput and high compression ratios. -- Bitshuffle should have roughly the same throughput as Shuffle, but - may obtain higher compression ratios. -- The MAFISC_ filter actually includes something similar to Bitshuffle as one of - its prefilters, However, MAFICS's emphasis is on obtaining high compression - ratios at all costs, sacrificing throughput. - -.. _MAFISC: http://wr.informatik.uni-hamburg.de/research/projects/icomex/mafisc - - -Installation for Python ------------------------ - -Installation requires python 2.7+ or 3.3+, HDF5 1.8.4 or later, HDF5 for python -(h5py), Numpy and Cython. Bitshuffle must be linked against the same version of -HDF5 as h5py, which in practice means h5py must be built from source_ rather -than pre-built wheels [3]_. To use the dynamically loaded HDF5 filter requires -HDF5 1.8.11 or later. 
- -To install:: - - python setup.py install [--h5plugin [--h5plugin-dir=spam]] - -To get finer control of installation options, including whether to compile -with OpenMP multi-threading, copy the ``setup.cfg.example`` to ``setup.cfg`` -and edit the values therein. - -If using the dynamically loaded HDF5 filter (which gives you access to the -Bitshuffle and LZF filters outside of python), set the environment variable -``HDF5_PLUGIN_PATH`` to the value of ``--h5plugin-dir`` or use HDF5's default -search location of ``/usr/local/hdf5/lib/plugin``. - -If you get an error about missing source files when building the extensions, -try upgrading setuptools. There is a weird bug where setuptools prior to 0.7 -doesn't work properly with Cython in some cases. - -.. _source: http://docs.h5py.org/en/latest/build.html#source-installation - -.. [3] Typically you will be able to install Bitshuffle, but there will be - errors when creating and reading datasets. - - -Usage from Python ------------------ - -The `bitshuffle` module contains routines for shuffling and unshuffling -Numpy arrays. - -If installed with the dynamically loaded filter plugins, Bitshuffle can be used -in conjunction with HDF5 both inside and outside of python, in the same way as -any other filter; simply by specifying the filter number ``32008``. Otherwise -the filter will be available only within python and only after importing -`bitshuffle.h5`. Reading Bitshuffle encoded datasets will be transparent. -The filter can be added to new datasets either through the `h5py` low level -interface or through the convenience functions provided in -`bitshuffle.h5`. See the docstrings and unit tests for examples. For `h5py` -version 2.5.0 and later Bitshuffle can added to new datasets through the -high level interface, as in the example below. 
- - -Example h5py ------------- -:: - - import h5py - import numpy - import bitshuffle.h5 - - print(h5py.__version__) # >= '2.5.0' - - f = h5py.File(filename, "w") - - # block_size = 0 let Bitshuffle choose its value - block_size = 0 - - dataset = f.create_dataset( - "data", - (100, 100, 100), - compression=bitshuffle.h5.H5FILTER, - compression_opts=(block_size, bitshuffle.h5.H5_COMPRESS_LZ4), - dtype='float32', - ) - - # create some random data - array = numpy.random.rand(100, 100, 100) - array = array.astype('float32') - - dataset[:] = array - - f.close() - - -Usage from C ------------- - -If you wish to use Bitshuffle in your C program and would prefer not to use the -HDF5 dynamically loaded filter, the C library in the ``src/`` directory is -self-contained and complete. - - -Usage from Java ---------------- - -You can use Bitshuffle even in Java and the routines for shuffling and unshuffling -are ported into `snappy-java`_. To use the routines, you need to add the following -dependency to your pom.xml:: - - - org.xerial.snappy - snappy-java - 1.1.3-M1 - - -First, import org.xerial.snapy.BitShuffle in your Java code:: - - import org.xerial.snappy.BitShuffle; - -Then, you use them like this:: - - int[] data = new int[] {1, 3, 34, 43, 34}; - byte[] shuffledData = BitShuffle.bitShuffle(data); - int[] result = BitShuffle.bitUnShuffleIntArray(shuffledData); - -.. _`snappy-java`: https://github.com/xerial/snappy-java - - -Anaconda --------- - -The conda package can be build via:: - - conda build conda-recipe - - -For Best Results ----------------- - -Here are a few tips to help you get the most out of Bitshuffle: - -- For multi-dimensional datasets, order your data such that the fastest varying - dimension is the one over which your data is most correlated (have - values that change the least), or fake this using chunks. -- To achieve the highest throughput, use a data type that is 64 *bytes* or - smaller. 
If you have a very large compound data type, consider adding a - dimension to your datasets instead. -- To make full use of the SSE2 instruction set, use a data type whose size - is a multiple of 2 bytes. For the AVX2 instruction set, use a data type whose - size is a multiple of 4 bytes. - - -Citing Bitshuffle ------------------ - -Bitshuffle was initially described in -http://dx.doi.org/10.1016/j.ascom.2015.07.002, pre-print available at -http://arxiv.org/abs/1503.00638. diff --git a/src/bitshuffle/bitshuffle/__init__.py b/src/bitshuffle/bitshuffle/__init__.py deleted file mode 100644 index 06d53b37..00000000 --- a/src/bitshuffle/bitshuffle/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -""" -Filter for improving compression of typed binary data. - -Functions -========= - - using_NEON - using_SSE2 - using_AVX2 - bitshuffle - bitunshuffle - compress_lz4 - decompress_lz4 - -""" - -from __future__ import absolute_import - - -from bitshuffle.ext import (__version__, bitshuffle, bitunshuffle, using_NEON, using_SSE2, - using_AVX2, compress_lz4, decompress_lz4) diff --git a/src/bitshuffle/bitshuffle/ext.pyx b/src/bitshuffle/bitshuffle/ext.pyx deleted file mode 100644 index 6c344d80..00000000 --- a/src/bitshuffle/bitshuffle/ext.pyx +++ /dev/null @@ -1,449 +0,0 @@ -""" -Wrappers for public and private bitshuffle routines - -""" - -from __future__ import absolute_import, division, print_function, unicode_literals - -import numpy as np - -cimport numpy as np -cimport cython - - -np.import_array() - - -# Repeat each calculation this many times. For timing. 
-cdef int REPEATC = 1 -#cdef int REPEATC = 32 - -REPEAT = REPEATC - -cdef extern from b"bitshuffle.h": - int bshuf_using_NEON() - int bshuf_using_SSE2() - int bshuf_using_AVX2() - int bshuf_bitshuffle(void *A, void *B, int size, int elem_size, - int block_size) - int bshuf_bitunshuffle(void *A, void *B, int size, int elem_size, - int block_size) - int bshuf_compress_lz4_bound(int size, int elem_size, int block_size) - int bshuf_compress_lz4(void *A, void *B, int size, int elem_size, - int block_size) - int bshuf_decompress_lz4(void *A, void *B, int size, int elem_size, - int block_size) - int BSHUF_VERSION_MAJOR - int BSHUF_VERSION_MINOR - int BSHUF_VERSION_POINT - - -__version__ = str("%d.%d.%d").format(BSHUF_VERSION_MAJOR, BSHUF_VERSION_MINOR, - BSHUF_VERSION_POINT) - - -# Prototypes from bitshuffle.c -cdef extern int bshuf_copy(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_byte_elem_scal(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_byte_elem_SSE(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_byte_elem_NEON(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_bit_byte_scal(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_bit_byte_SSE(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_bit_byte_NEON(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_bit_byte_AVX(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_bitrow_eight(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_bit_elem_AVX(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_bit_elem_SSE(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_bit_elem_NEON(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_bit_elem_scal(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_byte_bitrow_SSE(void *A, void *B, int size, int 
elem_size) -cdef extern int bshuf_trans_byte_bitrow_NEON(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_byte_bitrow_AVX(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_byte_bitrow_scal(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_shuffle_bit_eightelem_scal(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_shuffle_bit_eightelem_SSE(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_shuffle_bit_eightelem_NEON(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_shuffle_bit_eightelem_AVX(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_untrans_bit_elem_SSE(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_untrans_bit_elem_NEON(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_untrans_bit_elem_AVX(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_untrans_bit_elem_scal(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_bit_elem(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_untrans_bit_elem(void *A, void *B, int size, int elem_size) - - -ctypedef int (*Cfptr) (void *A, void *B, int size, int elem_size) - - -def using_NEON(): - """Whether compiled using Arm NEON instructions.""" - if bshuf_using_NEON(): - return True - else: - return False - - -def using_SSE2(): - """Whether compiled using SSE2 instructions.""" - if bshuf_using_SSE2(): - return True - else: - return False - - -def using_AVX2(): - """Whether compiled using AVX2 instructions.""" - if bshuf_using_AVX2(): - return True - else: - return False - - -def _setup_arr(arr): - shape = tuple(arr.shape) - if not arr.flags['C_CONTIGUOUS']: - msg = "Input array must be C-contiguous." 
- raise ValueError(msg) - size = arr.size - dtype = arr.dtype - itemsize = dtype.itemsize - out = np.empty(shape, dtype=dtype) - return out, size, itemsize - - -@cython.boundscheck(False) -@cython.wraparound(False) -cdef _wrap_C_fun(Cfptr fun, np.ndarray arr): - """Wrap a C function with standard call signature.""" - - cdef int ii, size, itemsize, count=0 - cdef np.ndarray out - out, size, itemsize = _setup_arr(arr) - - cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat - arr_flat = arr.view(np.uint8).ravel() - cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat - out_flat = out.view(np.uint8).ravel() - cdef void* arr_ptr = &arr_flat[0] - cdef void* out_ptr = &out_flat[0] - - for ii in range(REPEATC): - count = fun(arr_ptr, out_ptr, size, itemsize) - if count < 0: - msg = "Failed. Error code %d." - excp = RuntimeError(msg % count, count) - raise excp - return out - - -def copy(np.ndarray arr not None): - """Copies the data. - - For testing and profiling purposes. - - """ - return _wrap_C_fun(&bshuf_copy, arr) - - -def trans_byte_elem_scal(np.ndarray arr not None): - """Transpose bytes within words but not bits. - - """ - return _wrap_C_fun(&bshuf_trans_byte_elem_scal, arr) - - -def trans_byte_elem_SSE(np.ndarray arr not None): - """Transpose bytes within array elements. 
- - """ - return _wrap_C_fun(&bshuf_trans_byte_elem_SSE, arr) - - -def trans_byte_elem_NEON(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_byte_elem_NEON, arr) - - -def trans_bit_byte_scal(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_bit_byte_scal, arr) - - -def trans_bit_byte_SSE(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_bit_byte_SSE, arr) - - -def trans_bit_byte_NEON(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_bit_byte_NEON, arr) - - -def trans_bit_byte_AVX(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_bit_byte_AVX, arr) - - -def trans_bitrow_eight(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_bitrow_eight, arr) - - -def trans_bit_elem_AVX(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_bit_elem_AVX, arr) - - -def trans_bit_elem_scal(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_bit_elem_scal, arr) - - -def trans_bit_elem_SSE(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_bit_elem_SSE, arr) - - -def trans_bit_elem_NEON(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_bit_elem_NEON, arr) - - -def trans_byte_bitrow_SSE(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_byte_bitrow_SSE, arr) - - -def trans_byte_bitrow_NEON(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_byte_bitrow_NEON, arr) - - -def trans_byte_bitrow_AVX(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_byte_bitrow_AVX, arr) - - -def trans_byte_bitrow_scal(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_byte_bitrow_scal, arr) - - -def shuffle_bit_eightelem_scal(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_scal, arr) - - -def shuffle_bit_eightelem_SSE(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_SSE, arr) - - -def shuffle_bit_eightelem_NEON(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_NEON, arr) - - -def 
shuffle_bit_eightelem_AVX(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_AVX, arr) - - -def untrans_bit_elem_SSE(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_untrans_bit_elem_SSE, arr) - - -def untrans_bit_elem_NEON(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_untrans_bit_elem_NEON, arr) - - -def untrans_bit_elem_AVX(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_untrans_bit_elem_AVX, arr) - - -def untrans_bit_elem_scal(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_untrans_bit_elem_scal, arr) - - -def trans_bit_elem(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_bit_elem, arr) - - -def untrans_bit_elem(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_untrans_bit_elem, arr) - - -@cython.boundscheck(False) -@cython.wraparound(False) -def bitshuffle(np.ndarray arr not None, int block_size=0): - """Bitshuffle an array. - - Output array is the same shape and data type as input array but underlying - buffer has been bitshuffled. - - Parameters - ---------- - arr : numpy array - Data to ne processed. - block_size : positive integer - Block size in number of elements. By default, block size is chosen - automatically. - - Returns - ------- - out : numpy array - Array with the same shape as input but underlying data has been - bitshuffled. - - """ - - cdef int ii, size, itemsize, count=0 - cdef np.ndarray out - out, size, itemsize = _setup_arr(arr) - - cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat - arr_flat = arr.view(np.uint8).ravel() - cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat - out_flat = out.view(np.uint8).ravel() - cdef void* arr_ptr = &arr_flat[0] - cdef void* out_ptr = &out_flat[0] - - for ii in range(REPEATC): - count = bshuf_bitshuffle(arr_ptr, out_ptr, size, itemsize, block_size) - if count < 0: - msg = "Failed. Error code %d." 
- excp = RuntimeError(msg % count, count) - raise excp - return out - - -@cython.boundscheck(False) -@cython.wraparound(False) -def bitunshuffle(np.ndarray arr not None, int block_size=0): - """Bitshuffle an array. - - Output array is the same shape and data type as input array but underlying - buffer has been un-bitshuffled. - - Parameters - ---------- - arr : numpy array - Data to ne processed. - block_size : positive integer - Block size in number of elements. Must match value used for shuffling. - - Returns - ------- - out : numpy array - Array with the same shape as input but underlying data has been - un-bitshuffled. - - """ - - cdef int ii, size, itemsize, count=0 - cdef np.ndarray out - out, size, itemsize = _setup_arr(arr) - - cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat - arr_flat = arr.view(np.uint8).ravel() - cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat - out_flat = out.view(np.uint8).ravel() - cdef void* arr_ptr = &arr_flat[0] - cdef void* out_ptr = &out_flat[0] - - for ii in range(REPEATC): - count = bshuf_bitunshuffle(arr_ptr, out_ptr, size, itemsize, block_size) - if count < 0: - msg = "Failed. Error code %d." - excp = RuntimeError(msg % count, count) - raise excp - return out - - -@cython.boundscheck(False) -@cython.wraparound(False) -def compress_lz4(np.ndarray arr not None, int block_size=0): - """Bitshuffle then compress an array using LZ4. - - Parameters - ---------- - arr : numpy array - Data to ne processed. - block_size : positive integer - Block size in number of elements. By default, block size is chosen - automatically. - - Returns - ------- - out : array with np.uint8 data type - Buffer holding compressed data. - - """ - - cdef int ii, size, itemsize, count=0 - shape = (arr.shape[i] for i in range(arr.ndim)) - if not arr.flags['C_CONTIGUOUS']: - msg = "Input array must be C-contiguous." 
- raise ValueError(msg) - size = arr.size - dtype = arr.dtype - itemsize = dtype.itemsize - - max_out_size = bshuf_compress_lz4_bound(size, itemsize, block_size) - - cdef np.ndarray out - out = np.empty(max_out_size, dtype=np.uint8) - - cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat - arr_flat = arr.view(np.uint8).ravel() - cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat - out_flat = out.view(np.uint8).ravel() - cdef void* arr_ptr = &arr_flat[0] - cdef void* out_ptr = &out_flat[0] - for ii in range(REPEATC): - count = bshuf_compress_lz4(arr_ptr, out_ptr, size, itemsize, block_size) - if count < 0: - msg = "Failed. Error code %d." - excp = RuntimeError(msg % count, count) - raise excp - return out[:count] - - -@cython.boundscheck(False) -@cython.wraparound(False) -def decompress_lz4(np.ndarray arr not None, shape, dtype, int block_size=0): - """Decompress a buffer using LZ4 then bitunshuffle it yielding an array. - - Parameters - ---------- - arr : numpy array - Input data to be decompressed. - shape : tuple of integers - Shape of the output (decompressed array). Must match the shape of the - original data array before compression. - dtype : numpy dtype - Datatype of the output array. Must match the data type of the original - data array before compression. - block_size : positive integer - Block size in number of elements. Must match value used for - compression. - - Returns - ------- - out : numpy array with shape *shape* and data type *dtype* - Decompressed data. - - """ - - cdef int ii, size, itemsize, count=0 - if not arr.flags['C_CONTIGUOUS']: - msg = "Input array must be C-contiguous." 
- raise ValueError(msg) - size = np.prod(shape) - itemsize = dtype.itemsize - - cdef np.ndarray out - out = np.empty(tuple(shape), dtype=dtype) - - cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat - arr_flat = arr.view(np.uint8).ravel() - cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat - out_flat = out.view(np.uint8).ravel() - cdef void* arr_ptr = &arr_flat[0] - cdef void* out_ptr = &out_flat[0] - for ii in range(REPEATC): - count = bshuf_decompress_lz4(arr_ptr, out_ptr, size, itemsize, - block_size) - if count < 0: - msg = "Failed. Error code %d." - excp = RuntimeError(msg % count, count) - raise excp - if count != arr.size: - msg = "Decompressed different number of bytes than input buffer size." - msg += "Input buffer %d, decompressed %d." % (arr.size, count) - raise RuntimeError(msg, count) - return out - - diff --git a/src/bitshuffle/bitshuffle/h5.pyx b/src/bitshuffle/bitshuffle/h5.pyx deleted file mode 100644 index cd7a0f05..00000000 --- a/src/bitshuffle/bitshuffle/h5.pyx +++ /dev/null @@ -1,205 +0,0 @@ -""" -HDF5 support for Bitshuffle. - -To read a dataset that uses the Bitshuffle filter using h5py, simply import -this module (unless you have installed the Bitshuffle dynamically loaded -filter, in which case importing this module is unnecessary). - -To create a new dataset that includes the Bitshuffle filter, use one of the -convenience functions provided. - - -Constants -========= - - H5FILTER : The Bitshuffle HDF5 filter integer identifier. - H5_COMPRESS_LZ4 : Filter option flag for LZ4 compression. 
- -Functions -========= - - create_dataset - create_bitshuffle_lzf_dataset - create_bitshuffle_compressed_dataset - -Examples -======== - - >>> import numpy as np - >>> import h5py - >>> import bitshuffle.h5 - - >>> shape = (123, 456) - >>> chunks = (10, 456) - >>> dtype = np.float64 - - >>> f = h5py.File("tmp_test.h5") - >>> bitshuffle.h5.create_bitshuffle_compressed_dataset( - f, "some_data", shape, dtype, chunks) - >>> f["some_data"][:] = 42 - -""" - -from __future__ import absolute_import, division, print_function, unicode_literals - -import numpy -import h5py -from h5py import h5d, h5s, h5t, h5p, filters - -cimport cython - - -cdef extern from b"bshuf_h5filter.h": - int bshuf_register_h5filter() - int BSHUF_H5FILTER - int BSHUF_H5_COMPRESS_LZ4 - -cdef int LZF_FILTER = 32000 - -H5FILTER = BSHUF_H5FILTER -H5_COMPRESS_LZ4 = BSHUF_H5_COMPRESS_LZ4 - - -def register_h5_filter(): - ret = bshuf_register_h5filter() - if ret < 0: - raise RuntimeError("Failed to register bitshuffle HDF5 filter.", ret) - - -register_h5_filter() - - -def create_dataset(parent, name, shape, dtype, chunks=None, maxshape=None, - fillvalue=None, track_times=None, - filter_pipeline=(), filter_flags=None, filter_opts=None): - """Create a dataset with an arbitrary filter pipeline. - - Return a new low-level dataset identifier. - - Much of this code is copied from h5py, but couldn't reuse much code due to - unstable API. - - """ - - if hasattr(filter_pipeline, "__getitem__"): - filter_pipeline = list(filter_pipeline) - else: - filter_pipeline = [filter_pipeline] - filter_flags = [filter_flags] - filter_opts = [filter_opts] - nfilters = len(filter_pipeline) - if filter_flags is None: - filter_flags = [None] * nfilters - if filter_opts is None: - filter_opts = [None] * nfilters - if not len(filter_flags) == nfilters or not len(filter_opts) == nfilters: - msg = "Supplied incompatible number of filters, flags, and options." 
- raise ValueError(msg) - - shape = tuple(shape) - - tmp_shape = maxshape if maxshape is not None else shape - # Validate chunk shape - chunks_larger = (numpy.array([ not i>=j - for i,j in zip(tmp_shape,chunks) if i is not None])).any() - if isinstance(chunks, tuple) and chunks_larger: - errmsg = ("Chunk shape must not be greater than data shape in any " - "dimension. {} is not compatible with {}".format(chunks, shape)) - raise ValueError(errmsg) - - if isinstance(dtype, h5py.Datatype): - # Named types are used as-is - tid = dtype.id - dtype = tid.dtype # Following code needs this - else: - # Validate dtype - dtype = numpy.dtype(dtype) - tid = h5t.py_create(dtype, logical=1) - - if shape == (): - if any((chunks, filter_pipeline)): - raise TypeError("Scalar datasets don't support chunk/filter options") - if maxshape and maxshape != (): - raise TypeError("Scalar datasets cannot be extended") - return h5p.create(h5p.DATASET_CREATE) - - def rq_tuple(tpl, name): - """Check if chunks/maxshape match dataset rank""" - if tpl in (None, True): - return - try: - tpl = tuple(tpl) - except TypeError: - raise TypeError('"%s" argument must be None or a sequence object' % name) - if len(tpl) != len(shape): - raise ValueError('"%s" must have same rank as dataset shape' % name) - - rq_tuple(chunks, 'chunks') - rq_tuple(maxshape, 'maxshape') - - if (chunks is True) or (chunks is None and filter_pipeline): - chunks = filters.guess_chunk(shape, maxshape, dtype.itemsize) - - if maxshape is True: - maxshape = (None,)*len(shape) - - dcpl = h5p.create(h5p.DATASET_CREATE) - if chunks is not None: - dcpl.set_chunk(chunks) - dcpl.set_fill_time(h5d.FILL_TIME_ALLOC) # prevent resize glitch - - if fillvalue is not None: - fillvalue = numpy.array(fillvalue) - dcpl.set_fill_value(fillvalue) - - if track_times in (True, False): - dcpl.set_obj_track_times(track_times) - elif track_times is not None: - raise TypeError("track_times must be either True or False") - - for ii in range(nfilters): - 
this_filter = filter_pipeline[ii] - this_flags = filter_flags[ii] - this_opts = filter_opts[ii] - if this_flags is None: - this_flags = 0 - if this_opts is None: - this_opts = () - dcpl.set_filter(this_filter, this_flags, this_opts) - - if maxshape is not None: - maxshape = tuple(m if m is not None else h5s.UNLIMITED - for m in maxshape) - sid = h5s.create_simple(shape, maxshape) - - dset_id = h5d.create(parent.id, name, tid, sid, dcpl=dcpl) - - return dset_id - - -def create_bitshuffle_lzf_dataset(parent, name, shape, dtype, chunks=None, - maxshape=None, fillvalue=None, - track_times=None): - """Create dataset with a filter pipeline including bitshuffle and LZF""" - - filter_pipeline = [H5FILTER, LZF_FILTER] - dset_id = create_dataset(parent, name, shape, dtype, chunks=chunks, - filter_pipeline=filter_pipeline, maxshape=maxshape, - fillvalue=fillvalue, track_times=track_times) - return dset_id - - -def create_bitshuffle_compressed_dataset(parent, name, shape, dtype, - chunks=None, maxshape=None, - fillvalue=None, track_times=None): - """Create dataset with bitshuffle+internal LZ4 compression.""" - - filter_pipeline = [H5FILTER,] - filter_opts = [(0, H5_COMPRESS_LZ4)] - dset_id = create_dataset(parent, name, shape, dtype, chunks=chunks, - filter_pipeline=filter_pipeline, - filter_opts=filter_opts, maxshape=maxshape, - fillvalue=fillvalue, track_times=track_times) - return dset_id - - diff --git a/src/bitshuffle/bitshuffle/tests/__init__.py b/src/bitshuffle/bitshuffle/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/bitshuffle/bitshuffle/tests/data/regression_0.1.3.h5 b/src/bitshuffle/bitshuffle/tests/data/regression_0.1.3.h5 deleted file mode 100644 index ee8373f7165c71ceb4f62d04bb988f41cfc5524d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 114447 zcmeFa1ymhbw)l+&cL)vv5`udmxVt+9cZc8(!8J&54-z1_I|O%kcL@%`HTj@{PG>Ti z{?@Gjdo$gS!>ZcH?>@hCs@S!wZe2eC9zGaI3`l^R01gfS0s#M8^nUm4F2L0R{+4y0 
zzAsn5i`R+Iaqt={9dfa9+G!*53o06@rH z`a%4)3EZz=^}7XFa5tPDm-`j@KQlgQF>xM103ZPHyY6xC*6sbL{^L5nKfe-z-8Q(d zCvlfHgZQ7(^S`~Z zpYzWI99B%Wy5q-rs!+*pyw1mfrXbCM07E!E+4*c5 z;}}K(kix@-d`k2T#K+QWF@6Nq2=zUTTd9wsnS4*8IFGAiD{nzo@Yy>PD-f$U5^6v^ z&kw)U&&^(ZxuC8=SbUt^pcDE*w41+&FS73~9Ae@WmKZxZ?^sG-!LHs#q~o~flStQs za`=5Axm^5Zddg`gqx~;wPUm&J?1LUrFC_yn)Ygy>abPzopRAV1cEp3QIyF zUe~eV4-@aYIq z^KCCo0H4`~?U6TZA9ZQjEG3vd9 zijq}7HK6*96oHi~oekjIZ7)YJ2=awJ1=_#|YAgG!w%bF$j^(d3ioun!z*{Pzs z*$K|7mSkoF0SrM!?xwXnCFfw84Ne(m)1H<^g{EEi0^!<{il_ROV4Du!IQz7rf=@zu zGdsmPIY-Mq1-^ERtVRH*xd5QJ;ya@Lt`B#&j6(5=`2lSGKv!$)hwg{-{LI zB3YDLniSGgL)8<45H_ZGZ578E;bF?8jRnQCWtJZaEIYNsaPo2MY&6Mr(QWLbW57zZ zQ|AwkAaJGzFrm>xY9R1Y<{Z@*vHUmRNK=@FM>IVN_6tOtR`8c(YqZ6CYP-$6Izg~F zq-9QjJS`gpla3ET&2<*3inYZhK*b?R~c;70q7!QpZHa__A%>M|*Zi zR=9rQ%*(5!K)hBTvl7Wv*;PC!8cm=xz^OU8wuW5&anV%y+kM-xmmqMry+{j6XQnf2 zGblRR$bTTSbFQK?P{QJ+k@9?}oe8|7tGeYxvlLKkS%kgaHFpZwIpHq_JKB#vU+MBm z25=MoDa>^*7<&*d?XR({;*=!iPEpAMlt=*j=Rl0gYzazk#*cQssKx%_3>CzYzKtWR z{9NNGrh?N{%vWK!EHQ4`oW`U4C`Th$&MpGO9VB zx2TPoW3jT(S_RBSja9}ei2J1guB^wIk`TobWP6k2qko=5mi(MF&cuNjK*_eM3iVE1 zEAt)HL{~bkRhvb|Viy4iwi!l+gVh%J95!}_WLY3fu*1w&&qaGj7^zyVoRFmRZ^h!uN zWefe|p>p0(E9{*2tIi~sRnp~ID5Wd0SWB(pHYI%BwIH_qZ<>3zz0{s>Ys{!`$zwP? 
z9*aFBoSC)eNvW&{)^CpqR(rh42X=Y_j9?Y%UzFn0ZyL~IUcoBU3N#K$yNY}|F%bQ< zL+rK}a$Fm1H`ZN5AE0Qn5#-`%;+zT*<^!Ou4@%1*T(zUY^PgSb=;l-~3kRR*y~o%I zm4hYhpTuov2NP8Gk~cKGzJ>=N*+XwItEFR65Y*+gJ#-`R?)X$BHP_wSlbqH6$q&hj z@y&QIzrT7s24u}OjtX}l2?zi9g=&TLa0*Ci4JUcGm!Wxz64L9 zf|Z0tP^h5himGRXMKiZs&XkbIrKy@ErWZQp)4jE|W`po|n~99w#_~;v>tl)G2SEk_nhsA^f?WHP#&`|)*WI?> zVwYsTfqVXSnAv{FWL0KJbf04wael{ufXD%8!qu4uao4UI=6Hx~$&x#j*Zv)-UcnbQ zU2TvMyJzk(PF8>)$+ObxEjVOy#Oj`&k`ts;Sb7so0k0`p)*`M6>f|L~IDUZWN3AJ^ zNh1ifJ^+SexJptX2PzzZr7ajaK$>a|U!OqDEq+8=xHjK9_-IsNZKCK1He9``ca`|k z>Vl*&OA?#?%-c%QYY|euU*nnCBbZD$BZW<-mm?<=)0jfq5Wpo z{BmI6cO`3oa{YsQS5KezmtFl`LIKf}X<67$I;ObjhnOQPXQtBC4>04`3T)TZJjSTExlbdK*S{qP) zul+}UH#2;*aa?ae*V|3_-HrtUiblE3y0N8xUkpFnvG-TVx0U~K|BvemRsZ*{{;}$B z>HBrXf&5R4f6v6di`l*SJ?o!F?$?#R);}%&%VgeMN8em?-}`~@<;}m%e-`SYe$2g@ z#GCN1>EG6Q*gp_>An-uofxrWS2LgXh0)OV;yz#8ugnRk_-@lm+{bS49)X#RI?`^?< z$^Osm@_P>V@|QgR^LF_f?k&?BRev|K)1Re-H z5O^T)w;=#nbUhT;>&<=7JX8t^xj976_LwWfZ-IaQmEmFUf%hI=p=2J9^-UaDzz{J7N&RgwFjAAf^Hw4?lY8-NxL3B`HBTR3#NG-2KBEgHP zn#jm3DboG4ki&!$D&tsSj4r*_Dk+8c?cB9@XB2<5otP-pgGLr%j5ow(KmB}`zw$DY zHzxb5+!JOEUL?AZ(2>zbK+_BWY99N+y?)(TC;CmSRKGlc@M39({quT=iPtsOtqtS~ zmAk&^7gfB`1Ku2&p&w7|#Ba6WjYe{lYN2(6t9|S{4axnZ zO@oXx(Alw8f~{ls2D@tXwWWPy6o&T@&EfU>(LvQJCx%%2-HxDf*@tZ+!JhqRWag~P z*zTgR@1jd5IFDLj=$uHx&UGGxU6igDKKcx2K?CMX_v(uutfN>L_IA>)YD(}6s5hC$ z9YdvP9_^?hxm>(x(k$=mnt`V;#O-@v?KdQC4h!tlapUD+eIXNJ%#J9a9G1&DIl^XH zt)AuPpV0a=Nl0X-B54hmUwLD?S?;HBH+@wvd<4H8Y6W;+k&&tK1wJlCO^f|aQ0Xbm z8kI!AoZe=`mI{7_?je#->y#Z@a=ZXzi+~!h@?vCA4$jlrf%$6B>IAA>({2ayu}y5``Lg43=j> zK;ay7%zed)>as@(?ww!X*z%&oHfx!z>C6lQ`Xc7d(0x`wP6jBm2YwpDK&t;Cy-_DV z0}n7n8V*&-<22*_l0_55t2BNPUpOLMH~FF0OT?fr-gV>VehqDK;lo(SQifrdP=1`o zZj}9~wph9KN$@_s@Us`ifeKh(7};K}gU~{zj0&WA!;w=bboYfc50p5~-d&Xec3-ar z*V|PI_ngN2BH{9VAqXoKkzP=YnLrb%G{G(`kv3VGjxhia`b&uI6aQL^7h( zhaF*kt3`h_NPQXo(zj=eSp5@3Qgvl;x~H}lX8wot#gi&XFdQhmQ?F4KA8}VPY(s?L zy+H$NU1+MJCH}(8@MsT)R)VGSWV^mdlGlj-qI?+3|1oBC$qLkD3{l5>N(a7=WX~v9 z|cVt;O{>Z<2u2ZusVxWH&$}n2gk;9&#Eogl 
zw(g%$`XMwt$dKXs1Y2gFmXJSfSY-6OEUuGMFfhu(K8qG){NUL0?h?VG*2RjVscjt9Op6P?{d*ia>Lw^9N?!wE_kTH8UmS;;ia4A5DnH+4TNrxV?qp;nC6{62Nca0HHejDu)O6#r%&}h4TH& z-6Jp*MBakPbu&12U0}rdt6O?(&`eB!siTDtUKep~cnN95Erb~Iu4V|}@iz6+UcdLq zyh?$Eu{{0)VwyoweIN;13)RQQ8PIMupn}A_4NSPn4nHY`h05)o7h!|>ohq^&k*3rz zo=l+TF_FDYfK^j-1Yfj6kEK1poP)YE?lfuxw88aSu*$~R!507PZp z-yYk0?Ru5`mxe#{C#~M;eJ_9MKl$nYWDxsp)9xQU4?p`e{#yQ|dk**VmpuOS{-kmK z+X=W)^>=s9&;0*Af70F$vwF8ku7x7!_xMfhp40CN{8;>udLZyX;DNvcfd>K)1pWpD z{>+}-xEtfW{H6W$lRYWKecN>S2g~DUd*-iYPu_F5m%rrkpSLH8q;4nRM%CZVxu5y} zd-mk%53_o=NUnu~;`jJX?4Hx_3jA37ka{5SK;VJE1Azwu4+Q=;1OSJxhvIs@UEiPd z#G)-wyVzMM^smU}LO^80!B_6oaTo0a);$S1d=FsAC*UTu_mu!RrsW%eP0Sb1XGV!` znFy~Hlt{wU3_g4<%HOIQDn+3IUVg&HXWW2(LQ8M22V8;Rj8@;a{H}b2Mc)MlR%=oB z0_Dx+%e+mGkwQzg*pA~0UwK#z7-r^CrCTldq&2?TL5@l(a8?mGohce=!Q)SqR67(b z;!g6pO_AX=eJXAS%X@}xR^Rnw(ciQ$Aeq+)tUWZ#!fWrfwNR0LuCNIRihLV3cAZan9sUv?)Wn5 z#>l#6;g5&|yFLOe1<5eW|Gv@}0#}FbH2|<+KVNEm(Mu?fjpaV|>5X|pP61F3pK*X= zl6fXGN&85dQN;B1x2TzoY~86Jm&N@mXuSYk3&l~#g7WrMflkXyd`=}UOGS9ILS)}nAbrJY3nv& zs9Ku5{4(Nk0mvKCWU&N1u znrzxJV8c^{&5LhU-A)mLIsCfqtsZ;9@iuZo2_L%=?sUm!m2Nt^3aR;U2vKPTA)+IQ zE4)OVYy5>(;VU4tfamEmER!sW(t^#S%p`f6^F|e_I3l35*6$&LCkS~Dic)*qee_9J zI}!zB0=7ame0@3N&J|42xEV9mt%6tR`5w8A;Ht?u7i#3D@_q~o4~nQfm~pBdzAQeR zjF5Tl&!A*EOLqi{du zc0w7iPgoZF(il|KsuhoE!5beih)l7?Ra+LRtA>G4KL|y(tPhM9AT_p;h>n#c@S%Yr zc$PL7*z6^~Q7B91G?rw}RKoBIy0nQ9p{Rt^f*l!ZA4b$a562(Eix&5*X>QS(Ojky9 zySHpvhZz!KAEj+FcAp`GpGD;X6xo_ArLtUQcS%F8^TPiMuS!?Fk0+&`oHBW@hjCkx>}ZVy%n)DLEgI|+EwPm`AY z@i4;l4P#A@!VYT-g9vM2mRPLk_Cmf*9E&BzO>zv4q66WJLz$3Q)nK7YNUx{B3(w}YWAD8jmx2~LP( zNMju~AXK)Z3|}O;aYVNxMV4~nd<(clUM;O3E{;ksnoJJCF|66Xk}uREArQTg!z?If z^IV@rtodhQ`&4(YJbb>k;Sf$c$&S6q$iDP%se5Xt?=)Eh_a{g=UOt(ZvU^Jrp{K7Z z-z!%dk2q^LVWJ^oy9PJywhTBAZS-Y)?sZ9(B&ctqX~G60;Oz4tZ{!BUUEsdi6#LG# z@#rTkMgIEUTzGA^nd2}${7ym6X9FH9&H>I;x1&ht+@zt+%U0`EpqXRQGYDtV9a4pt zPY@mtGbestgCK zkDX0{OZT{zzypUSwvnQMG zhI%i5X+QmRf3m{lwyEF`mdDTb%zu|XsjRi0haP&6Zp+@l;J;z?;d28xt4gqrI@6|eW*lf$S 
z!W86+UHp(c&Q>Bx6?e&VXbw^n1;R7W;K`tR1zR{{nysbFFr6zzY`I|N_gF|*W=O=| z&Yo19JOMoOi-VG)7COZ6KJ*akiYyyLqYE9_S9P@=O84Ge-K`eQEn4Ov``fmPG{HMy!A=B=uvl`9Ogly!MJ&iSl^Qvsa6j-*JASxH}^dvAe%l6Cs1)p)+I?`p8As4uh=(K(DRg`a67S%mjj>`0S z5EzsM^ib5IOQO_-sI=7svP~5l!B9Gw)}ty)$zVkM)8sD9O~kfCmy6; z?MhG#r_*>(SvY;A@QxKMfHaDV=Np4Tc&&_?L6P2w{ha8Os1ne}U&n(iyr=>0X*xtH znQgX$xUIS!!>sVAGt7=8gWlsNN7>SM-ItWmR%2;Jj&W~lg2y)G5N;Pgkl?kfzuvCB z9>|pfN7z=-qBb>k;)0bWm{0Y}0?b%0bc9QF4}$bxq&WM}k+u|dy<>Z0yV`=HmASG$ zM*i|W>T6wOd#WS3Z(s-O;S}b}K;5k6?c#!*yV92|VMSSRW`vEx#TR8x?CZ*3UU8(n>8a9&W+JKE)VvJhQ2gGt` zCY9B8n3f&X-15FK4l`SK({;7di?qNDlB=v$o@a5J_E;b>Z{)5Yq z=1mx!`P4Q9y-%+oO$2J^?2Aj zQhF(?shF7wIb{dOw@}8ZJ{Fs{dsUpYoT;QCP@jjxx+ana^S?slFioEtmq|cpfk>Pj zmxWSuMg&}H8YMbmeQ{yEZD<`CqNl};YDkm|%lSk;`~5;}QK|bQ`4Rdmh4z=Rws`E0 zuQ8M7pF@&}2smO6AS;Y*Kh^gO8K)1+a5A-s`y#Mi|81Pm#EL0?bmYU~E1cBYyrh)F zD~!G`Lm`U>C7t-EJ6<3azb~_thQZR%t0uAczGnG@PWGEmT3IxFjymgWT0TRC=EfNJ z4|diis}U_0^9LsP^ZOxy#yvm9AyfV=pbDWbP(^44h_0E;*oErk5uxTfkf{1DIDT!a zWbA2wq+ASk-~O<;0OH{YCrXVVUCB(%3vxVSj6ZxuGJQWG+r= z^k~oD+v-4{J?3zRz0A|6oGfv+jpoNDOBrR;k@vh{sA6(_VskJVy|#?5l8zb9XZBpg zCT8DuikjPY5Pb`*QOI;JKrJo~g&9QuqfYi*rY^QNvJl5C$=4PQMH$kJp1*gx}^AIdkiIxI_EB(;$%A{{5&bAtHwD* zrv~cwEn>nV9n(418_d_WJ#j40l)0SF!&>p#PsnLsI(IShvBa3oP`?04I*;XDcqC}x zNeKgJI5`{Y6RZgKLn;e+={L7i7<T!Yk2pDXpeb`mGV9AWC!|3WjftNjgO?anHsY!-d z?HeS*TwSqAousQlUF;_<6(1WmWKtGUCSC;ano&f#iMYq8D6Z^$;4Jh!SCy=Jf#`X~ zWh#w@Ic`2L*UQ+NjG2Hz6tm8m-f?7{v>^i z+m_ltSROyyGyh%wq+6Z;I{W+I*}At1(TM-G?nHOR6*s?7_%FiE(*`#IT;f*#YR_?3 z@X`2JHU1j;e|8;lzy9y#FOP%&{B;Dl*DdlJRsZelh`+|Heb@3Ij{_o-?`8iic7GhS z^Zm2Z?|Jvf0ou)5BmPC;xGSr;d9TA?IQ;7OL41EWi+>*=L*V!LP3-&e@nil&>Vd!m zfd>K)1Re-H5csdLv84hTe-)!|hy##P?f0E3mdx9OnIpD2=s zV#7ZOLvfi1_sVC9S~KKw-#Dh^7e{L7uC8ElZ)UbLx2k4@RScJ34q)oinw z@ZMKlCd3He8aYf;O-aK(zm|1mz`eB3Rh0Nd(3Hh?E?i=HM|r&YbrG-?ATuqBy>qbX zf?bD3wCgaLnSdfpqS|N4elGXvDq40ay53^9HIo#lvGY<@NnmJL?LyaywORqV7PtL; zv|(53R`DwPnW{YJ@kae;Zs;FoY6b%}1M14bj7q2bYo|NY?F1({0PxtM4^FU42!n;f z#;>$`=A@dGpYbOe1J`~F{Dg#{{p?-uIsk1Gt86&aZWs{%OGoy~%Bl2NtEzQ+H-wwV 
ztglD$^)1jeM@v(ogv$p?A#plk-*ZMvQ|UI$5sg;PG@nKHBUp+ohn!&74wJ&ly_^YA z#c`R(#L>D{)w$}IGsO9a&)>DEZ@I6-Sm6=u&X2Te;+bLbj>&xube1Zc z0*DDsgaaqX`G$XZG_30;Rm`5Dd&1*&)#0OTyWUJydGa~O{~X}r+aYRW&otJgfmx}! zk&z#0Yi_+z^d4_($?KXrW5%wyHP=Z}q=q*j4R}>zghq8Fz4oVc;n$q6P1S z7o^9*ZP(T@VB85O-pxOp$cz5tJ+Bu}KyWn*mjXzW{96pmWFu*%HyOnGYVg#d?Uy!F zhsI0?%Ic+Q4P%OqAHh1mCXB&!VbZl`6k!9eJ&?u?=cNBa?o5R9DP6qwLnbhp?$i_f z#`j=Ui_ugO`ca?05FESkPlLI!WS93nUn37uIZQ>K7xLoW*pTtoUmATOeyWMS!-}sMcv7C;N~U%C7w?PXZLn7XEQKPoM09 z3XCv_*64VDt!GuJ!$hR+9$`oFxq5vAsy1W;dUZPKk8&?EmyX$O>X6=`KJCtbtB}+A z{!O!NQZ#OS2?A_iFlG-ZQT~~ei?i!j$QVB&I_MOM8gmT^t(dEV<^~L!s^+WDY7t!= zAKn$rY?4PcCWnp*31-PZhwhNKe}Y?O4#inv2}$uh`xCg(h}FpaY4S>DhB&NTuie)x zFi-3-1aTucZ|*cbwDU2Ij%Qw)vvTUW9SONFDF~HvnmS^ioq#*TMhBqoSuMG-y;Fg6 zXeJkfYiV`oElQ_39Pg&7E0cyK@rK*pAlM`Vy8M7U^O@Mja71G^vQJ*yj_2?TY z3gfB)8GCI-HHnN1ti)kHG&}>jsDZPKiB;x?cd!S9ewOy45JRAN#B&&(3S2(Zv6&G*k zQN@co`#jmM-2y-2e$kH>@nbEKMfzE+kt61IjpckvnyN7x$$Y6PiJv~Kuw^+-=gJuF zBuUAYy&{DnJm>~B?%^EFEO#uU!ajsSX$-~CIIwRkkt4*Z2u4cm-ir4dcUS61^LqaX z*1=DGbX}d;%Qk!76EnF<=lV!_2oD7Q*9rWY|G556qkH*BzZuAN*{{FfQXhQR^miZD zP5QsxfBa{D#QM8I-OFF{_+RoP-pm2V^>**x`NQ{H6r!&4uS5Oyc2n)Q@H78=&+~na zALXz7NjN{u>i0Ij@4v@yV)b{)-xat^KZpkc4+I_vJP>#w@Ic`2M&QOcxOt%(Akf~I zNTwD)ndQus8dStIB-W||JYun6>b0-(Ij^pN$v~wO9D(gK>?JkeoA>W);J>}q`FYeGyuWDCId@|_h8 zD@a@*na!HynPxZaaEsBXX@l;HKsp*cL0PEKZ#xkJlzPEM3)jbhp7Oew<2jCZp*1yUL@dNHd?X%B zXO@4+3ANOxCJO?DLUM&Ek56vA=`THpU#hIvfX~5nBE{wNZ+0u=>R*j>yF&gGt-69#l(*1hI{KfyU(+E)?42?DzlZmAWu|| zNw_*3#VB4~$YyV^$$1}PIyqfD2u(A9>luYLWoVL7lN)f*$WbtU8 zlA0a#vqdtSP}0$qDP@12%nJ?Ro|&%GyfNWVnWBJLa0dEnp^yr=Z&q?&2&*nV=Twy$ zvx8eeuq%qEnV8q`E_Gh&QGFKNTe_!AOEZ}*faIkJBbk-OOpx!^x#QNN-zOtpGzC%d zd4i%)yYQ)n^Pw?xTF#?aIkU`Z_TXmHm@aWbOFYAQilD1suteHkCN8VSBNHTr;dLyS zwj-bkPGXw3>`9+-$>AR+YIwutUaYk(Cy%4FEE^V=%d3xeuZ^kMd zQK3qEBKP`wpdmsmYQ32uDxSNj*P?6Y9F{C1pcX-2?$r>fc4ftd>gJ<^boyuq;2n*} zaTbe5Etfj~<*;ZW2aXEuYN%}0(VLX1Qi2;c>sDOkQST-dz)B3b06k8twPYa;JK zOPdndfK7ICT7~MEoCI7nDW)v>W9>W)@sumR?Gq!d(J$(ciaLNZ*SoWV7Qh%M=NKe- 
zZ9bQ>NkvJpGvqhD+wj~kzzMMuY%zk%8)ydSrSA9vaVt~F>V

tZ&&Q{$g352WDmFNJ%H}o#+RYCNW5*#S%h$_OvN3N@0`Wm63(fh&1pNPJbD= z!f~fjccrH+az(|S{=9aIxhg3yt>KOXOnMWkwk_#gAj=vIw;A&oXN_vR+6oPvFkl?$ z%~}wiAViCk#K-#uBvxcqf9b&_COQ^F*Avw@h(J{D37nTCUJdZl0T*cS(pPZ6h1QuMWm(MYm%46crf+s~J&6D? zpmW&l0sv5PoHGb6U9&Dg7;W`z1vMQeL z@*<40NS0#-lo@uJOM312ZB6T81j{?2JLWbXzR<#yzuW)B{~P1^Gk;Ro9fo`P zOaIAF&m*Satq8IoJP$wnGyc1tNBlE?(mjWJ`AZ)Ed4JN!%-gxUQT2Cs&d>b+J%5tK z53~CHSQ#(;9>0m*bNXF@AB!JS4+I_vJP>#w@Ic^!z~6=dFxhv1Qi!|rqI*bM1qIIJ zB2Oh`neV4{jNyuxovLzgs4$nXAFZQU~=+avC zdBE|Dah^@#6NRA^u0G3v3j`6YTmu9an?L*(8xWYlw%U*U84YWrE^b)Q84P%qFL<__ z6A!CVCf$Y!1=theW6Wn&eHtHL6YGKU4})Byfx98D1*)K!(pR^hl=QjzFH$jW1)XV9 zlX49WV!fnGeR*{>(*uqLAHq0DI}$L+6p{f76O`J$K6BTtjtxFXHw}35lHhCSBZ>Yuqs2{^Ps zq>0q`5fmN|vQMy~<$0PfKUvwYc#^NlCK^6AztAtOlW~9$%)g7Mboq_K!_66o4O*%v zR)#CqqZcuy1Bnt{mjh?kSZGHX7zv`xp==!oImS`tea0qxZWVG6se!5>p$22)kvA$X z88-J3JELAUU^nWXkVR$kqcYp@MxIm3&K)d7=3>!j(F6+gO{&*V7Tx$3Rb*ry^+0|m z-%wB^S#X}0@Rf*IEzijaxo{(O?pMQj#W#)UKx;p9{FL;uyaBIictAe5BxB#?1QiDa zzp4Tc(4n3|LAv()YPM5XM6=LUSg@ir%0x$`cUSCBs+*#Pax>;vli$*LU>GHAq_zra z&H{QMKebeAf~87Xm4&5D6aNsbos#T!ne%QhY6)Y$;3WE+MD_-xgZnGPLa}Eb15w%C z%-l;)4G9*tXLch_hzL`1%ss%wm2w&6-Sp4xU#F&OZ+v;GmoUXqn=%=RX)Cq4$5Iux z9cWd1cp>!KssdFR!A$|1K}17Bv121l8slV<9<(uMHeT0-Mv(hi$*jxu0gk_i;Kn}6 z7Yz5joq@J@;A)JN+ec1{#uLwQ1t!J8P2KU*@sC&4Yuh1fyRvi#kKvnhGBh{`oAW_6 zmQSWDUWwL0W+uUN*Hv0lc64i3!(fj>4M6O4YzoMe;5uT_iA=fS9$rbvaF;lhXFI*O zs%T~m&%$FP#L|N1D11#p%AO>bH;{bx$+20$Zj~2<*78(rUnJzHQJ7}+l>xn>ypWNg zmc!+zFkJ6C%6TO_+@f7P{E9VubE*%=ty7Ft=xm-TqC|KrTulbb3FyjctZRfZY2mYr zqoA+gY3uQUapin_++Sf&O1}#EyivGe6r%o!Rt8vkaIq{+##L`ak8XMwTXm-dVvTLx z$jEs`Gtw+d;RNc!AQ2XzH>x&^U$|P5*%6$I3m|yBTSOz~Q4PKyfuBzg2TT?(=Rz4#T0RxJJZHgzm0e9DL_Js5+7FbM!xW#kSP#2B<@A&wmo z8Dc%CuQrxCvdl?<&Pa5S}G+vVZIOvcJw8qhzcENI5s1(pLAVMD+9I^32jE^Q61m0 z`^X)iNTOp1Wi!r!L!*raSG;qeBXiEK#HyZ+IIj|+CH!FE7GSK|Spr%PNV6(mjVWfo zc;(Ij3gZ03Fr$D-XbNpcuyuj z#*!uAcH(|UY%2y+m~#CFm}x;#f6RGjjrm*!5(Ma_#mMT{m4Ho|wW<~j!c;?H16OU2 z$~9v6amwWW$F)l(q9##!IYNVn#nNqz53eRTRm{;QrhETm1_Rid$A8|Rbb9lK^6PM;>hJEHpZWiL 
z{v`b$X7#tv`E5tk{5^gXyXW+~0zVc%q#g)75O^T)K;VJE1A)H{0bri*{-oX~OLVv% zq=t|P!eiQ!6|dM@Bp^8y_Gp{rRnOME8!mYK4!xW@rS-B4O*7^|%JeKJ-M- z(^bGd~sMq zu%B(TPu~WMX((LS)Doy7a6l;}=`9eb``JJO+ptXXBjte&n`8VtZgVPiqPoveEsZKm zrG5U)5z0JVP`MS~API%Z-ZwbMb}-lJh`2`&^g_UgQWEbQMB!R@qOR{mj0?U2N$$gy zgww{+c`}cn0o-6flHyZN6sN2D(sGj@jy{t&&KJ>n+_y)E60+qjyl5QGttVw@gV!z& zYy|O$aCcK&wzy1?6CS+1ZMe@~XE{(=ZG54<-YZ37IZJu=OngfLPWM}&%K5cNH95>aA{aHDug8*nXO3fjT|cvD{v zprTe((mpjGOnh7<{sGImaj1jHo1jU2TYE`tt(BD05Pe&1cXpRXGz_Kvo$%B3Au#g5 z=w{sLm4xX2X^vF4F76>}z{WCaSp}pQc;T~PqysX=JeZR(p}s(p^&iFuC3jz1S?X~q zB`M~N+e(0ZAfI{Pb&iyLtea;+7@Ly%T8VisN`qlgatP0Y?JE^iW1!ur-Yo?PpZEo^ z1O1Poo;fpKQ^UzDubsyYR%&fJNnDsNEoj><-f`6p*3cex2L@P_kXx{l3wHpyAz`_5w84kEmzt58DkH-HdKRW*&t zIER}&D0r?BMhUS}qURl-{@vI5R6Im89u zd%SX;W3>7vS4+_e`i88BuI`I~5>$iBr{O|0b9V)R9UEY&S$B&6{`1aP3id z0w4p(KugHjm@WVg(D5pmyu57WwgdAhfs6QT$8JKrv-doy*$*3m`XuqpCnHPAiEwCL zKn=Ti>ua9?jn@wP!!`2X=0y20kOu;PD*}J!PwKoI?Y;b?{{-~9?APCSBf9yG$m>w^ zgDc@@f5uP@xZ!q5EwwLj_4*2l0zyDq5kK6aQfdL5IDSdi#Px4;`g}XA}n;&)j7eW25Y`pScRJty@fBm&U z@4v2RcUP?6|F7%a(`7-v`TokUL&#l`GRnWM_v;>hEcvtNMDN$rz5M06`p+L{TY$Gn zZdCoZkFy`?{J%?OyF}o1$ugjDohKX3ytlJQOQ0q(U?uMLYz-+Od`A0nF79ZFTp#4M zq^Awmm5iqya2Ykk5h1RM-@E60zM>ZOugT4PVN-)v@*iDxGWswkaOxMPO}h=BPwohq za2w%)$%#W#&5h&i(=Z(X?PBL>bRNfm0@F~;IG7i6*rhpxcvVZ9xVM~QIaCzK#cyaR zBSV6?Tkm9%4+{k$jN#1qh|CX@qVJ-Zu*&%?X4a=&^7wbr3h(gGYp3L&34L^X7O#$~ zLfqLCkJ4ONxN+EW7!DQ)0$wj#N%Y=3u@6LUg$A8kb|js7QQse055w&%p>_orogTr( z-XnyZwv7N@*Q_>gwCMf)^dR=Wa4%Gak1Z}IsnETNG3w`b=TBNv{i=MV&*6_XX&Z{3 zD&g=4Y+Y(R50RETy&{l{8;RtmnqQj01UGM{NcB+N;aV?XPt4W~x+J$Lsl^y;JatlD zNY;Yu!`}W>ueZK5AE~hKL+ACjx66a#OwAu9Vdq7mwKidC&hlIfePki3{!8sokG&9Ejhd8ac(SPpU1?QlL{sv>y85jWe>M*%hSFMt>+^ARP- zt7BI+L~F}U;c-_i!LIxn1+%+45E>!@*9MHik%6iX$4BvsW2g!AD>E3p3pHw=3z#j; zqD*%Y4IYtuyyV%tER$wdk=+1HavE>I+~$*D-%3Gt^%fo7q?dyJu)q<`$%zNuplFd+ zRsT{{H>F*!?j2Wu#snG7m$2i5^>QRnkXM+}fIU0dSf6Q~@?V6EYUc(aYj-a5Qu)Ru35`MugPp zwESDmgwN(a!C?)hI_a)^a1+ps&Yu0CVpNZe!;rk_*Jx1|>7Qpp!p4js#}zj*6Qs$$ 
zCkkk69vSa~GbZ{x4lbT@kqQ_X7xtc%Gf6Mnvhb}ah7;diS(uNNMgBmQ+U3E}LTc*UuC}~hPt0`8R zR!3lLFhaD$I@;u)+iyKvb@+6+43s2IAlu(r7@r^q3?PpVKb4ebm%cL}`0+XLm?wbi z3vZsooJW}>XoI;|NZwmjK*O9GInbO}TL-<|7PL&s5h)j^nJRW1Oe-Ldd1Lu-_;QN3 z&Q3$j8l}Hfuw2dPzn2Dfi)oxj6VY`Nk>8z0d#w@Ic^iLja`a`u?!%_2#kn&BBB_ z=THbp;w%|qto0dAu)(kdVeRIt_&okYw!!zuUI}^n)z}&-I^LU1B}K?S7pR`?bRxPL zjaZLmy7R0f&^T}N*&sACl|h-eyv`W^Ek^&rfHUxdLYgh=2}foQM$6mw z@nPXo*+JN)cJ#)#kL3W=0p_Rk4mnmqUSum5N*$sg+m$(+1U!RIZZd1t9M58xAQDz= z2jGB4EyQv({j!)vtxjO@iOUFp&arw*Z7qMChqx`U^)Osf{39zjjcU@>?DR>UV`*w3 zbK#;Ff|xi-NbbIF0UU&QJZ*R~jIs*-1Za-Awz(xlI#F#1Q~9Li(r%EK2JeZ^xB>L1 zR0FW0$WO|KgtZJLdF41{BtxJYEiOaOD&K1SN}hD|1o5mx2-a^r_X6m_Cz0rqE;6y& z_BViu%NEMT#^Y0~Hnvld72ffqxtj)qv>oN9QYc2tF969zt9#LUm}UoJ6cvjSb`=U1 z!*9P8r6N|>$OT(Ry_cDWjNhf#ezXXn*&uQV4=x!K3h!!)pncc_|73Z7(sp2WLs_Nl zH8BMP*9W~MJG5L}fYI@_RAQ|UL{3CfU)oo&G$4YHtu)k<&V5$R-!J4CKeC)^(|vlf zOvNjEh9+{h$Rjx5Bg$#OL9WvLLL()#4np)9TUHrcuSeWBeOnj$E{(p>A**3`>M*yY zX)S;eSRtYh04M zkAj@oU&GV0BYy3=#thCWw-RkgOCto(@Jz_8 zT2BNtZidqq35aa6Zoh#cW~J60%Qi6-xlf_j}) z0{~YyvGxm8#ZTic&|3C(sgdMH?^&1XCjaB+HEsio_H-MuBY zgfb}&%IDQ3w-3I)#DtuglGUd-YRE2gY7_bVn5S>?!-yMW4Uw^2y{1&qO=LXn#MqyJ zQiC-SGsyF@;j93vW*J9)<6|y(TZ7288`4RYNqHNQvWwB%ryA+Aas)&qW^duS6Z-J5f6fDM z%3QrOJgf&v28@jlQXVjKdGZ5sCQT%Ws*%3c+`;W9gm3$(=-oVApYPBM)wQnDb! 
zhN!1v-f8|0+sOaiT^9B%qkeitX4Fhs6V?!?=$@E!*A1DsO|h zzGjIVcU8CVlQs71T{<*SQdnMZ_ z`oqSDwxL{Z-U6Og=B|q$PSxFjh31pgLd&4kc73Z|^IjUEBpnUQUn-z5F4Gm1ix z!BH{wWe6Qpt(hKYGQLy`4`Dgxkg2-Q;A>GKYV_K|=`#+SJr?W_mDBeYn`>0k0->d zUvY6=dIm*&e&nw{&OI1z9YC3eMaJsI#Ab4=exyOBDf7e_6v?y6nw$ObW$oTU@4FE) z%i|wimj#%ph;rf>0a!YztJy4{5DY}ou8w;ktiaR~`h0ok(0BZ>>YRjF2m}_W#nn?X zi~|B}&D|+$6Y~WH??<&=x29@dc9OktZN!!gy6)MpWsSgwDYb~;|J)_1PHFTpoPfd= zpKBLQeg|+XSX3#94+nfW_PtQFx$8V&P6zmSkg11oI)yD1Uj6zS=W?r{lfQAm-el`G z!d!*@rFhU+7sb~65w;v?5j{5D zX0PH}*pky4=RDdep6aTS5s=lby^qb=KP>|=kFs18!K|kp6|%nEE70rg`X*;K(jnC1 zegRzHq+~Ez1H}pShT)p&s^1Vbb_Aje<*x)G(to-Q`yV}7i>LqC|J&ZL8{fYLp?~J` z``vE>zX|*%@SDJI0{?ylKtBIL5MoU(u|Hm2pfDrMVUM#nN#3rE9i{Q~UZids64t5* zsaz^1;H)>YC!K`(Ml*V2%qH16b&l7xwX7>JW6dSq~d-Xp{XqjCA`$X8LQBa4r20$_xK7`Jo|i69cc z$>534+bPS*6@mnfvp$iY>=s>h%S)n{!&VE9=^qJg<3CNzYEN4VUQSn1hnPa1=g>2^ z)$1vQAcAaewYw6i#BtDg{W@`bW~~F%*4Cm<`upNo<|g! z$@4G#u$vs+Cmu1&R>pEOg3J>$e)2D&SSghKz8FZ7r36o^#7!pIdGF-GI%BMd&={;L zznD~4EzUnVY{%~WB8QcNtfB&`o!igMbbu%b?d;UGh;dO1gePBx6F>8(5>ZdiS*+&Y zf*mYn6f8Yb@MKA*uIZkU`y-K?PoXdW=BJ!Yztr}Zb>VmG{jxl=*6|gSiY-VDnGy96 z#Sxgi5Fxy-bUsVWUS-<$R#Jlj|C-*H9qVT$bEj-GHG=#y$iOA>&jf@mr5?uh{VhO3 z2)zpBIu!!iilRA_7zIhY&Da&XQV?GYqjenKoedm1N-%tk5HQ`-z%;Pfm^+2Y&X>c^ zac|Tkh(bFPJ<|M%5sR@?ouxO@+@~%4wX+wTwn#OInz}dTZ>O+ftBWv4)#~YvG0nm; zsBn5M2z9g?GShrcgiq=!aLb~j$h7*L=FJE2(vL()puQF@aL*eE=4mu%-uN9+Mq=Xk z+*kcxNKr)_izs#p0c=o8az6-SmeRIfhnI{do!W!a_K%(=K;2j)^guSYrl%0Z7OZqY z&9Gp8c==23_->)?R;A-8?aqS-(8|4NB)IP0jf+2v-0*MS9`{QOQ71EfHHR=?8u{5M zr+L>xq#di^pUOca6x07_H9+k7byO*?3Bp)0;zXTrfYLT*`yprB1>9Q60xxIya-l#F zWFu_eTqZL|abn`y2H1?AEjs93nRI+pDtzm%s`$n^ZV=KG;nx3i0%YY*E*|AH_6gM= z_fQg}%1kjmbC6NU^yM=mT0Z~r=X{;cRWnzO@7Bwz@uI(!UxHaw8Q~dYHvxjWYMTdz zK#+ybRxqd|y|1=#L)6LhzRAFr0EheOwOK9}SgnbUe_v(}-zPvr@RTvdSWhKhojl-{ zo?raukMcSa-e*^ce(-XA+3(Sroz>mUm7S=?&AIqJ-p)SNinVBQVKQVFa2_mW9B?-= z%9Rz!NE1D%A!AAP6<4Z4-9>K4ZZWr~Ru4WxRUc)XukW|BRBvMZxdp_p{;BlUY7 z5cwMvX$4;2ipe*;Fxm`eAF5c7GFb3wJpwNP94a_tHy>maLzx_nzf9_%tkw8>jj(XGg1GO9WV|$qVyb 
z1#aeJz?R3OgseSbmD?AFn1x?rFhh;m>2*W^2))HFU3~tgdv>fjWGUqQOaZ&6E2anKW}!SQio36UU=TMa1aY3aVF+@COfm-PZMIlu^(-%<0sdPG7<%n&`^I`%G#>-_^+pgelW1 zhh%~%zqPt#+=^pGRn%j%f>HXeOPngI{_9p3_*_MFXf|!oY@P_Fq-!$|wituu6c1M# z$h!t*D5{px4J^{v#2kkt%OgW-_Qe(QF~kSwB##r+I3H9!Uh-9Yz}+z?nbtXwad|G{ zGF0X5^l{ms)#bo1Uq@D}uBfd=Ya|hyE+{MAnbDN>a2!JvY|sjRSsKv<&{v+L3c0Cm zy%^JNm`4`qquoA4anZ^T^?Fb#0t~4_Ql`QJdL?M{G!PPjR65{B_;I0GrkNg3(DxoB}g+SekDjlTe_at^9@Z}D9YI1@^H5Sq6dsrs{Eu{1$1T9?slz;U96@mtzVzj;i84BYDLOzK{w{PxeUEGB-2x&|B-2-_{dtr z?qH`bC9w{nffJy4={SarjIRI5MsFTHun)S~$~r&)SXZI)3L|7vv~-*6T^r}Kbobyg zn;8Amyvm*2XmC{Ehm1d)eojW1a^3J6t^8fPh%E6$K^yfXd$HJ|?fQ7CUKTT+F)^x_ zN%_-;i=oy|TGBU?@3Z!UFvpjiC=*7F9jGQ2ra|-K2Kl}$4FL5VEB;5(bV1qJ%a0gH zXTjCgg6?H)MJ`F)sO^q7rvMMWg0_Iqui9+rVtEn7eJ7ypqnjHeqfOiSSpRFRJV4;T z?il}yl}|7G*ZK7SOg`!O*ZF?M%B%m;WBOOyuUPrs`G1uQ0N_`wyfo^+j)DIbD{qbd zuk-!uU;B^Lzmn{D|3b3Ef%@m){S`z1TeABProZ=o6ZlQwH-X;-eiQijBmj!`50V{F zP(HvEavhjqBRSE#FV)Xs@mKNd+uQkw2&KY|JUnQ^#l@*<7-lMr#)2r9J`(GnBiT>$ z2EvR9+ge;~I9=sSkppZso>FNbQ^9#lud+ARNtxXsSFH+!V58 zW@jd6VoPO?6nhJHvk@ZY9}n96q>vP_Fh1^p{$sVLqaBEQa@aVBLLi!* zpL{l~a2+sX{MuIT2Uf#r!^-!StMSX%nfV(MZJYjbIScL;#CoVrr2{@}S%yAj8tn#( z;j`NZfb_t1I$9hvAj`ak2uOWKe=O4eKSJq_`O#gym?|KKDBy&5QTfYuYGWjd;kS8s z807Jt<85R+83mK!tvqm{bQ#L!curRYooRaQ(o!k%M28tUKU({-Eb7{Yi2MC$!`h%# zq2iIV2~H=1zvoUfyneb#6`y#B5r*EF* zR;zu<021TgqvucFRejO3g%>Y{NKWOZ1NWUV&_2AW&LWtcxcMSUf$?nHP|L~qdR7=t z8}~qNPDqQeXhp&krzXee#d$1Kk6_oCvJfh*75`r+LZY~Ecyq_@NVSPzmfQ;KKgx!017YH#3y2=~$nCfi+(Lvi~FMO}^5 zJFDsJ2Gvs^iOzv6*G(COh2*AnQ5ZS1PHo!LNVysPb<-_rXlo68YbgIuY~uiShz?|boUOD%oiSj z#;!5_rH?a4IGJ48*6$ivf}i6#B|^9r#H1VQ0lT{ux_$Jtg`w--+hX&hmQP%()nkZ_ zumb%Z%a69`^lI0jevJHm(E9IrG5CE}|34z|-_7Z9_a}>A_8*_E|Jy^Ce?8|54%Xk5 zg?R9P#QA^woDKioTm!#2{IY+^q*}vrR|Gj^7%KiBV z{om;y>wo&kFZ+)?p#Dhv>+4IB-rtKC{6Bs7Z(sF)d;9U{>Hc41?jrQNJ0}MiRpom} z=w|gN#;ckmB3tV3Wn^10FqeENoR~RI58-GiHhY&7S^Yb$^!FcMAL#c(SWG2~C|su* zH6{831g`+qOXITGzhZ{V#xL`M27ayY;g<3WJb?Z_^Z;+Qj)anG!RIq?|6$D(35Yzt zm{aTDt1D<;I)s%w8KA-}Wx5F&g`yZ4#BD%}q(5L$4qdl|C&QR|+acR?a-l6$@4@f5 
z@5vizbngdDyiLUpp_->)#`CW4&CxK=i}EVF6{=NIK(lui4fxVl*UNHda9hBi`o#?p z!u;|%6n2TtC}Mg;1(AdUtGE#8(O;B`gf~-JS2tBKbyJb4m}GjEjJE}2s-qURMoLUd zd2ozEn;HEvr(3`Ck}F`aD{*!dtK4WA>>i;Q0`_UE{#FDUU); zIvAtV(Gx9b-xGeHXp?8U=VkmFz5 z8)9x2j9?ZvOdjK+M1utYOedR2%>5M;vUESPX0_0{AvMPxcD_BQ9}st4%QH#UbNA7@ zR^4bE3#zxP)sRP*B4r30To6SfyWQs%dEl0^jYI9@SAV58(;&r|1T|3qY>#+24`-R9 zeWXmrsh#0jy>2|C?DJhlrrU(afb&4x=P*U>eK%i5ZA7F&NjOmuuZ7Z@8GJmwH%eV4 zP!2c;RDHmt^A+?lfnQb(P}Xi|9p-v)OaQeM!59CMb)$=Te4gReTkX{!zxFviB?72r z>B?{#|FpG>Beiq>$Ex4*^>iVR+#3_o7#}lg%j%uN(6gdbQ5!-%ePq{V0yy|W8TSXL z~k5u_q%zfbv;(eq% zCr5yy8vp8e)U-)UvZYAMV}T{4PN_po)~@!xIQVLLEwr6$j#<%(3*hM2-hiSrvM=nj zKn+K^oTh8JBvu(&QaG_FW%g+O$uVo3|4!jz65-ho zKQ*!b%9+L|TsuK(a~;tSO(VITOXQQs+BY|v8j{v`A%#vJ-+h2Bl-?CQ1ol({_ztk| zv8=L1NDX|S0zG84J2&IizlM9Y#qXrXRDx5GrUy9{N6Zk=M9y&Ncqt$x2RwiY9-$0! z5u2I4(I#)+2A74*&cYM}xRgu}7O1&j_)ERpVaK-^*3Iw5BXUM$J8+U1u5lAv!QGNi zssPMVOkbObIqc~SZn{)3N>!1-$T4ksLbm<=ncb&3wM6n0u3?_-3TWIla+^xOdcC$c z7f+Jv)nqlr80N|0#`R_vCbTjI6NP&@ZUB*VZ{lE`+c+Ra4lBxV^=TTv!X8|v=kC9w zSmx}qeBN4-d(S=7DD$67Za=MW43NEM}`0$sfe)@(-4QilZK$@e+&0$(*&Cj?^rvG83e~06YYcR zh*8I69e8<*63Rc80raxx@Z?3ex=ZUyw91MOO+M|eSAlqsbDX?FM;SWHF^xRkA9oPD zA5=h%vT>%-hvUdZYL>d@4%r0v$D5ZIe3j2e<-68+RI`M;I7*`0D`Z!_E$T1V0VT6R>w**+-gxt0x2GrY8C9d5A*atF<@vto#>UdfBn%2~iRgX0NK4`Y#+8F>_5KgiK+*a-rXC94!D z>UZv#VBfKA%JGK?*C8Vl2xI9PD%o@*KXv8ryYx@Cpf?AHjR+THF>7N#XEtsZv_mg8 zeUpVv()K?^lEkLSi;K8p~=T)XkMqC;aZp6?}1?3~HV ziN}mYeU$^%u^eOcfHyM8q7(tA?ct)Z)@JY?g(8;_38`%#mfOWePopsK0DLEPmSXSV znP5BYl7AiuXk{tN(sq3^%<`lU$HhW5fI@q7ue}D<(=MfI9J6 zZB3GaI;vS~e8R69tf&mDSLGyUSCHDOB>_cxu4I`QW1bQekaj?uuT8M=nmd#kErGsB z#hgeK|9`q;@K^ByZU5Bk{*O{gQ~kZy_3Oy5aqvHN!hR+Gw!aDdCh(iUZvwvw{3h^k zL;(Dce#+kzXCUC4pLZCNkQ@{eAcg3a`u;>J9JUn=>sh>Cmy2o2UlOqj%t9Nx_X{MQ zfO6-!nt(P0FunZ}yrV@>7?DWP6hvgbTTv0;tFg%rJaSichewf@1!tn!7=PU35SGAl z-vF)xlVC7V_L_8(vu9}>_(7@}6~hmNvvOfN`AA+VS$>{2F%Ig?hjFGodL z9AA81!wd2pA`hWFXH!kH^0NlVu>a=#bl@XCIcZha;?bA}ZqG&$pcrX{2b%bTB7V}sfd%10$(%9c}@xb6GUXf8a_t07m 
z5Z8-Md>o+luU8o~X`yQuY|n`zq|QqG##u!FWyrU{>ve?|)DFkC$NqV*t*yKN!fGo$ z9ZtrXASt^o5E_knNF{`(X>)r@k}k`a1}m^qP~~|U5kvpm_}LdvT3k+}!L-_O`WN$B{jHt76&4GNn?bkf z>u14HO;xWY`Zt>tRzs)Lr3m1-n|>O;-H_T<4DfRYWrulpT6i{_Zg-HyG^H1h3FthO zZt$Q6nWXe)YLLzbLlPdHbxGlOM)yxH>gis^a zZ(=+Fe5#wHSF!%I$=#N*6PgmGjL9y~&*asCjwHf_nU&25_?m6?jZ_8=rJ90X$}=uU z6E?jK_NzV2$?@C5blJJ7YIJ!R^qtP&Ui;gi!)Eg|gU;orQlAB`%hMR>(Xi(>cP`0X z;nU_cyffkp19Akdfz2M%8f7v*iD%0hWsG7wr^o>Wk*V&e{==|!1M1g+Tv0I^Uuehe z8-I=)?lGwH@=GqYI%Ao&+5`x=-q$tMwom4P;H4Xf5SkuqhPkT46~2JqWv>;z2Ca-< zkeAYf#U#Kbph}i8L-{(ZHAa5ag34&U#>0)hx5>Rfsz)kYU9ww4R0Lwh#eF%=&>s>K zL4g?;v&d7d)Mtzj-BwNq6Q%lbM*;=8dCINy+zF#Aa6raVMo+c z_sJ3{rE|m1t&(!=uwzMKOw2~aN3Twxx}l%v=k^1wtB<6iSJPkGWTR~5y6B9P81||y zYD9dOhxbo6h?_>9b62Yil}F|2?@f6_3q}s&trs@#DGPPPvR0}!W9UEiM?kyt1Ay$# zQp#r8Qi#RSG`D_wUo=!Bi!;N}eRANTW~hVi(SIC|)s?#4&Rdw%iPDWNN9|ip20wWg zkFK7i8wQ*YPx4Mu5evfi*Ocg>RS(;of|kYRP>LRhnCQ*RE?*iUea*yZAsUJyI-b-OW;N@jF`F6fB> zr}BPz3d7?&Y~2rP$tl*(C;;Yuxb=yyat&!^rMoKXg15y>_$Q-V|3I&OS2kjB#KA)W zuqCNb1k1z9vXl^q6Ccq83JrMt!T0%@V$eNTQ34NcRQ_NGE*owuv@?sL!p*0${fhnL zctat=WdNX}??Hh)ZQCu_+TUJ?#>c7#KXWV9*d3WX!=JG`gJM1gK3R*;MJ-dOu^>ys zmkKc@ur;TKs0=23Kjw>8BcFybZpPKINnxmV7qbHPu1vJrSobg)??l6yR-!z_`^bkL zy;kRB=uXOBqC~UTG|W?R6x0U^gc!iaD6_g6A|U+g!Gscy#S{{RpDTizMA|iboY&xE z&005d_S`}0lU;Hpx}=BrG$yxPvdqWh7HjoOj2^!qWyIs2DVZcmTe^$<+`ep9y4(*VFtfbVYIk?R-FfVC|)hDQ6& z5R)eA4cEXmyIqjyW>NBql984{10oqqylN`%pC$4_-D8j0Vz|OWm2ztApBA_Dft6Y(xSfSh&8O z%iyKSYG zxN+=I!4D;s#bzc4?1_UeMZuwfX5*`{B%%$j5Xzu(B!PJ081o*sdr4?&^0QNmw{U<1i=~v`JBf z01^zJf-l3g00-DNCy3Wsi%3mky3X048Ly ze?i*%wdyP|6=5nf?|k6oikhty2JP)T9TY~duh&?0^Q}7wMkBg&eqUGpOOjU!zF=oHg8F;%sB^y&23>2c77pdFP z*Qp1}cO7JH&Ju&5n&ftTG90Hn2|HX!+nCc>x6-*FiK=2Oahn)jO0?@DCtZ|L0`?s* zBSR|lolyI5eV3OG@AQgey@Ucs{dRL`%s%?U^CGK_HSd>+KO#S@GmeoQQi#h#}?YBI`>e?kj{?tEOea)eVdySutKcb#(E+K z0P>V#cEjv0VsNZ7hw2lo6W-u=W47}JHeUDP?+BYVc87EMCjn!)2Up;YZLu&U_cyVj zder-`L@7%nj9k!rG9V`(w531*bD;nuxdgbAm|osdOovNj5W0MFln&|(AafJK27D`f zr#&>VYg#MTgxA}tR|Ttb@f;jFFD|_H@DZ9tjR#YbL}1`gY09#cGoR 
zS_D1w$r@)Wxc82*9gx)nYKqEcn6SdO!4M8_@RylVJ{#z3XzayiuvLCU#}Lijx~#?| z_44f1BdlQH9&3s@&O**Lz95ZdBuojB>Kx7%Xv6al0TQ@D5`D#@=}N1VMj;$EQ6u9$ zVkY4tSnh>lZv84TjWmc$`G^#4jek->W)3PhS?Hk}s>PzKvQEkG2uBOWl6Ja}f)r;6 z7VPWUmM@<`oi>fbP=lH@7-0tkbB8AtpzGpg=TE-Np(xV_Z@a^I7_*okhgM0&XsI4fK*;N>tC^x}gj< z>U}A}a7umsa*n2J74v?MbqIAgH7N~+Tp4?F>e;5McT z&EjX6Lak;u6?2zJU4@q_42^alIsq^`QA??i(X$(%;Z9Fz)$kok7MY#-Js&>1g{pW@ z7HbBDQ6P_77ME{xi795(HJu$^w1gCy(w)-PyW?8CPj=FBn`)4>*pHh)0W}$cCjIL5R!ld5`Zp*;r z{aP55JXkH)-a>O=;G)ize1$MNbdZCWd#mxDUd2$8uwb9U01qeSGcr8fIe`>MzVq?( zF7G5GFK!xPkbHFjMVCDn@_ij^&i50867hj-^srPwj`sjiH?gQ!k-5FsChl-nhW(H+ z7*i!XPBNqFfbQ(Xv)$RG5qs<4(cDrrQSkDZ%N)_^d;U_>o7(C#MIOrQ+(D{Zq{+WA zu1&AHH--np)dZ{j!s&(olt>*K5ctMV*LZ7zv!|`I4|+}Y$9-2t zyHcmzn40CLHr$7CIl-G5R2BIVw?(qjq2Q)`ZT_Z7BDA=`5-PADOtD*T&hj6W5Jfi& zf?@M46m)u-gGHS5pC-ys_HVvO@57GT$OhxVmH2_QTP_s$(SEf3zzw6s9Q%!V2G6** z6qO|(5tARsy*uw-*N);V>7?#d6yia0tp#b6Ur6woh>eb7OsKeJ|4P?J{|jB4MDU+= z?O*Xbzjf_@BiZ8jsr)AJo4{`ZzX|*%@Lv)D`y(v#f9l$}de9O_di-ZwQ9uKon}GyA zZLUy*kLK!-i_kLY-c`3CS|>W80(!h?WhXp}Qz1r5B)5QVTZlfDVf2H{e5?yNKoy!3 zQ#kHdz#a@HMF;x^ ziZnKLb_%2nYaqS>u8pHw0fJH9U!sl0M!W`FRiin>ed*0hK0AFx@rh$mT7zhn(JbFB zknMh|eVO*HRLY=F@uVX2QVC#TB=xAymz8K>TzB?p7&I2d{rI3>xgO$|^hC1`X?`pf*Y6hbGb;@{7N)fmZ zU%|njEGolWUeG*h-ELZ_0Dj0CR>f`X?74^dd}$v`-t}6xn@ym97QsVdKx_vBYRqT^ zCLH-dqsf z{h(bWemjKlI;TR!k)D*B_#?RgG2Voy{cYKL{N-5KO1tX7Xa*e13VXD@3y_!3au zK6BCI+v&+jhuJ1YpnnN4(){C9=5xD~+XMuEVy0W4Yq-^7 z!=^)U1m`xOWk)VJfJUBWpw^|BBFL=}LQ>5FzQF;02aCV}9bKvRGI1ax6yZ^?VtDot zTOvSsrWi&9QaS?VjaeV@pzml6i5HS_VlsC9YuOi)-y=T_*NK>I53zor3J12Giv|LGFgIJPUv$5;pg~4`UNh>=_z;3N2;7D= zpacV+IfX_roP8*k`#p^Zv znZYUSSxGFw=Z}8xhVbJxMk_4bshAk@yn<@XXrw49YlDdksJEjvwvY!oOh@qN7Jkup zHWA

    g0WhL%H3f`OQZ=xIi}HPdDJ~yTRABRnJ>2&#j>rut(lx05TUlCzA0t z2IQk>gVtXJZHJ<+`1C{(UMzYT08F=$Vg-!2>fQ_F#w4vKvcOzpxQKq14|~`uStXaA z)ccY{hvHQ(iApY)e2P4Vycyq6TsnIWwu$zdH_C$hjO!tn-4|!Q&EJUI)a;)gQx@em z%udB@y-$JP>FTjGhPx$%ME%JHWf$k_&jWw;w(EyRV~7u_7d%l=bS<9INaoS#gw&*8 ziSW%i{N>^Gv|7lhFD#^;GMwL-bTUlB-Y6~}b1Ex9pN*CiJ!OzJk0{|dR0rZE zndhfn>)ON7X?s9&4%)7^yvc<9NmsDp4wOz3kzt#i2&#{}YXgxXwbH#@$| z+^E-BZ|{2qqgAGT=_uK@0uAD6B-MCP&)633Wf{lN5yy{wr9CCFK77cvV$EWrl^%Jk zO8P(fJzEj!@MDBXjEkvnVn95;&JxCa}2(1h|&I=-U?P^ycNB|Vu=Edh&%!#mP zB7cs}D&)Nr+u3jG=KchNs8(S0TR}d6dxP%5YdhBjXKR6PD}gbcXs}%Sc%p$Vf%3yq zhLE53s}=~jH~4=$j8@#f+{Zjij!%Rh@vYyp6AbZ2HSGlzwCqHI3$;uf>p57;homF# z(MBi>s}aF{4N<9jCYR>!ozWP~ZmSQDNC1wNRtl$VFx*4}t`I5aa2}3S;P1J9Nccx{1vlNcjg6qeB~Sd z^aK@i5LD*y)*?UwY{)FC{6)w-70r4XIl1t zV|cVQj}PM=ExB=&O?VgnBg;JBSPziPUL`0jFYDL#gvA*`CNG?<3h+qxYrSr4gHNdI zxUTK@xD;YzHa0grJH^?m`8GTOm2Ce=Ct+kyoq4F9xM2{u6K~LDpIf3(E-Kn;wpxb}jF>Da8Mjz}$ z8C3Gm{UbOf=E`Wo{pZkd;Vgb56p0ZrDTyB-*BV)dvIlMML@NGYuXq!C0< z_p-JE+$8PAZ@76R1gD>2YosY%tU004He9Yr2IU$Du#-rBay z6$gW&*!uc}5i>+L8d7wt^^-~}kohu)$!FV_D_-c^eR~3KCRn#Xo_uGW5=UxcCO`pj z2-w4*apo3Sa7NAjK78P4T+C&TUF2~MK|=x^xm;K8;bBg>&AUV;+`GIp?eV&>F(IB4 zuRl7<@#hisa2=6|#B!&ge`vfwvbrQv^=ny~GW%CPWgyKRzwmkPW=#=b{5w+qQ1iL8;+N&F-pvZ>ndgP*yP*Dk4oTLJmY*r|XuGkKSwVL8Dn5MAlMY?o*?af1OxGv{- zjwT4amL;2@r!D5@>!P&u%DQgzHKJU&SrTdY{aGIXYY|a%H^{ibYwhoX)U^ zaDYByO|sH{kVnlb{far>Z^v31XZ_#9oB0zAicTf9d31y&zj~WoDr+ncHrl*x$^%+_5h$NF!@G5ad z&)r%zOlH_!*mNms$BD4gp|5=DP7t?>pSeVyC_M0FUAciuRmb)CZ42}ilSCIpw4~M=&{O2pAUcDfg@}zFv{RW?Ok7e97X*PhJ`36uO@|fiN-n(E2-kk|#GLZD z76#=9#Bc`cHGFAvj7R1tV3^%e2Y$OA=R8K+{TNblN`fKXGLP_7;EmlaGW_zGt5ybf z?k!=owmp=`@MlL1 z1qjbD$zezlPDkBuln*#+A&9Dz?)G?i%N4|86>>W^7SfUa0?z$;m`H<=uRt2Tf9HGsOTvWh)i%hWdwQ>SE zEpt&`KD(8Vu^KI-ywYYbG|EBPj8M~KvnaH}dUVT`g5%gRN2VOvpfMHPZ4wr?W0d*g zJ&&s}*_5);U<1I~X$bhBhcJyG-IJW-WI|Wsg?!^>vSuK_2Y(c_$M%JZeBvOcMPFmE zzLmu`rn*>50}p3)qUq~9*(`mc9i0wNSaj{3`Mt%{*a8K&qv1{bG{5~KX`;QsawR=6 zl6EwkQ`<)Ww0LBMx8vYUA<9&2{|AiHw+@)9dA@I$O5;ykHftQZKga;_CB^22*)wCb 
zo=JpPQxXy?9E;fR%~C5mweO~9!bN3@Nf+|-a(ZX`0WST(r?rh~h~AILwx=JN8PK*~ z6S`cuWyT=^Um-k_G7p~Vp0k?{08betDm6lcY9?bU>tF8qzR=6S-jZm)1-!Vw3MZWj zRRHH*bg;(|XX_1p|0@~0>@Q^ObG3h#v41^g_*=&QFUR}85B?_bo4{`ZzX|*%@NYx_ z-2WeB>`j}@QJIQ-D(nJ9Z2Vit3K3c9y?S3<{P55nrAqBlob!gWu+tyA92nMOoc&aH z>vO))&JYX`2zZ&aBb`|B&Pcs2Ho^E8GQgc@RtH^DQCpW*_#uSA4Q44s^#R{5fpMv7 zXqS3+2aIVp?5&cVYg2w`V0q5vSKdmDzgeRJ?VCO6cAUSXBv_Vwn6R(_Jzq1nC=K1D zV?-p3+le&|FbviedA9XCo+{9I=|9k>r3vHi;OCy^wA{COKsTbhq{%-HMo6Qg_}R@& z8HiPD7Tem-`u6EYj5IjLsxU}sWdydf)ug@!s2tB@@Ag9e1P6x=KE|r3?kU%eI_BNT z7Y*{)KEy!w(!eM-%1F!=_-T*j&Y};Dex>pHz{nMw+8D%<7M(~S;*dFen4*K=*lS%g zDm_jFP!@{VMNw>eq3!yZEi4<=0j1HKH`P0Ofz{+`5nt7g3~7+EgmHLz24Wt(P#zP^ z3Z%3^7!>twpAneDFq$7{b|dOe{Z0Plp7dK2tJ>Do)((|&Sw_Id-{1V%oB!`aB z=er*<2IY+GjLu9>R)I{FE;kNpX7jdP^Th+kUuj=agE0N8vtLLAy@9QlNvZy+ zU+Pd-(mCDk%rec%+z~hsXk;qTLsPHg+9<8IS|G>pPD`x=t<*>|S^P2I_QBW1E7e%9 zh6(v;1;e2F>5gz%=F}RuZWyfEY2)$YmmXPdfLgz4w&K~|b_;vG9NcB=8W|M0xzav! z{qMIrxAs*VyZjJE_Qs7KDRn(7qz6^Sn$rE1EZ^PFrFLKA#pcTy-O_2ox#5F4t$Vk5_jt z?!`vDF%;rlaKJF_N94uyDOfnOzKQ`S?2v~dri=abA-r6js>&9QGJ)i#+^S2v8d!fAT*-uyl|0<9HN3t=6wDOSK$j%j z;Mn@IEH74YVwLno{5s6~B%~&4DxDK_GUbF18ileo9xJlXjZS8*rvj}*2vYV>H}Iua zo~x{iP&gYgDV|e{=B)MA?fHvv1nW0vjfvo8Fk6j(ugwZDBABKIy{a=gim92 z6|MHsxAbEMzX^14pSB|l3J*9}=)oq7$t0{17SZ25frSDq{`d-Lc&YdjUUbP`uSXQSI9^3h&C6DCEJJb(hP`5G+{wgqqh(!r&BGwj?oGZR zJ(~Zu71^XmdzS?{gU)Y%bwzL|VMZ_FG82v*o+X2E)nq3>0_;wc3O$^)K@VK@tV?5) zw`j^6QdF$s0J9Fd|ck@IC2%Ir>h({H1tcIV$J!@>@ly z0W2NUcXl>1wcT?iz2yzY{*Ae5UQKaHp~ZaN2hzK@_mH2AmC8_@84pn9DZPM7F0pKs zvmB`2E_0*^wN8s={fT5Jz#`TGrh}JUrlYt4*%71VbT&`f(>|>yBTj#YXDb6;2miq; zV&QFka^p+0@-;}B?w-PJWXcrscxh|nU9K7Qw@a6qq%l*0ukQAbJ(1aDfFPqmO5r0} zy#^I$6l-61!-`k?@xk0>Y-G1SD&K?w>sKexvTiVZ?2LzMR62b&3DdGZX@KJ)`OgIj&2-;Jp6 z^uC(k_KfIj%f=NZUhh$REU;GXO~0@|3G;n}3T0H9UZ~UY(fMK>gnIX~%;QRudHkzs zD$e$ZGg&Yr9M<{&)81PE$&oGDf(0sOW@cs;Gcz+YGcz+YgNm6!DrRP8W~rD_U;SEo zHgDf_&zkMGKRde$vow$N>k;X8B5tPVaSy-s!lJQiE2BPMaLygvW>SkC-Vyhu7zu8( zPSTt+89qs)dwVJPQo#uz{iipt^{uh{q$6jTImhAp@ZeT#CuMn&Q>}P`pkfYjoV%25 
zN>Yts;s{W4Of%;)1g_|EdxbD{Ve8dV)1pnE=amJ9CUf5z8i|d-^4eFbj}iH{r>7#Z z?yo6@#T+c(4Y4t~qT}0q4NIupEBK+UO)eaPO}kLV0ZT5`;Axhj*52hGM1cE%V=?EK zb6Yv2TuHsHE~>jyxh)wrb_L{+l@T#QArz8y%A{?wL&ig=mJTIYGzIm{CL(6XfjHW4 zV&Br~pL+)ANHxc(c>8Fp)CZv{+)U+hvf!ZGvXSQ^+yKd$6bpBSTNKcZ0hF6LPlMoe z^T;A}`(FiTjZHFcI-9zhHx>7Ti!@fijg#yaLhLaEJBMT@Jxic9rJr*Y80S=~9+}T` zEA?`j_V&+oM#x(xy7X1qnUDkXqz}19(3dp=EbBd^Cv6RY1+IrpoHbVWo8!A}f{zZM zwePvU0;mHUbCab`EdTvh*h zNR*GiKiX>-s0y^sH76qF8u0j?!qwKv7*v3rm|Y!BkK0~)1|ax`f4-_v@at^*>RB<7 zT}bE24pSk0ua0rsQ;TdX{*_Jez4b@QspzW)COEg=78N2h3ay>gvQRqvSX36nX!x)? zBMa&_MiXSzhBpw_feJvKZM9EY&22oAp`Xl0Jk;})4xFN6lZ)0Ia4iY7zV51ooHa|0 z>as`9U=_2pwNRvQi7hFDh=Z#A`mj!!U3wBHoi#0#u~g4lwzGRdzKfD96F8q%m(GaqrPLejJ zDkUJTX$yXB;KJD;jp)4x-9z6*KU7nZ*o29;N#T2j3!|%gD9nYz=wiKI=vk!o>I0h^ zf8!wSjI`YgTjzM+a%=@eO3WIgV><~u!7f-3;5|Nfph_H0HUWC`SfyOY2Y`~>IHa$; zMBiArc^Vrkd3QdQ${WkDxlG28O`p=H5a0}0>)k8kho!xX^ijRLN(k!Sej0NF42^Q# z-(t{)WF7x1r8*4rx#L(mJm$+Y+o2-BTcn;p!s<`ftcXL3SEdhsD;iNgxW$!NMr8eGn? zLRAF$8%YvD))vY;8rmnQKwK!|A{)c24Lz0;x6NgJ?Q-1<&g1PfLuz>1X(>q>&-oRy ztpRBuKYj*xO2dphl_ixF*%>BnQqlc*J7wpEgbj?XgEwU|YrZQGn96RSce?JQ_a!50 z8E-grnm2$IL`CApeGPp}{EXGc>snnyBtsToT(EL``8QRVaq8<63Y<~2iG6Pk7DUrW zS>nasaQnGkD8ls-!17>SsN%Vfh_9}bk;jy4I__9aMT2oZ7LU=BBX2Etn3W9}iDfr} zDpnEz4X2rt&F558PFncDU6H0WghcxYY9~U)?A;VKNn73qeLxgxSQTf)M6fnC2stgi z+}g9;ytGL$R6C;R-a(NM5wK(~r#O}+#+kW?nF|X-`ANt5O1ODP{`#IU=2j^hbS2wD zhW6)68U%z|0hykiKK#rBM4Su!^od;%U#po^3r`zIct(sx95(%kK;_fYkC*j&r)be| zdL7MYcv&hwiLIn>oCP5^RIA+Nbf|?o==hxwWNF&-NiT$PH3E@{22YtV25)Ued7$@} z+4IM`T*f5dHkKQ5l}=sNHk_kx`@pqwh)otaEz@j?8SL=zH;%ZC31KYEO+C{a6JI{e z-fVwneeHxrDW9I#I?Q~L?_td|lqhx5D^P&Evw<&vBYz9<*Flf+DJW?AtR*60fX&B& zd_#YM1+ebppXCdvjK1)c)*-lfGSFu1P;i*eLTRj(dU)NY1{vMW56A{C$mM%_soJu|6rX2+X|Y%$TyuNjxy=~AhFospCG?8kW~=kSggPvS z^BvaRXqa`KYRIIzicPN^n#F z+$`(Y_v{j$2~>|#a-Td}qm7k{m|BnUa;o6IDaxSPj7_Ei=+0>uWer=SmU}f**-gF{ zVnB^g#Ib65N8o|-bA1X)2JLotiX5Y}HQ$j-a<`#NrY$qsP$70!_SWyDK%TWT8zo45 zJ5JvH$j1F*u*dY`n;;a^bsLRNZ_1}``5CfeBQ-SUK<=<8JU(K#y+u(!Z=){1GPCic 
zR5K#1DiwX4+kl_yBtmY>TbbNjM(!%X`iI0Rkfe1gUWHA&*`8@qrXjOheUY};5TbW& zkgWUd`{!doyphht@lfp4k9EcMChpi^!DC%A9av)n#71WR%^1?Jl}mkm=WC}GpJeR0 znesAM7InOn~7 z=_Ouw1qbeAxBp0#B~ut6n0~E{vj;H7&ZZaCJ>+LYO^v#f+WFT1CAvg>j8q||R$jk1 zs?sRIv;W??ioc<(eYsnX`d#QqHEr7Z81x)aIx)mmzFSCNDC+g5bP7dB_G|%$FwfHA zp%kcq2)G@Hk`3*pafC@Gn9=JzX@ujO3>%$r0+hyo)ync$OTJ(7FI)5dA8J+jM>{~i zAFn?O^#0iXL#tCV^FPxbf4KUG**~^_Zw2`Gtxo?c*HZl#$rDxkf6cZ0!s(w5{Bq>C z{EfhG1b!p%8-d>l{6^saV+eo;{fJ2Xe!PM%zkhecYQZ~oi4S#vb%j{Ko}15bz=y&F z;%n_f^S9~fnKlM0wekY#X7QDiGA&JryN9aZWo7*Q3Ds<*06x_5t5q8B8}1XzBwDuW z;6r3C($wDMHq{I~ow-U#h&U1lIi5WiPY-ftUPlHf8dd*EmyQxzS&ul>(Fc<39AKyo zL@4cq09L@=SPwP&a@E)>Hd|;`*hUQR-0S3`4374#D7LEtuu`;|qA)HlF;nU$)3V-O zYHP6GKKgdfNa#MHeAw5b_RdzH^7T5YB8*ZOucEaB+#ZC?jjoJ*C{IC|>C|lq zgn>SDK2i$#AWgM!uH7Loa|<(VhYxTEml{{)V&&9K)m%eh^>l_EXWzOtTbSn`Vv+*1 z>?<(tpx)(P`nAp6kEDjGvh(>Y&8nAZcigH%GJWX5Z(IB8oC!K-$U~GLnPH6%j>&bh z&((7S-A<}P!QfxtTss!~g{G;c!E;EkQQZ*~>FYi3G*AcOBm8 z5^9~Ev1f2VD1v46XsV&BN!5Z}l=BJ=9>s5mimSH6MK`0308D^3GkEl!vw2D}O#j|# zM}P~-Wnipyo(5T>m|rX^2;9-Dvsc5!jH`c74-G=Z0-Ncw8%F%Op1yn+Z8{@^EyP|% z%Z=1kn9-TdER4aImXo!N-On2+ei$};g@3ldmOc0*)(P*eFNmcKSjzMMV5sHm{FUv6 zDf!$eyS8N14nm76vgB7U@gxv~&ei>%g=eDJ2xm%~qrsN1DUzzkn7X})J(rhKhQIDZ zOw0Xqx*kvM2SQpLu}0Te@6Q+9XVK#+!|aD=G)C6vSCIX~+VCzPra@Ma1E2Wr8Qqi7 z-%>+vE-RM$Qkgu;CdpYL$Bb3dO!@F8EPzhB*v$ba)n&^kEe%nqFo%3p04TUoyT5~` zeT2srhUrSR0V@aW(kh`I)y+`%9ZrA#Jj=U4kK45VgI}67(pYp^b{CKl7tYmp@^WhJ zKwpeaZ-#6sonUIZw>&}PkAl1eG)r4`oPJTyL(GgCC6eIOx*XB{(&VTc^vc+MEu!h* z0VfF7wYX=+gPI2Q4U^W=Hg>}pyw?iaP;m588mGY~&gf!;#NsPaq^fY9aw>6sOj9i;5&-?EcstDbIe4}Q-1=8U zfS&P$DTrQZ#(F3>o)ey9=XPsoXON5|OIKMKm|T<2$LGGRXd3DUf6ciQ*SinkY1m7V zmy|jy))$-igKt|ro$ox*Asd4=u4#|`UYaw&tZNou@{fTqqgvHo;DH@>(7q928!+!; zLMgl8eqT_mM4W}I=e5w(lO#4=TfjE7(}NDm z_q@hbrFn`qo-*cD6LY8I!_c#N5OXO#7|&Y^OWrA=4WJ^XmZ zRP2tWZ5!4`{g2s1L2lhGz&{xOb9f!e&^|&VhCB1s0a087ZE9NwIi_9gf_oJ`8Ob)E z4+0wr;;0HSX9w;)PN|LRlC5d)yu;?g{eT*7Y~hBhpLnO3wqECHa5{Yvxj)VCOFKIrOHCpNVj@>SpX)VcyEhH13Dp4}J>_!D 
z=BTMneuJkaLqwK3iX?UUX^BfRJJ0W;xl%m!<0=!&{ebn5y)_*ja@65WxN}OVh8C}k zXm4;dlN|?LCrL{MW+!@st7M&q>%Hx%lPm1QwfTeIEwGg{nOu@ajIKIKI}<3=@E}_q zcuc}OMR-M>@m;gHw~Cv)05NSjm5#rvbS&tI+Wz$-oc=;llf8J2d2Sc)~ z&`srO2iJ}6hlT1>y$s9mU$V(175avzErK8`ieY(mU1#z4-@R>|`akLz)r|Gk8i*6I zAyHvR3x*@sn5^DB#lMxtP!L2+z6MiAOP;r)Sm}v;F36JW$(p-3OD4B`K>m2T zo9|ihW^SM>)6GfWV^tVpvZ80ODd~i3Sckn;o6&zr0B^Y?bS8zr>ws5kgUme`>hm=e$ ztzg}}mp=znc zAMOLS_Hn14)yiyS=n=d}Ts~yboA5OZE0$m3U6Pb@gtR29u3DEsh5$lmDGe=ZSQGy}&&|#}qBE)G9U@Q{7$&;D! z4DFJ*=M$2&7RUx<71m9`t}c+ zv^GjtbD=%)WrvsiAf-#Ml6E3-D4Jrd(*HTz0}AjDzTzio0SNFX8v+RUr(*7BT=q-; zCEN3F3w3}5{CNhRf8j~~oqY-We`VyS&!DUShhtuf47vtC<&U3H0DZ<^dd8o6M%{nt znSSY+e(IV2)HDCmGyl{x|Ed4@OaJkw{^Or|wqJU-pL(`G^$hg?82I52{T~DWkUu{7 z=Vb$^{lnS+0o$JodV>PU{q&VB`R|=J`#DTL{`Yo%KZmgqeFga2tajQexv|B(2vSlVCn^_Tq1dHdhb(h5fW3GpAU{*xsA_wR|nXK8=I{(Ij3 zLgIg~{J+jyv6R0*|F3z=Hu3j%em{q)k^X+3<^8nl@cw(dU-R~B=HJ`>nzze%KTrJs zQxP@}^GTt>wYGpu!}i*c4{SeLJ@b>>s3(j^Qi2S@rnGc_8kX}Ord78YcH%c=KKO zUyUG5aoY^WwV0#$$Yr+Gec2Flmuh5bgcBkkZn*>~NZ6%xiR|WZGiw>kEa|KdQ67gY>3TAn3)n5dW8kMzV z-M2tvjdElzjz|Plt(zEjX3h+<5}aKIGn2FUs;6acAHzcu!d3BXJ!-EdIaSurC@dp~zfKDLrq9cAaDmU(eGYnK6rX7PV4D1i7hhK_q` z&FQwj{#99p+$_v#7^>92Q3Ghjma^WS@a$sFR>_Ba`(1(ey{W9p10XxccD_LY&C{~U zJOFt_zsi8@i|YGL;DTV|sed~uY}PtFI7&Tf3u zo8AF5#1##qSGfX(RBUKSG-(`Y!i{9~o}{o*iRhiS&dD|&Uqvr&SuvQ4nGcHD9S3Eq zVyrsYuDK%br;JqKwo4(vA=6nP7W^3$+~X4NZWra8oP4ctJ=2szQ7%L&t*4t!_S0t= z>?2wkEwvGdgmP@!AHwXW7+SY0N7X3H)rs#G2z!>~SQmYl^*}CK_@0*m7uO{(j)gzV zEP;?SNO=z@7Y}Q~RQ3;)?6;ObQRciH$`PX(r{5J4$t5JOU|8P?J$BWEmR1)TEX%k34=`=93=~eZ01V`ckbL%cT|vNMX!eHM*`iYL_5};GnKMQ6gsL};E#W4 zC1yN9Cmix618q%rotUM)XNiDGww9&oDX!2!IG?b-U+&I}ipCI_$e(Mm>5yTv)VVJ9 ziL7RL8SwlHeDL9mDJPeIe|&@D+HzvDcV!r7kymi|^laU}gVQ`g6$6)LB4UjO4L%69 zdCp-&6fND}{&ql`4_C`UQ@oQljFkI?!P2`cJBf_1p8#8&0WU35Piyt_R;)7q^z&9# z3%GZJ6Ud<=@S{|SJYOYmYEM<%@&LpU-}qUVZk|YiXJap8t3FB;pWp#Q1evE*m+oF% zCUD;iGn&L5&Dkqva|;B5L0h{STc#8*0iBa zSlGlc;xr%FOuxkhhIQhdclv;rrwf*CxV#8<=;O!74zKC+J-O^r2`xNR!C&tul(GM) 
zwm6Sm4Ws9+Or=OjOM9Ie(C%D~^tK0Q)auvK8)c?c8co-vUIN0y+N%a4cuYByAr+yc zjJxZK#;3Z2)C3~h$3|9S-uz;J@Qg=-bATqys3R7-kco?vs>c#G^pSTHudLM+Y;(HI zOx0@(I+^c^Xn0ZeLWBe0iT>DA37$R0;nU@C?9B5?{k>6^Z|DYi{n#{@3%wL^+Ptuc zp^Ko$SnSjlP}t4m^$>q_Gv{un2u6H9?-INU*5-ra4%N+kl+46=gNDL4_8JndblJHJ zbsYpRsfYVoDN%V$cVfa6437>Gg=k<`L#HqD=KF}Er(PI? zJEqu`DJB#wJy~Jpa&KAL=OG8I29(%!=R}y!swSE2m}$Kd+~Z?XlLf50AKSKRW>gP! z?+C9ab4BT1;JX-zRD^GXpin%zxl^@W{&#+DHGKMrgp8>ayRBv zdgdwv-z@5x%QkH}=hN}mlCm^gOByG9(p;YAsH~PW&Gm0T2L}K@Fl?wexnc{msN%RJ zt#=j>xs+3su+KFsK^PY$>pHQ-NNQ=co#-d-23k+eN{BHYdaEpyrpmM^o@a?!d*t{M z)Qnx1;aMYN-Da2iEeKDhzb=t5qyx+K<$SOM#|Z*O@IqYa+n3j!lHR~29mBV9sw;l{ za&d=`@Xt;04cPu@MQ-|svEb6UTsSA%!0I~!#v~Qc9^&t zBoXgvVHa7-Z(@Au4C%jAAv(2VLZ8cKuUdvIs@lsij+ zC^2mHcL-{PC93y>W;?(_Xd>@Wzojrzv9g(G_Ax~|aX<2d$@3le zut>7ex=Um8yHxdeqq7mLiVJt`R9QAy+eCW0) z_*b2g?9a$>4=THDxpYBozSwi>EjP_LmNG2^dlBh0}6^Ki;B;a{V& zU;j@28TS9@hyB+6M&LIBzY+M2z;6V8Bk(^S0dSuGSQ`<9in1*>r3V_}ozaU#N}@m9 zVtN*%%ptK;)^e%?FIdLoBnIAH;%KXkL2 zi}v(xp6yg`U&jg-DqrRzxwFcYEil-_)L;d(>Xwn;#Kujr$<*kQ>^ue>Jk6 z2mpwf$_{EeAyEVoCBYB~0S!D?ToOK5q;Sl^+jARfXa^>AC-?}UD((g)*vc&C8rc=2 zY2$I#A{bsYu;BNNMd-yCbY%UekVS5cVE^OurqY5&o-f>HLNdZTDx=huuJ9ik4HB=D z-SMhkGjOcy*;Sdtfy6czqHi7X{Uquqm9!-1U{iXR<{ul6)%hwj;96Vug6CIZ7GSG} z{OwS>50VK5zzUO5If;kAIUzgnP`nzPL0NjmN$9~?>muBk?yTVuM-H{8nyWnVyJg$3 zeB+4V2iIeLLA87gR|WYR*R4@38~u^8pXKDSboK6b0a4Tl#4_qSvlnJWA3E2@yj~gR%I~12t{;=aohq3@f4h+SV=HCDpdqF|hb*ejUdl{A>GW16K z%xs^>+4X#FrTo#w8lrtBw%R>P3X1sppyX|+xXUqcXF+H8E*+iK`A~7N1BScs{GrLd z{mMxvN|5P)qbfD|Y@0=qOX}Gx)0{5J3#u0<{!}+;ycaJy*3TN;rlNG;HqoXp2fb{N zYs${^5eA{fQD>55M6ZeE@t$0(7Q{8qpK?5-zFF?Pqu z0q2Rc?sD62!fLaY#G@B|AqFlhg_x*9-8=bfbL)NC2f^8P_8unRCI;#nQG}El4m_%i z!!Ql*jA&b-t0%wrkXT1|7l7RXH+(WxXW)IVE)vJe>Z0>+L2~;DN;K;CY=83WD9$*SXV%=Z8g|Ab!1T22@hdPqJP1epTQth{dhK<<7QC zLokuqRMN%dk%&G=sxj%|_>Rsz?N)2%F)ZQA$ziRpYOLQ1-4DxOEay20SP)=uktye9 z+30=o?EhV=hGCqd(9IvH)5QVZ$lXwH8!?4!asA^$et#F=TIG)9CAW>PsXoOPadDvm z_VXM>5@K5|$Eis)HVu`Bw=BHLYD8xmit(}+J!)EHto1{biL0w49lo#m9Rhv#0iLMp 
z`~fO+V^@@c6yZhke7q5iw&C(DNkbU>&^P63E&N)xA+^XfEPTj-uKkzr5P50v#ESlT zJU4+@r`PLJ7WGn9B~82f9Dg~5S4xKFd#x=O#^J!gEKRrdo)SCIgv`-Mh)|t_LLjM) zUc2g%1VG-M`@PG2`e2z-)LKzfIX;{c#_N>E$J0cLW_gA8^i^J@+{{bs29zT{D^lOx zB3|`Kf(!trE9*r!6xFPW3shF~A05#CtQ9LvOZ|j{!{Z@r&su)28ACPTJq=(_c zEk!3V_xl<~bZebV3J7O! zkE8?R_qape$;i|tz+=`|Q}6ZfD@Ue4lSS@%P91&{$Ad=4`jxdS=v{Hdigx@mU8gTZXtK>joW5io{d6JiH zoEOU4WFt=5uA!^}3jT183PiYy=`no?L}6yv1fEJs_eEC%DL#WYEML)2tm>yiY@9eM zOI!@Za90hhpDh>{=v^VfuEHmVle26dz(dgZTO zpQ$bO@Clx4k>R%{5+A$Vl*8aJq$8X9#dZ-5_qCXhd()O6uC$xD5vQ8a4JTot^FJ`u zj1)3IpcI>#XMevzfi;#O)ERIYL838I4Kjr#gp(g2Rt#ed{6NvZ%_8pX3U}+_DTpso ze2{~0wtc@7JstmzAgsyWWGO-zb%hWPUyB0hs>oXMluPWfmiI~f8Q3{e!!ZTd)zn2P zJ%s)OWFTt}Kijmt*uV!Z!el~`Gok`KFdXN3&OzOFYfFa2?k%{{GvY_rQ!Q`nr9Jd+*%RF@MI;uZw+)$VlcswFx6GcPwjq6>)0EBRamiJl0U+W{LA>% zcbi55rx!Igz~?fGqq9tR`l!2>5N&ejY(1-E1R^n!aE&TG-Wt%0IHHm$$w>EWct{wi zOwyxRQyD^1*(BE$HhW{mL&vmcRrrT}DY;TrLtQ_V$im2pyq$DIXkKQ68dpSns+dt* z20aDUk3O6ZLfI*gFaUNbWj*VzL`&H?FD+3Sdn}3Gu?QwIhr*(oyNRvZED%?pg6dQG zuxTRxo|gaw`1`7c_n$G;FZq|*(SNIISj+#D!T1L8AH%SJ&sK*0$Kn6>{r@Uk{0oO) z@-OlD@6Q$wRsMMa{&4l5w9&ug|9>xA{NH|Iemn3Rf!_%HM&LIB|636VgFTe%%&_m& z(9*QhSq%i=Kd+(g*}C4Hn}?albIzDSacInA3*!qb1a7!*ru0(uRhXsUJ+x(@?>tD_ z3I25Ei^|i_)_4${37A~lwBK;~W+B93L89a1S7#WPdeI*qvY;HkB9^b$9zgF&l|q65i(l=$S8h-0!gG_K zEyZJFofk2pzvA={zy>PdA2O(Bb~bOFXJSuew|%N0PQ<50e61usctg>a`&@P8F3zu! 
zv)T$pyQ9J8`I=hMJ{XGjbKV9x^3^Y-w&3bB0qoRO zG~S9kYB8|vk&)+WJ%+J+%hVlBoWL&S(q4L7dX<%j1Ot^rhs;cb(z{FY@a98laQ*rA zZRg#w_n(8_1WSaNObtm+0}=*fqJ5G=(h0V4B;4DLH}o4*j5L)eTJx{-*G28cZhdKB ziL4Vjg`;p~k@~B>y?K3|8`&5wY3}me&a`h9Bim%_4E2SZ>R4tjY}RZ_t_?Rh<_ZyV z!)Er=$6#6ya$%qC(=!F73o;6zwmnP_dXj5ZDH|mO*7p?Gca7W-R-7=rWCi0+iVgsn znCz4{*^*23E2LLeDMcJ|Y12qXK)rDR0steLaZHB6^E2GF9uh{$E4G;*hChbNwf85} z@JoVIF^9;7t!q-NeuKC7Hq?E^6jHSEg-Mpni6qu10g=+kln*C=hf}^8rTxl%4nw^6 z&=dTgh2UG;qOPjJ?=kvXf;^rMtrY!8VZ(EZAdFpo{E>rI`B8K4LXOpX>&*BfX8#^X z^Byl?Z$K>Y+)xGOo#HLWRrS>EV<+rT|P$0BeS`Gd$iF+D0&wi%yF zChph59uAxsz(+U0d*5lao)cb(!RcKjQpYAK2HEzT6rEOfm?0a-OlD6Rd{8*9H$W@$ z3$qMxZ3d8cPJFGan{@zf!KW#SU;(`dro8xUd>yHM_s$zpNkpY#pv@Udw3FNPwy?>a zr5c4VHrOUSCU2rWip;PJx@G?DYX@;l#5IQj#-gWKgrN-~J8xqQcgJ_U$Ip7fuxdSo z;F7E$hGos~!)_LH7noCXf>x63;msx6?l>&Grb}>G{Tedf= z;N*0qb-8UGWy{qPo-UT76jGXp%Ooa|^&&2-@(MSX*yxBqF57{P%(7LmuEP3mP;QVE zSlyf zKK9HIbG0t z_e4%#GLv#u_e{HtKyX7e!XP$@mgF(xqj+G$Et&+a0JB5lE)*e84fIct<#*X_q}W8`Q*i}y&&*emS&0S z*2h9iZPLc0sT7bhm^P`fk(xGLbcS0F0N`K>oGgseuS1lPo#96>nlI0DqfTQggH`iz zrAOSyD966vEn6olJys> zT71O(XtXDKqIGU})5KPiTR40IfDYT=vQNXhy_jtMtekhJMRSoBM|O&V-fL2n=^{f^ z+|uO7NWD`CGR8c(8#YimIfPWlULb|nWhp?VYUZpcDAE@AXfAt4slZ& z+F8h%`ouQmPgr|j^tQ_=wHX|?`~b!4c7zXVa3CVZpxrD5N%pGf`p8?C?fILi>?-n~ zeXcaOImCeJOvGM5<1UZG&C3Uh?AuaAj2zmpOg5b}Owoh*fRf-$w=b=g-O6*xDbPn5 zo)5oR_JJEeD!%|c!eSkV@=}t;s8*0?9UPFu6BYWZ1YP1?3}&_Rt!mXd)D!af+&n1} zb1SwF8ryE>(lnEdgyMpP=}_d`Cl*pB?d#)3+)OALPe0IUZ=`Z_YI&|qZA^iP^Gt6% zPdxuvPgyw_xuJ8qKA8H??tVS zf2c2B!@}@6w`d=r zn70*})r2f<_|ZRWY8+095u}JVxTx(l@r8-#P+rU6;cbO{RcNy)suw7IiG}O$hJo5H zXUwhG=J3B~FM1iDy^|y(V3r6Y{}_J$Y>Nkd%7S(aQgapnkR%h`8cOzo2~ayeH}K< ztKIF{C7>XpO95Rmis+^$$zdB>m!V)Fba^5v?oZm{RoV--&uPvbqo2t-hF9%{2J!3n zh26Y3#2%Idl_@H^H>oKv>2H8?>6fVL1V*MI=5@&(eamJ^Wpp2ga2vE?mOd+GLm%y5 zx;VS=VwEOYd?t7xxu+ob7>bh#bpXUb04|!>%74cjR9GT>tl?KMAuv=-Z#Kq*B2J?s zTIqta0e=~H)DDa7!FvO&I>glR@dTZ1xzX{y7WskM9 zrm>`2Jvfvkb#*&K^^V76#@sFx{I|v^^r(2sj73{BSP|*L$4*Y9q z`Ox^UH7LJ0kH0Yg*Mon*`;EYF1b!p%8-d>l{6^q^Fai*NNdt1oo 
z0)JeWs$nh9UgkrA=s|Gc#H|^U!m^)?Y&`@6eDN8!4ArF;v5vu(GV37JZ{)wewtR;L z*`~u16x$g8m?^KFhLg0cKsWz&sb{WbFh>Iom-;JISF+hb<$iyFm_>y?uE6)4VrjB& z)6D~CEseV8Pe0bU*(km{4Um_(yN}dOE~%F7uJ-XW;TJXKSsIukpc1dOgGC(E{G&s5 z!w{B|4UPtu`4Ez~SR)p)mSK8V~YtWlUTQ~R#-l~VWJe&DYp!JdX zEpw}vE9t%RPWvMQ2@!Ha9q-8}EDobcIRx3=W@nBEX-*1qr}osJi=ds?Q^cG zvKn7T;~rv!(!SjWXKCY1@G>&<$zDi6QtBHoilVg~$S25ZJw4EMNV%Ro?X<&BO&GGg z@O8WL-(-Lx-8K5HTvL?=%${`-aYIPM;|8jRe~;^8A|T5su>vbN>Jzs1=(UiiqA~U% z79oORdALM;EP#YA2)62+2COnzY(tjFeV=x@*gPtqMf-5t+**{xt^G)r%4Or&(CdBp zQAM1zt)uL_>N*#UPc`!A1xU2$`35W1>*@4EW#F(~_JF>#_G9;>{5K{2$Xe*#4+V1P z^-w{5@H{7r$F#u7rdD@~XaL)4P(aK9IYC}ZcXiS8R!bn(gVFU3TJ6U1$!37MfJD|c z$B{E5F}{>YzwX0bo}}ER#%xdivSdYEBM2Aip_^Biv;qjef{`%Ei%y*}h1}lODt~7-P*J2V$wPcB_T2yEdrCA)@n&S`DC*}wqQ0DhW z$_))8EtstuWC6_x_U1L}jQL+x6oGk57IQjRrL*yE%YQtTfut?ABtDo53U4f00FaH{ zX6lNTod&eCR{sqSGagzOC=Pxb{dvI@Sv%rjIlfR7r38vUHq<$IGh?Q1oXv=xPlm~8 zSPxH%ic87|6>5m@Lv+0G&7N0DGYPJI#b&ECm1-aeifYtYb(7Xh2}rG$y_CU@cVPY9 z`8lAq>r!#4Myr7Il?2OM(LAo7e*Yoa1mDfk0R>`qEINvBUEN$RE99DVK>@pYK)~7M z*Fx^MP1{&Mu0<5M=xJ!(Pv2V;HE7GuQE%<%-KgeX5pSi%pBaG{#|ITho3wL?WV%0? 
z9gp`>1tZ?9ZB{VWR=W(yD_x)*_Tn^mhi%uF(ZAXv>r$nEYBu@40FyOnblPp03NIff z_SUvf+zw)>{oyI!+DXLpV3`PcYF_lTA`@866wY=mPveiN@4GFRd*{sTg1q;eRZV7!91)WL={BU%lC0qT*FM1YdDK2ZM3KCQdhAFM%b<`2iWOI5qm3Qa~b!< zPW6UwcFY;EXM}gLjl<%Qz$SPecc2AlMnF(~nerPC?}7FqRZr?rP45TUXW-}*CkMQwD7vq~s0H0T`K}J2+S-(l`)Jy9J7P&s?s-!VtmFtcLw7pd{ofhM z_l%zSiCL!<1~*)WV+Fr#M}dwJ`T?Thr#0qbwhMZXk5h?TpGs_P4k^7+397ZLs_GlL zt9F>+7=Ioy^C`7o+Zc2y%p$wVO?SD-^OB35=b#cjUAiXiUTI$g>D{G7X&A4)*Z!z$Uz_$tptA44hMT$-E?x4Tlw0ljDg z|KZBp#Q4kU)I6+N&3IHWgKVX#UmjN$hT-?|vDjugpRaPAAw|U}hJcf0C;Utg<%~}m zWIygh<{y2th|s?*?~bRA>qJwwhtOfi=@jD?rMMx3Pw0XXTFAl_1rNz2e zUXiwFazq^>EUbJ@6b%yBde|Swu*`ljbXs4o^+UR9WP-KJ-WD~#8c`hR06td?A z%rD;rOI!QFV+iHD^Er$Wr#6@=X8(7Bvv1l-`tvhs;h&HYb@8a<55{Fx;i%xy!Oqz+ zTji0>{1S0K0bpU`5hs}Hda1qIQo4OEY=`^k#FUc{`E|cohQI0)7lhX8v>8d!fN=rp z2}_YL+kEC@1ng+{neegIVD@r`XsX_KcNGXtrz<~cKEG$bbHX@hKw5mbO1vuR4-*I= zweT;Qg;)jIhWo~;q{{A-i7?m~Auw7E*;`U#!C~GH5nYD!SrOq94FG%HMp5eD=DuT7 zOGW9cPe?;vTta5nM~HU{#2hiDGC8ftM6S5ffk$dlQ&P)0lZjk?xh@t=v_4E7ok?aO zfe`h$3IC>U&K`D)NLVBrd;Uf062<^sMiC{kU0*{`=BI#zZv%vc4tBMkY@9 J{}UeRe*xHW6cYdd diff --git a/src/bitshuffle/bitshuffle/tests/make_regression_tdata.py b/src/bitshuffle/bitshuffle/tests/make_regression_tdata.py deleted file mode 100644 index 07045383..00000000 --- a/src/bitshuffle/bitshuffle/tests/make_regression_tdata.py +++ /dev/null @@ -1,42 +0,0 @@ -""" -Script to create data used for regression testing. - -""" - -import numpy as np -from numpy import random -import h5py - -import bitshuffle -from bitshuffle import h5 - -BLOCK_SIZE = 64 # Smallish such that datasets have many blocks but are small. 
-FILTER_PIPELINE = [h5.H5FILTER,] -FILTER_OPTS = [(BLOCK_SIZE, h5.H5_COMPRESS_LZ4)] - -OUT_FILE = "bitshuffle/tests/data/regression_%s.h5" % bitshuffle.__version__ - -DTYPES = ['a1', 'a2', 'a3', 'a4', 'a6', 'a8', 'a10'] - - -f = h5py.File(OUT_FILE, 'w') -g_comp = f.create_group("compressed") -g_orig = f.create_group("origional") - -for dtype in DTYPES: - for rep in ['a', 'b', 'c']: - dset_name = "%s_%s" % (dtype, rep) - dtype = np.dtype(dtype) - n_elem = 3 * BLOCK_SIZE + random.randint(0, BLOCK_SIZE) - shape = (n_elem,) - chunks = shape - data = random.randint(0, 255, n_elem * dtype.itemsize) - data = data.astype(np.uint8).view(dtype) - - g_orig.create_dataset(dset_name, data=data) - - h5.create_dataset(g_comp, dset_name, shape, dtype, chunks=chunks, - filter_pipeline=FILTER_PIPELINE, filter_opts=FILTER_OPTS) - g_comp[dset_name][:] = data - -f.close() diff --git a/src/bitshuffle/bitshuffle/tests/test_ext.py b/src/bitshuffle/bitshuffle/tests/test_ext.py deleted file mode 100644 index 11be1ffd..00000000 --- a/src/bitshuffle/bitshuffle/tests/test_ext.py +++ /dev/null @@ -1,588 +0,0 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -import unittest -import time -import timeit - -import numpy as np -from numpy import random - -from bitshuffle import ext - - -# If we are doing timeings by what factor to increase workload. -# Remember to change `ext.REPEATC`. -TIME = 0 -#TIME = 8 # 8kB blocks same as final blocking. -BLOCK = 1024 - - -TEST_DTYPES = [np.uint8, np.uint16, np.int32, np.uint64, np.float32, - np.float64, np.complex128] -TEST_DTYPES += [b'a3', b'a5', b'a6', b'a7', b'a9', b'a11', b'a12', b'a24', - b'a48'] - - -class TestProfile(unittest.TestCase): - - def setUp(self): - n = 1024 # bytes. - if TIME: - n *= TIME - # Almost random bits, but now quite. All bits exercised (to fully test - # transpose) but still slightly compresible. 
- self.data = random.randint(0, 200, n).astype(np.uint8) - self.fun = ext.copy - self.check = None - self.check_data = None - self.case = "None" - - def tearDown(self): - """Performs all tests and timings.""" - if TIME: - reps = 10 - else: - reps = 1 - delta_ts = [] - try: - for ii in range(reps): - t0 = time.time() - out = self.fun(self.data) - delta_ts.append(time.time() - t0) - except RuntimeError as err: - if (len(err.args) > 1 and (err.args[1] == -11) - and not ext.using_SSE2()): - return - if (len(err.args) > 1 and (err.args[1] == -12) - and not ext.using_AVX2()): - return - else: - raise - delta_t = min(delta_ts) - size_i = self.data.size * self.data.dtype.itemsize - size_o = out.size * out.dtype.itemsize - size = max([size_i, size_o]) - speed = (ext.REPEAT * size / delta_t / 1024**3) # GB/s - if TIME: - print("%-20s: %5.2f s/GB, %5.2f GB/s" % (self.case, 1./speed, speed)) - if not self.check is None: - ans = self.check(self.data).view(np.uint8) - self.assertTrue(np.all(ans == out.view(np.uint8))) - if not self.check_data is None: - ans = self.check_data.view(np.uint8) - self.assertTrue(np.all(ans == out.view(np.uint8))) - - def test_00_copy(self): - self.case = "copy" - self.fun = ext.copy - self.check = lambda x: x - - def test_01a_trans_byte_elem_scal_16(self): - self.case = "byte T elem scal 16" - self.data = self.data.view(np.int16) - self.fun = ext.trans_byte_elem_scal - self.check = trans_byte_elem - - def test_01b_trans_byte_elem_scal_32(self): - self.case = "byte T elem scal 32" - self.data = self.data.view(np.int32) - self.fun = ext.trans_byte_elem_scal - self.check = trans_byte_elem - - def test_01c_trans_byte_elem_scal_64(self): - self.case = "byte T elem scal 64" - self.data = self.data.view(np.int64) - self.fun = ext.trans_byte_elem_scal - self.check = trans_byte_elem - - def test_01d_trans_byte_elem_16(self): - self.case = "byte T elem SSE 16" - self.data = self.data.view(np.int16) - self.fun = ext.trans_byte_elem_SSE - self.check = 
trans_byte_elem - - def test_01e_trans_byte_elem_32(self): - self.case = "byte T elem SSE 32" - self.data = self.data.view(np.float32) - self.fun = ext.trans_byte_elem_SSE - self.check = trans_byte_elem - - def test_01f_trans_byte_elem_64(self): - self.case = "byte T elem SSE 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_byte_elem_SSE - self.check = trans_byte_elem - - def test_01g_trans_byte_elem_128(self): - self.case = "byte T elem SSE 128" - self.data = self.data.view(np.complex128) - self.fun = ext.trans_byte_elem_SSE - self.check = trans_byte_elem - - def test_01h_trans_byte_elem_96(self): - self.case = "byte T elem SSE 96" - n = self.data.size // 128 * 96 - dt = np.dtype([(str('a'), np.int32), (str('b'), np.int32), - (str('c'), np.int32)]) - self.data = self.data[:n].view(dt) - self.fun = ext.trans_byte_elem_SSE - self.check = trans_byte_elem - - def test_01i_trans_byte_elem_80(self): - self.case = "byte T elem SSE 80" - n = self.data.size // 128 * 80 - dt = np.dtype([(str('a'), np.int16), (str('b'), np.int16), - (str('c'), np.int16), (str('d'), np.int16), - (str('e'), np.int16)]) - self.data = self.data[:n].view(dt) - self.fun = ext.trans_byte_elem_SSE - self.check = trans_byte_elem - - def test_03a_trans_bit_byte(self): - self.case = "bit T byte scal 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_bit_byte_scal - self.check = trans_bit_byte - - def test_03d_trans_bit_byte_SSE(self): - self.case = "bit T byte SSE 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_bit_byte_SSE - self.check = trans_bit_byte - - def test_03f_trans_bit_byte_AVX(self): - self.case = "bit T byte AVX 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_bit_byte_AVX - self.check = trans_bit_byte - - def test_03g_trans_bit_byte_AVX_32(self): - self.case = "bit T byte AVX 32" - self.data = self.data.view(np.float32) - self.fun = ext.trans_bit_byte_AVX - self.check = trans_bit_byte - - def 
test_04a_trans_bit_elem_AVX(self): - self.case = "bit T elem AVX 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_bit_elem_AVX - self.check = trans_bit_elem - - def test_04b_trans_bit_elem_AVX_128(self): - self.case = "bit T elem AVX 128" - self.data = self.data.view(np.complex128) - self.fun = ext.trans_bit_elem_AVX - self.check = trans_bit_elem - - def test_04c_trans_bit_elem_AVX_32(self): - self.case = "bit T elem AVX 32" - self.data = self.data.view(np.float32) - self.fun = ext.trans_bit_elem_AVX - self.check = trans_bit_elem - - def test_04d_trans_bit_elem_AVX_16(self): - self.case = "bit T elem AVX 16" - self.data = self.data.view(np.int16) - self.fun = ext.trans_bit_elem_AVX - self.check = trans_bit_elem - - def test_04e_trans_bit_elem_64(self): - self.case = "bit T elem scal 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_bit_elem_scal - self.check = trans_bit_elem - - def test_04f_trans_bit_elem_SSE_32(self): - self.case = "bit T elem SSE 32" - self.data = self.data.view(np.float32) - self.fun = ext.trans_bit_elem_SSE - self.check = trans_bit_elem - - def test_04g_trans_bit_elem_SSE_64(self): - self.case = "bit T elem SSE 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_bit_elem_SSE - self.check = trans_bit_elem - - def test_06a_untrans_bit_elem_16(self): - self.case = "bit U elem SSE 16" - pre_trans = self.data.view(np.int16) - self.data = trans_bit_elem(pre_trans) - self.fun = ext.untrans_bit_elem_SSE - self.check_data = pre_trans - - def test_06b_untrans_bit_elem_128(self): - self.case = "bit U elem SSE 128" - pre_trans = self.data.view(np.complex128) - self.data = trans_bit_elem(pre_trans) - self.fun = ext.untrans_bit_elem_SSE - self.check_data = pre_trans - - def test_06c_untrans_bit_elem_32(self): - self.case = "bit U elem SSE 32" - pre_trans = self.data.view(np.float32) - self.data = trans_bit_elem(pre_trans) - self.fun = ext.untrans_bit_elem_SSE - self.check_data = pre_trans - - def 
test_06d_untrans_bit_elem_32(self): - self.case = "bit U elem AVX 32" - pre_trans = self.data.view(np.float32) - self.data = trans_bit_elem(pre_trans) - self.fun = ext.untrans_bit_elem_AVX - self.check_data = pre_trans - - def test_06e_untrans_bit_elem_64(self): - self.case = "bit U elem SSE 64" - pre_trans = self.data.view(np.float64) - self.data = trans_bit_elem(pre_trans) - self.fun = ext.untrans_bit_elem_SSE - self.check_data = pre_trans - - def test_06f_untrans_bit_elem_64(self): - self.case = "bit U elem AVX 64" - pre_trans = self.data.view(np.float64) - self.data = trans_bit_elem(pre_trans) - self.fun = ext.untrans_bit_elem_AVX - self.check_data = pre_trans - - def test_06g_untrans_bit_elem_64(self): - self.case = "bit U elem scal 64" - pre_trans = self.data.view(np.float64) - self.data = trans_bit_elem(pre_trans) - self.fun = ext.untrans_bit_elem_scal - self.check_data = pre_trans - - def test_07a_trans_byte_bitrow_64(self): - self.case = "byte T row scal 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_byte_bitrow_scal - - def test_07b_trans_byte_bitrow_SSE_64(self): - self.case = "byte T row SSE 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_byte_bitrow_SSE - self.check = ext.trans_byte_bitrow_scal - - def test_07c_trans_byte_bitrow_AVX_64(self): - self.case = "byte T row AVX 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_byte_bitrow_AVX - self.check = ext.trans_byte_bitrow_scal - - def test_08a_shuffle_bit_eight_scal_64(self): - self.case = "bit S eight scal 64" - self.data = self.data.view(np.float64) - self.fun = ext.shuffle_bit_eightelem_scal - - def test_08b_shuffle_bit_eight_SSE_64(self): - self.case = "bit S eight SSE 64" - self.data = self.data.view(np.float64) - self.fun = ext.shuffle_bit_eightelem_SSE - self.check = ext.shuffle_bit_eightelem_scal - - def test_08c_shuffle_bit_eight_AVX_32(self): - self.case = "bit S eight AVX 32" - self.data = self.data.view(np.float32) - self.fun = 
ext.shuffle_bit_eightelem_AVX - self.check = ext.shuffle_bit_eightelem_scal - - def test_08d_shuffle_bit_eight_AVX_64(self): - self.case = "bit S eight AVX 64" - self.data = self.data.view(np.float64) - self.fun = ext.shuffle_bit_eightelem_AVX - self.check = ext.shuffle_bit_eightelem_scal - - def test_08e_shuffle_bit_eight_AVX_16(self): - self.case = "bit S eight AVX 16" - self.data = self.data.view(np.int16) - self.fun = ext.shuffle_bit_eightelem_AVX - self.check = ext.shuffle_bit_eightelem_scal - - def test_08f_shuffle_bit_eight_AVX_128(self): - self.case = "bit S eight AVX 128" - self.data = self.data.view(np.complex128) - self.fun = ext.shuffle_bit_eightelem_AVX - self.check = ext.shuffle_bit_eightelem_scal - - def test_09a_trans_bit_elem_scal_64(self): - self.case = "bit T elem scal 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_bit_elem_scal - self.check = trans_bit_elem - - def test_09b_trans_bit_elem_SSE_64(self): - self.case = "bit T elem SSE 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_bit_elem_SSE - self.check = trans_bit_elem - - def test_09c_trans_bit_elem_AVX_64(self): - self.case = "bit T elem AVX 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_bit_elem_AVX - self.check = trans_bit_elem - - def test_09d_untrans_bit_elem_scal_64(self): - self.case = "bit U elem scal 64" - pre_trans = self.data.view(np.float64) - self.data = trans_bit_elem(pre_trans) - self.fun = ext.untrans_bit_elem_scal - self.check_data = pre_trans - - def test_09e_untrans_bit_elem_SSE_64(self): - self.case = "bit U elem SSE 64" - pre_trans = self.data.view(np.float64) - self.data = trans_bit_elem(pre_trans) - self.fun = ext.untrans_bit_elem_SSE - self.check_data = pre_trans - - def test_09f_untrans_bit_elem_AVX_64(self): - self.case = "bit U elem AVX 64" - pre_trans = self.data.view(np.float64) - self.data = trans_bit_elem(pre_trans) - self.fun = ext.untrans_bit_elem_AVX - self.check_data = pre_trans - - def 
test_10a_bitshuffle_64(self): - self.case = "bitshuffle 64" - self.data = self.data.view(np.float64) - self.fun = lambda x: ext.bitshuffle(x, BLOCK) - - def test_10b_bitunshuffle_64(self): - self.case = "bitunshuffle 64" - pre_trans = self.data.view(np.float64) - self.data = ext.bitshuffle(pre_trans, BLOCK) - self.fun = lambda x: ext.bitunshuffle(x, BLOCK) - self.check_data = pre_trans - - def test_10c_compress_64(self): - self.case = "compress 64" - self.data = self.data.view(np.float64) - self.fun = lambda x:ext.compress_lz4(x, BLOCK) - - def test_10d_decompress_64(self): - self.case = "decompress 64" - pre_trans = self.data.view(np.float64) - self.data = ext.compress_lz4(pre_trans, BLOCK) - self.fun = lambda x: ext.decompress_lz4(x, pre_trans.shape, - pre_trans.dtype, BLOCK) - self.check_data = pre_trans - -""" -Commented out to prevent nose from finding them. -class TestDevCases(unittest.TestCase): - - def deactivated_test_trans_byte_bitrow_AVX(self): - d = np.arange(256, dtype=np.uint32) - #d = ext.trans_bit_elem(d) - t = ext.trans_byte_bitrow_AVX(d).view(np.uint8) - t1 = ext.trans_byte_bitrow_SSE(d).view(np.uint8) - t.shape = (32, 32) - t1.shape = (32, 32) - #print t[:20,:18] - self.assertTrue(np.all(t == t1)) - - def deactivated_test_untrans_bit_elem(self): - d = np.arange(32, dtype=np.uint16) - #d = random.randint(0, 2**7, 256).astype(np.uint16) - d1 = ext.trans_bit_elem(d) - #print d - t = ext.untrans_bit_elem_AVX(d1) - #t1 = ext.untrans_bit_byte_scal(d1) - #print np.reshape(d1.view(np.uint8), (16, 4)) - #print np.reshape(t1.view(np.uint8), (2, 32)) - #print np.reshape(t2.view(np.uint8), (32, 2)) - #print np.reshape(t.view(np.uint8), (32, 2)) - - def deactivated_test_trans_bit_byte(self): - d = np.arange(16, dtype=np.uint16) - t = ext.trans_bit_byte_scal(d) - #print t - t1 = trans_bit_byte(d) - #print t1 - self.assertTrue(np.all(t == t1)) - - def deactivated_test_trans_byte_bitrow_SSE(self): - d = np.arange(256, dtype = np.uint8) - t = 
ext.trans_byte_bitrow_scal(d) - #print np.reshape(t, (32, 8)) - t1 = ext.trans_byte_bitrow_SSE(d) - #print np.reshape(t1, (32, 8)) - self.assertTrue(np.all(t == t1)) - - def deactivated_test_trans_byte_elem_SSE(self): - d = np.empty(16, dtype=([('a', 'u4'), ('b', 'u4'), ('c', 'u4')])) - d['a'] = np.arange(16) * 1 - d['b'] = np.arange(16) * 2 - d['c'] = np.arange(16) * 3 - #print d.dtype.itemsize - #print np.reshape(d.view(np.uint8), (16, 12)) - t1 = ext.trans_byte_elem_SSE(d) - #print np.reshape(t1.view(np.uint8), (12, 16)) - t0 = trans_byte_elem(d) - #print np.reshape(t0.view(np.uint8), (12, 16)) - self.assertTrue(np.all(t0.view(np.uint8) == t1.view(np.uint8))) - - def deactivated_test_bitshuffle(self): - d = np.arange(128, dtype=np.uint16) - t1 = ext.bitshuffle(d) - #print t1 - t2 = ext.bitunshuffle(t1) - #print t2 - self.assertTrue(np.all(t2.view(np.uint8) == d.view(np.uint8))) -""" - - -class TestOddLengths(unittest.TestCase): - - def setUp(self): - self.reps = 10 - self.nmax = 128 * 8 - #self.nmax = 4 * 8 # XXX - self.fun = ext.copy - self.check = lambda x: x - - def test_trans_bit_elem_SSE(self): - self.fun = ext.trans_bit_elem_SSE - self.check = trans_bit_elem - - def test_untrans_bit_elem_SSE(self): - self.fun = lambda x: ext.untrans_bit_elem_SSE(ext.trans_bit_elem(x)) - self.check = lambda x: x - - def test_trans_bit_elem_AVX(self): - self.fun = ext.trans_bit_elem_AVX - self.check = trans_bit_elem - - def test_untrans_bit_elem_AVX(self): - self.fun = lambda x: ext.untrans_bit_elem_SSE(ext.trans_bit_elem(x)) - self.check = lambda x: x - - def test_trans_bit_elem_scal(self): - self.fun = ext.trans_bit_elem_scal - self.check = trans_bit_elem - - def test_untrans_bit_elem_scal(self): - self.fun = lambda x: ext.untrans_bit_elem_scal(ext.trans_bit_elem(x)) - self.check = lambda x: x - - def test_trans_byte_elem_SSE(self): - self.fun = ext.trans_byte_elem_SSE - self.check = trans_byte_elem - - def tearDown(self): - try: - for dtype in TEST_DTYPES: - itemsize = 
np.dtype(dtype).itemsize - nbyte_max = self.nmax * itemsize - dbuf = random.randint(0, 255, nbyte_max).astype(np.uint8) - dbuf = dbuf.view(dtype) - for ii in range(self.reps): - n = random.randint(0, self.nmax // 8, 1)[0] * 8 - data = dbuf[:n] - out = self.fun(data).view(np.uint8) - ans = self.check(data).view(np.uint8) - self.assertTrue(np.all(out == ans)) - except RuntimeError as err: - if (len(err.args) > 1 and (err.args[1] == -11) - and not ext.using_SSE2()): - return - if (len(err.args) > 1 and (err.args[1] == -12) - and not ext.using_AVX2()): - return - else: - raise - - -class TestBitShuffleCircle(unittest.TestCase): - """Ensure that final filter is circularly consistant for any data type and - any length buffer.""" - - def test_circle(self): - nmax = 100000 - reps = 20 - for dtype in TEST_DTYPES: - itemsize = np.dtype(dtype).itemsize - nbyte_max = nmax * itemsize - dbuf = random.randint(0, 255, nbyte_max).astype(np.uint8) - dbuf = dbuf.view(dtype) - for ii in range(reps): - n = random.randint(0, nmax, 1)[0] - data = dbuf[:n] - shuff = ext.bitshuffle(data) - out = ext.bitunshuffle(shuff) - self.assertTrue(out.dtype is data.dtype) - self.assertTrue(np.all(data.view(np.uint8) - == out.view(np.uint8))) - - def test_circle_with_compression(self): - nmax = 100000 - reps = 20 - for dtype in TEST_DTYPES: - itemsize = np.dtype(dtype).itemsize - nbyte_max = nmax * itemsize - dbuf = random.randint(0, 255, nbyte_max).astype(np.uint8) - dbuf = dbuf.view(dtype) - for ii in range(reps): - n = random.randint(0, nmax, 1)[0] - data = dbuf[:n] - shuff = ext.compress_lz4(data) - out = ext.decompress_lz4(shuff, data.shape, data.dtype) - self.assertTrue(out.dtype is data.dtype) - self.assertTrue(np.all(data.view(np.uint8) - == out.view(np.uint8))) - - -# Python implementations for checking results. 
- -def trans_byte_elem(arr): - dtype = arr.dtype - itemsize = dtype.itemsize - in_buf = arr.flat[:].view(np.uint8) - nelem = in_buf.size // itemsize - in_buf.shape = (nelem, itemsize) - - out_buf = np.empty((itemsize, nelem), dtype=np.uint8) - for ii in range(nelem): - for jj in range(itemsize): - out_buf[jj,ii] = in_buf[ii,jj] - return out_buf.flat[:].view(dtype) - - -def trans_bit_byte(arr): - n = arr.size - dtype = arr.dtype - itemsize = dtype.itemsize - bits = np.unpackbits(arr.view(np.uint8)) - bits.shape = (n * itemsize, 8) - # We have to reverse the order of the bits both for unpacking and packing, - # since we want to call the least significant bit the first bit. - bits = bits[:,::-1] - bits_shuff = (bits.T).copy() - bits_shuff.shape = (n * itemsize, 8) - bits_shuff = bits_shuff[:,::-1] - arr_bt = np.packbits(bits_shuff.flat[:]) - return arr_bt.view(dtype) - - -def trans_bit_elem(arr): - n = arr.size - dtype = arr.dtype - itemsize = dtype.itemsize - bits = np.unpackbits(arr.view(np.uint8)) - bits.shape = (n * itemsize, 8) - # We have to reverse the order of the bits both for unpacking and packing, - # since we want to call the least significant bit the first bit. 
- bits = bits[:,::-1].copy() - bits.shape = (n, itemsize * 8) - bits_shuff = (bits.T).copy() - bits_shuff.shape = (n * itemsize, 8) - bits_shuff = bits_shuff[:,::-1] - arr_bt = np.packbits(bits_shuff.flat[:]) - return arr_bt.view(dtype) - - - -if __name__ == "__main__": - unittest.main() diff --git a/src/bitshuffle/bitshuffle/tests/test_h5filter.py b/src/bitshuffle/bitshuffle/tests/test_h5filter.py deleted file mode 100644 index 6739b998..00000000 --- a/src/bitshuffle/bitshuffle/tests/test_h5filter.py +++ /dev/null @@ -1,91 +0,0 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -import unittest -import os -import glob - -import numpy as np -import h5py -from h5py import h5f, h5d, h5z, h5t, h5s, filters -from subprocess import Popen, PIPE, STDOUT - -from bitshuffle import h5 - - -os.environ["HDF5_PLUGIN_PATH"] = "" - - -class TestFilter(unittest.TestCase): - - def test_filter(self): - shape = (32 * 1024 + 783,) - chunks = (4 * 1024 + 23,) - dtype = np.int64 - data = np.arange(shape[0]) - fname = "tmp_test_filters.h5" - f = h5py.File(fname) - h5.create_dataset(f, b"range", shape, dtype, chunks, - filter_pipeline=(32008, 32000), - filter_flags=(h5z.FLAG_MANDATORY, h5z.FLAG_MANDATORY), - filter_opts=None) - f["range"][:] = data - - f.close() - - f = h5py.File(fname, 'r') - d = f['range'][:] - self.assertTrue(np.all(d == data)) - f.close() - - def test_with_block_size(self): - shape = (128 * 1024 + 783,) - chunks = (4 * 1024 + 23,) - dtype = np.int64 - data = np.arange(shape[0]) - fname = "tmp_test_filters.h5" - f = h5py.File(fname) - h5.create_dataset(f, b"range", shape, dtype, chunks, - filter_pipeline=(32008, 32000), - filter_flags=(h5z.FLAG_MANDATORY, h5z.FLAG_MANDATORY), - filter_opts=((680,), ()), - ) - f["range"][:] = data - - f.close() - #os.system('h5dump -H -p tmp_test_filters.h5') - - f = h5py.File(fname, 'r') - d = f['range'][:] - self.assertTrue(np.all(d == data)) - f.close() - - def test_with_compression(self): - 
shape = (128 * 1024 + 783,) - chunks = (4 * 1024 + 23,) - dtype = np.int64 - data = np.arange(shape[0]) - fname = "tmp_test_filters.h5" - f = h5py.File(fname) - h5.create_dataset(f, b"range", shape, dtype, chunks, - filter_pipeline=(32008,), - filter_flags=(h5z.FLAG_MANDATORY,), - filter_opts=((0, h5.H5_COMPRESS_LZ4),), - ) - f["range"][:] = data - - f.close() - #os.system('h5dump -H -p tmp_test_filters.h5') - - f = h5py.File(fname, 'r') - d = f['range'][:] - self.assertTrue(np.all(d == data)) - f.close() - - def tearDown(self): - files = glob.glob("tmp_test_*") - for f in files: - os.remove(f) - - -if __name__ == "__main__": - unittest.main() diff --git a/src/bitshuffle/bitshuffle/tests/test_h5plugin.py b/src/bitshuffle/bitshuffle/tests/test_h5plugin.py deleted file mode 100644 index 220d55da..00000000 --- a/src/bitshuffle/bitshuffle/tests/test_h5plugin.py +++ /dev/null @@ -1,83 +0,0 @@ -from __future__ import absolute_import, division, print_function, unicode_literals -import unittest -import os, os.path -import glob - -import numpy as np -import h5py -from h5py import h5f, h5d, h5z, h5t, h5s, filters -from subprocess import Popen, PIPE, STDOUT - -import bitshuffle - - -plugin_dir = os.path.join(os.path.dirname(bitshuffle.__file__), - 'plugin') -os.environ["HDF5_PLUGIN_PATH"] = plugin_dir - - -H5VERSION = h5py.h5.get_libversion() -if (H5VERSION[0] < 1 or (H5VERSION[0] == 1 - and (H5VERSION[1] < 8 or (H5VERSION[1] == 8 and H5VERSION[2] < 11)))): - H51811P = False -else: - H51811P = True - - -class TestFilterPlugins(unittest.TestCase): - - def test_plugins(self): - if not H51811P: - return - shape = (32 * 1024,) - chunks = (4 * 1024,) - dtype = np.int64 - data = np.arange(shape[0]) - fname = "tmp_test_filters.h5" - f = h5py.File(fname) - tid = h5t.py_create(dtype, logical=1) - sid = h5s.create_simple(shape, shape) - # Different API's for different h5py versions. 
- try: - dcpl = filters.generate_dcpl(shape, dtype, chunks, None, None, - None, None, None, None) - except TypeError: - dcpl = filters.generate_dcpl(shape, dtype, chunks, None, None, - None, None, None) - dcpl.set_filter(32008, h5z.FLAG_MANDATORY) - dcpl.set_filter(32000, h5z.FLAG_MANDATORY) - dset_id = h5d.create(f.id, b"range", tid, sid, dcpl=dcpl) - dset_id.write(h5s.ALL, h5s.ALL, data) - f.close() - - # Make sure the filters are working outside of h5py by calling h5dump - h5dump = Popen(['h5dump', fname], - stdout=PIPE, stderr=STDOUT) - stdout, nothing = h5dump.communicate() - err = h5dump.returncode - self.assertEqual(err, 0) - - - f = h5py.File(fname, 'r') - d = f['range'][:] - self.assertTrue(np.all(d == data)) - f.close() - - - #def test_h5py_hl(self): - # if not H51811P: - # return - # # Does not appear to be supported by h5py. - # fname = "tmp_test_h5py_hl.h5" - # f = h5py.File(fname) - # f.create_dataset("range", np.arange(1024, dtype=np.int64), - # compression=32008) - - def tearDown(self): - files = glob.glob("tmp_test_*") - for f in files: - os.remove(f) - - -if __name__ == "__main__": - unittest.main() diff --git a/src/bitshuffle/bitshuffle/tests/test_regression.py b/src/bitshuffle/bitshuffle/tests/test_regression.py deleted file mode 100644 index 2862cace..00000000 --- a/src/bitshuffle/bitshuffle/tests/test_regression.py +++ /dev/null @@ -1,40 +0,0 @@ -""" -Test that data encoded with earlier versions can still be decoded correctly. 
- -""" - -from __future__ import absolute_import, division, print_function - -import unittest -from os import path - -import numpy as np -import h5py - -import bitshuffle -from bitshuffle import h5 - - -TEST_DATA_DIR = path.dirname(bitshuffle.__file__) + "/tests/data" - -OUT_FILE_TEMPLATE = TEST_DATA_DIR + "/regression_%s.h5" - -VERSIONS = ["0.1.3",] - - -class TestAll(unittest.TestCase): - - def test_regression(self): - for version in VERSIONS: - file_name = OUT_FILE_TEMPLATE % version - f = h5py.File(file_name) - g_orig = f["origional"] - g_comp = f["compressed"] - - for dset_name in g_comp.keys(): - self.assertTrue(np.all(g_comp[dset_name][:] - == g_orig[dset_name][:])) - - -if __name__ == "__main__": - unittest.main() diff --git a/src/bitshuffle/conda-recipe/bld.bat b/src/bitshuffle/conda-recipe/bld.bat deleted file mode 100644 index ccbb10f9..00000000 --- a/src/bitshuffle/conda-recipe/bld.bat +++ /dev/null @@ -1,3 +0,0 @@ -SET CONDA_HOME=%PREFIX% -"%PYTHON%" setup.py install -if errorlevel 1 exit 1 diff --git a/src/bitshuffle/conda-recipe/build.sh b/src/bitshuffle/conda-recipe/build.sh deleted file mode 100644 index 34c3a689..00000000 --- a/src/bitshuffle/conda-recipe/build.sh +++ /dev/null @@ -1,2 +0,0 @@ -export CONDA_HOME=$PREFIX -$PYTHON setup.py install # Python command to install the script diff --git a/src/bitshuffle/conda-recipe/meta.yaml b/src/bitshuffle/conda-recipe/meta.yaml deleted file mode 100644 index ac227e2b..00000000 --- a/src/bitshuffle/conda-recipe/meta.yaml +++ /dev/null @@ -1,27 +0,0 @@ -package: - name: bitshuffle - version: 0.2.1 -source: - # git_url: https://github.com/kiyo-masui/bitshuffle.git - # git_rev: 0.2.1 - path: .. - patches: - - setup.py.patch - -requirements: - build: - - python - - setuptools - - cython - - numpy - - h5py - - hdf5 - run: - - python - - numpy - - h5py - - cython - -about: - home: https://github.com/kiyo-masui/bitshuffle/blob/master/setup.py - summary: "bitshuffle library." 
diff --git a/src/bitshuffle/conda-recipe/setup.py.patch b/src/bitshuffle/conda-recipe/setup.py.patch deleted file mode 100644 index 437a5ffa..00000000 --- a/src/bitshuffle/conda-recipe/setup.py.patch +++ /dev/null @@ -1,13 +0,0 @@ ---- setup.py 2016-01-19 16:56:12.954563000 +0100 -+++ xxx.py 2016-01-19 16:56:00.817087000 +0100 -@@ -40,8 +40,8 @@ - - # Copied from h5py. - # TODO, figure out what the canonacal way to do this should be. --INCLUDE_DIRS = [] --LIBRARY_DIRS = [] -+INCLUDE_DIRS = [os.environ['CONDA_HOME'] + '/include'] -+LIBRARY_DIRS = [os.environ['CONDA_HOME'] + '/lib'] - if sys.platform == 'darwin': - # putting here both macports and homebrew paths will generate - # "ld: warning: dir not found" at the linking phase diff --git a/src/bitshuffle/lz4/LICENSE b/src/bitshuffle/lz4/LICENSE deleted file mode 100644 index b566df30..00000000 --- a/src/bitshuffle/lz4/LICENSE +++ /dev/null @@ -1,24 +0,0 @@ -LZ4 Library -Copyright (c) 2011-2014, Yann Collet -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/src/bitshuffle/lz4/README.md b/src/bitshuffle/lz4/README.md deleted file mode 100644 index f6ebf5e1..00000000 --- a/src/bitshuffle/lz4/README.md +++ /dev/null @@ -1,21 +0,0 @@ -LZ4 - Library Files -================================ - -The __lib__ directory contains several files, but you don't necessarily need them all. - -To integrate fast LZ4 compression/decompression into your program, you basically just need "**lz4.c**" and "**lz4.h**". - -For more compression at the cost of compression speed (while preserving decompression speed), use **lz4hc** on top of regular lz4. `lz4hc` only provides compression functions. It also needs `lz4` to compile properly. - -If you want to produce files or data streams compatible with `lz4` command line utility, use **lz4frame**. This library encapsulates lz4-compressed blocks into the [official interoperable frame format]. In order to work properly, lz4frame needs lz4 and lz4hc, and also **xxhash**, which provides error detection algorithm. -(_Advanced stuff_ : It's possible to hide xxhash symbols into a local namespace. This is what `liblz4` does, to avoid symbol duplication in case a user program would link to several libraries containing xxhash symbols.) - -A more complex "lz4frame_static.h" is also provided, although its usage is not recommended. It contains definitions which are not guaranteed to remain stable within future versions. Use for static linking ***only***. 
- -The other files are not source code. There are : - - - LICENSE : contains the BSD license text - - Makefile : script to compile or install lz4 library (static or dynamic) - - liblz4.pc.in : for pkg-config (make install) - -[official interoperable frame format]: ../lz4_Frame_format.md diff --git a/src/bitshuffle/lz4/lz4.c b/src/bitshuffle/lz4/lz4.c deleted file mode 100644 index 08cf6b5c..00000000 --- a/src/bitshuffle/lz4/lz4.c +++ /dev/null @@ -1,1516 +0,0 @@ -/* - LZ4 - Fast LZ compression algorithm - Copyright (C) 2011-2015, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - You can contact the author at : - - LZ4 source repository : https://github.com/Cyan4973/lz4 - - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c -*/ - - -/************************************** -* Tuning parameters -**************************************/ -/* - * HEAPMODE : - * Select how default compression functions will allocate memory for their hash table, - * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()). - */ -#define HEAPMODE 0 - -/* - * ACCELERATION_DEFAULT : - * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0 - */ -#define ACCELERATION_DEFAULT 1 - - -/************************************** -* CPU Feature Detection -**************************************/ -/* - * LZ4_FORCE_SW_BITCOUNT - * Define this parameter if your target system or compiler does not support hardware bit count - */ -#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for Windows CE does not support Hardware bit count */ -# define LZ4_FORCE_SW_BITCOUNT -#endif - - -/************************************** -* Includes -**************************************/ -#include "lz4.h" - - -/************************************** -* Compiler Options -**************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# define FORCE_INLINE static __forceinline -# include -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 4293) /* disable: C4293: too large shift (32-bits) */ -#else -# if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ -# if defined(__GNUC__) || defined(__clang__) -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -# else -# define FORCE_INLINE static -# endif /* __STDC_VERSION__ */ -#endif /* _MSC_VER */ - -/* LZ4_GCC_VERSION is defined into lz4.h */ -#if (LZ4_GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) -# 
define expect(expr,value) (__builtin_expect ((expr),(value)) ) -#else -# define expect(expr,value) (expr) -#endif - -#define likely(expr) expect((expr) != 0, 1) -#define unlikely(expr) expect((expr) != 0, 0) - - -/************************************** -* Memory routines -**************************************/ -#include /* malloc, calloc, free */ -#define ALLOCATOR(n,s) calloc(n,s) -#define FREEMEM free -#include /* memset, memcpy */ -#define MEM_INIT memset - - -/************************************** -* Basic Types -**************************************/ -#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; -#else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; -#endif - - -/************************************** -* Reading and writing into memory -**************************************/ -#define STEPSIZE sizeof(size_t) - -static unsigned LZ4_64bits(void) { return sizeof(void*)==8; } - -static unsigned LZ4_isLittleEndian(void) -{ - const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ - return one.c[0]; -} - - -static U16 LZ4_read16(const void* memPtr) -{ - U16 val16; - memcpy(&val16, memPtr, 2); - return val16; -} - -static U16 LZ4_readLE16(const void* memPtr) -{ - if (LZ4_isLittleEndian()) - { - return LZ4_read16(memPtr); - } - else - { - const BYTE* p = (const BYTE*)memPtr; - return (U16)((U16)p[0] + (p[1]<<8)); - } -} - -static void LZ4_writeLE16(void* memPtr, U16 value) -{ - if (LZ4_isLittleEndian()) - { - memcpy(memPtr, &value, 2); - } - else - { - BYTE* p = (BYTE*)memPtr; - p[0] = (BYTE) value; - p[1] = (BYTE)(value>>8); - } -} - -static U32 LZ4_read32(const void* memPtr) -{ - U32 val32; - memcpy(&val32, memPtr, 4); - return val32; -} - -static U64 LZ4_read64(const 
void* memPtr) -{ - U64 val64; - memcpy(&val64, memPtr, 8); - return val64; -} - -static size_t LZ4_read_ARCH(const void* p) -{ - if (LZ4_64bits()) - return (size_t)LZ4_read64(p); - else - return (size_t)LZ4_read32(p); -} - - -static void LZ4_copy4(void* dstPtr, const void* srcPtr) { memcpy(dstPtr, srcPtr, 4); } - -static void LZ4_copy8(void* dstPtr, const void* srcPtr) { memcpy(dstPtr, srcPtr, 8); } - -/* customized version of memcpy, which may overwrite up to 7 bytes beyond dstEnd */ -static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd) -{ - BYTE* d = (BYTE*)dstPtr; - const BYTE* s = (const BYTE*)srcPtr; - BYTE* e = (BYTE*)dstEnd; - do { LZ4_copy8(d,s); d+=8; s+=8; } while (d>3); -# elif (defined(__clang__) || (LZ4_GCC_VERSION >= 304)) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctzll((U64)val) >> 3); -# else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; -# endif - } - else /* 32 bits */ - { -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanForward( &r, (U32)val ); - return (int)(r>>3); -# elif (defined(__clang__) || (LZ4_GCC_VERSION >= 304)) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctz((U32)val) >> 3); -# else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif - } - } - else /* Big Endian CPU */ - { - if (LZ4_64bits()) - { -# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse64( &r, val ); - return (unsigned)(r>>3); -# elif (defined(__clang__) || (LZ4_GCC_VERSION >= 304)) 
&& !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clzll((U64)val) >> 3); -# else - unsigned r; - if (!(val>>32)) { r=4; } else { r=0; val>>=32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -# endif - } - else /* 32 bits */ - { -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse( &r, (unsigned long)val ); - return (unsigned)(r>>3); -# elif (defined(__clang__) || (LZ4_GCC_VERSION >= 304)) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clz((U32)val) >> 3); -# else - unsigned r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; -# endif - } - } -} - -static unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) -{ - const BYTE* const pStart = pIn; - - while (likely(pIn compression run slower on incompressible data */ - - -/************************************** -* Local Structures and types -**************************************/ -typedef struct { - U32 hashTable[HASH_SIZE_U32]; - U32 currentOffset; - U32 initCheck; - const BYTE* dictionary; - BYTE* bufferStart; /* obsolete, used for slideInputBuffer */ - U32 dictSize; -} LZ4_stream_t_internal; - -typedef enum { notLimited = 0, limitedOutput = 1 } limitedOutput_directive; -typedef enum { byPtr, byU32, byU16 } tableType_t; - -typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive; -typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; - -typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; -typedef enum { full = 0, partial = 1 } earlyEnd_directive; - - -/************************************** -* Local Utils -**************************************/ -int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; } -int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } -int LZ4_sizeofState() { return LZ4_STREAMSIZE; } - - - -/******************************** -* Compression functions 
-********************************/ - -static U32 LZ4_hashSequence(U32 sequence, tableType_t const tableType) -{ - if (tableType == byU16) - return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); - else - return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); -} - -static const U64 prime5bytes = 889523592379ULL; -static U32 LZ4_hashSequence64(size_t sequence, tableType_t const tableType) -{ - const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG; - const U32 hashMask = (1<> (40 - hashLog)) & hashMask; -} - -static U32 LZ4_hashSequenceT(size_t sequence, tableType_t const tableType) -{ - if (LZ4_64bits()) - return LZ4_hashSequence64(sequence, tableType); - return LZ4_hashSequence((U32)sequence, tableType); -} - -static U32 LZ4_hashPosition(const void* p, tableType_t tableType) { return LZ4_hashSequenceT(LZ4_read_ARCH(p), tableType); } - -static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t const tableType, const BYTE* srcBase) -{ - switch (tableType) - { - case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; } - case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; } - case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; } - } -} - -static void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) -{ - U32 h = LZ4_hashPosition(p, tableType); - LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); -} - -static const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase) -{ - if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; } - if (tableType == byU32) { U32* hashTable = (U32*) tableBase; return hashTable[h] + srcBase; } - { U16* hashTable = (U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */ -} - -static const BYTE* 
LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) -{ - U32 h = LZ4_hashPosition(p, tableType); - return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); -} - -FORCE_INLINE int LZ4_compress_generic( - void* const ctx, - const char* const source, - char* const dest, - const int inputSize, - const int maxOutputSize, - const limitedOutput_directive outputLimited, - const tableType_t tableType, - const dict_directive dict, - const dictIssue_directive dictIssue, - const U32 acceleration) -{ - LZ4_stream_t_internal* const dictPtr = (LZ4_stream_t_internal*)ctx; - - const BYTE* ip = (const BYTE*) source; - const BYTE* base; - const BYTE* lowLimit; - const BYTE* const lowRefLimit = ip - dictPtr->dictSize; - const BYTE* const dictionary = dictPtr->dictionary; - const BYTE* const dictEnd = dictionary + dictPtr->dictSize; - const size_t dictDelta = dictEnd - (const BYTE*)source; - const BYTE* anchor = (const BYTE*) source; - const BYTE* const iend = ip + inputSize; - const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = iend - LASTLITERALS; - - BYTE* op = (BYTE*) dest; - BYTE* const olimit = op + maxOutputSize; - - U32 forwardH; - size_t refDelta=0; - - /* Init conditions */ - if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */ - switch(dict) - { - case noDict: - default: - base = (const BYTE*)source; - lowLimit = (const BYTE*)source; - break; - case withPrefix64k: - base = (const BYTE*)source - dictPtr->currentOffset; - lowLimit = (const BYTE*)source - dictPtr->dictSize; - break; - case usingExtDict: - base = (const BYTE*)source - dictPtr->currentOffset; - lowLimit = (const BYTE*)source; - break; - } - if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0; /* Size too large (not within 64K limit) */ - if (inputSize> LZ4_skipTrigger); - - if (unlikely(forwardIp > mflimit)) goto _last_literals; - - match = LZ4_getPositionOnHash(h, ctx, 
tableType, base); - if (dict==usingExtDict) - { - if (match<(const BYTE*)source) - { - refDelta = dictDelta; - lowLimit = dictionary; - } - else - { - refDelta = 0; - lowLimit = (const BYTE*)source; - } - } - forwardH = LZ4_hashPosition(forwardIp, tableType); - LZ4_putPositionOnHash(ip, h, ctx, tableType, base); - - } while ( ((dictIssue==dictSmall) ? (match < lowRefLimit) : 0) - || ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip)) - || (LZ4_read32(match+refDelta) != LZ4_read32(ip)) ); - } - - /* Catch up */ - while ((ip>anchor) && (match+refDelta > lowLimit) && (unlikely(ip[-1]==match[refDelta-1]))) { ip--; match--; } - - { - /* Encode Literal length */ - unsigned litLength = (unsigned)(ip - anchor); - token = op++; - if ((outputLimited) && (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit))) - return 0; /* Check output limit */ - if (litLength>=RUN_MASK) - { - int len = (int)litLength-RUN_MASK; - *token=(RUN_MASK<= 255 ; len-=255) *op++ = 255; - *op++ = (BYTE)len; - } - else *token = (BYTE)(litLength< matchlimit) limit = matchlimit; - matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, limit); - ip += MINMATCH + matchLength; - if (ip==limit) - { - unsigned more = LZ4_count(ip, (const BYTE*)source, matchlimit); - matchLength += more; - ip += more; - } - } - else - { - matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit); - ip += MINMATCH + matchLength; - } - - if ((outputLimited) && (unlikely(op + (1 + LASTLITERALS) + (matchLength>>8) > olimit))) - return 0; /* Check output limit */ - if (matchLength>=ML_MASK) - { - *token += ML_MASK; - matchLength -= ML_MASK; - for (; matchLength >= 510 ; matchLength-=510) { *op++ = 255; *op++ = 255; } - if (matchLength >= 255) { matchLength-=255; *op++ = 255; } - *op++ = (BYTE)matchLength; - } - else *token += (BYTE)(matchLength); - } - - anchor = ip; - - /* Test end of chunk */ - if (ip > mflimit) break; - - /* Fill table */ - LZ4_putPosition(ip-2, ctx, tableType, base); - - /* Test 
next position */ - match = LZ4_getPosition(ip, ctx, tableType, base); - if (dict==usingExtDict) - { - if (match<(const BYTE*)source) - { - refDelta = dictDelta; - lowLimit = dictionary; - } - else - { - refDelta = 0; - lowLimit = (const BYTE*)source; - } - } - LZ4_putPosition(ip, ctx, tableType, base); - if ( ((dictIssue==dictSmall) ? (match>=lowRefLimit) : 1) - && (match+MAX_DISTANCE>=ip) - && (LZ4_read32(match+refDelta)==LZ4_read32(ip)) ) - { token=op++; *token=0; goto _next_match; } - - /* Prepare next loop */ - forwardH = LZ4_hashPosition(++ip, tableType); - } - -_last_literals: - /* Encode Last Literals */ - { - const size_t lastRun = (size_t)(iend - anchor); - if ((outputLimited) && ((op - (BYTE*)dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) - return 0; /* Check output limit */ - if (lastRun >= RUN_MASK) - { - size_t accumulator = lastRun - RUN_MASK; - *op++ = RUN_MASK << ML_BITS; - for(; accumulator >= 255 ; accumulator-=255) *op++ = 255; - *op++ = (BYTE) accumulator; - } - else - { - *op++ = (BYTE)(lastRun<= LZ4_compressBound(inputSize)) - { - if (inputSize < LZ4_64Klimit) - return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue, acceleration); - else - return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration); - } - else - { - if (inputSize < LZ4_64Klimit) - return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); - else - return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? 
byU32 : byPtr, noDict, noDictIssue, acceleration); - } -} - - -int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) -{ -#if (HEAPMODE) - void* ctxPtr = ALLOCATOR(1, sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ -#else - LZ4_stream_t ctx; - void* ctxPtr = &ctx; -#endif - - int result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration); - -#if (HEAPMODE) - FREEMEM(ctxPtr); -#endif - return result; -} - - -int LZ4_compress_default(const char* source, char* dest, int inputSize, int maxOutputSize) -{ - return LZ4_compress_fast(source, dest, inputSize, maxOutputSize, 1); -} - - -/* hidden debug function */ -/* strangely enough, gcc generates faster code when this function is uncommented, even if unused */ -int LZ4_compress_fast_force(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) -{ - LZ4_stream_t ctx; - - LZ4_resetStream(&ctx); - - if (inputSize < LZ4_64Klimit) - return LZ4_compress_generic(&ctx, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); - else - return LZ4_compress_generic(&ctx, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? 
byU32 : byPtr, noDict, noDictIssue, acceleration); -} - - -/******************************** -* destSize variant -********************************/ - -static int LZ4_compress_destSize_generic( - void* const ctx, - const char* const src, - char* const dst, - int* const srcSizePtr, - const int targetDstSize, - const tableType_t tableType) -{ - const BYTE* ip = (const BYTE*) src; - const BYTE* base = (const BYTE*) src; - const BYTE* lowLimit = (const BYTE*) src; - const BYTE* anchor = ip; - const BYTE* const iend = ip + *srcSizePtr; - const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = iend - LASTLITERALS; - - BYTE* op = (BYTE*) dst; - BYTE* const oend = op + targetDstSize; - BYTE* const oMaxLit = op + targetDstSize - 2 /* offset */ - 8 /* because 8+MINMATCH==MFLIMIT */ - 1 /* token */; - BYTE* const oMaxMatch = op + targetDstSize - (LASTLITERALS + 1 /* token */); - BYTE* const oMaxSeq = oMaxLit - 1 /* token */; - - U32 forwardH; - - - /* Init conditions */ - if (targetDstSize < 1) return 0; /* Impossible to store anything */ - if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */ - if ((tableType == byU16) && (*srcSizePtr>=LZ4_64Klimit)) return 0; /* Size too large (not within 64K limit) */ - if (*srcSizePtr> LZ4_skipTrigger); - - if (unlikely(forwardIp > mflimit)) - goto _last_literals; - - match = LZ4_getPositionOnHash(h, ctx, tableType, base); - forwardH = LZ4_hashPosition(forwardIp, tableType); - LZ4_putPositionOnHash(ip, h, ctx, tableType, base); - - } while ( ((tableType==byU16) ? 
0 : (match + MAX_DISTANCE < ip)) - || (LZ4_read32(match) != LZ4_read32(ip)) ); - } - - /* Catch up */ - while ((ip>anchor) && (match > lowLimit) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; } - - { - /* Encode Literal length */ - unsigned litLength = (unsigned)(ip - anchor); - token = op++; - if (op + ((litLength+240)/255) + litLength > oMaxLit) - { - /* Not enough space for a last match */ - op--; - goto _last_literals; - } - if (litLength>=RUN_MASK) - { - unsigned len = litLength - RUN_MASK; - *token=(RUN_MASK<= 255 ; len-=255) *op++ = 255; - *op++ = (BYTE)len; - } - else *token = (BYTE)(litLength< oMaxMatch) - { - /* Match description too long : reduce it */ - matchLength = (15-1) + (oMaxMatch-op) * 255; - } - //printf("offset %5i, matchLength%5i \n", (int)(ip-match), matchLength + MINMATCH); - ip += MINMATCH + matchLength; - - if (matchLength>=ML_MASK) - { - *token += ML_MASK; - matchLength -= ML_MASK; - while (matchLength >= 255) { matchLength-=255; *op++ = 255; } - *op++ = (BYTE)matchLength; - } - else *token += (BYTE)(matchLength); - } - - anchor = ip; - - /* Test end of block */ - if (ip > mflimit) break; - if (op > oMaxSeq) break; - - /* Fill table */ - LZ4_putPosition(ip-2, ctx, tableType, base); - - /* Test next position */ - match = LZ4_getPosition(ip, ctx, tableType, base); - LZ4_putPosition(ip, ctx, tableType, base); - if ( (match+MAX_DISTANCE>=ip) - && (LZ4_read32(match)==LZ4_read32(ip)) ) - { token=op++; *token=0; goto _next_match; } - - /* Prepare next loop */ - forwardH = LZ4_hashPosition(++ip, tableType); - } - -_last_literals: - /* Encode Last Literals */ - { - size_t lastRunSize = (size_t)(iend - anchor); - if (op + 1 /* token */ + ((lastRunSize+240)/255) /* litLength */ + lastRunSize /* literals */ > oend) - { - /* adapt lastRunSize to fill 'dst' */ - lastRunSize = (oend-op) - 1; - lastRunSize -= (lastRunSize+240)/255; - } - ip = anchor + lastRunSize; - - if (lastRunSize >= RUN_MASK) - { - size_t accumulator = lastRunSize - RUN_MASK; - 
*op++ = RUN_MASK << ML_BITS; - for(; accumulator >= 255 ; accumulator-=255) *op++ = 255; - *op++ = (BYTE) accumulator; - } - else - { - *op++ = (BYTE)(lastRunSize<= LZ4_compressBound(*srcSizePtr)) /* compression success is guaranteed */ - { - return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1); - } - else - { - if (*srcSizePtr < LZ4_64Klimit) - return LZ4_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, byU16); - else - return LZ4_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, LZ4_64bits() ? byU32 : byPtr); - } -} - - -int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize) -{ -#if (HEAPMODE) - void* ctx = ALLOCATOR(1, sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ -#else - LZ4_stream_t ctxBody; - void* ctx = &ctxBody; -#endif - - int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize); - -#if (HEAPMODE) - FREEMEM(ctx); -#endif - return result; -} - - - -/******************************** -* Streaming functions -********************************/ - -LZ4_stream_t* LZ4_createStream(void) -{ - LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOCATOR(8, LZ4_STREAMSIZE_U64); - LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal)); /* A compilation error here means LZ4_STREAMSIZE is not large enough */ - LZ4_resetStream(lz4s); - return lz4s; -} - -void LZ4_resetStream (LZ4_stream_t* LZ4_stream) -{ - MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t)); -} - -int LZ4_freeStream (LZ4_stream_t* LZ4_stream) -{ - FREEMEM(LZ4_stream); - return (0); -} - - -#define HASH_UNIT sizeof(size_t) -int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) -{ - LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict; - const BYTE* p = (const BYTE*)dictionary; - const BYTE* const dictEnd = p + dictSize; - const BYTE* base; - - if ((dict->initCheck) || (dict->currentOffset > 1 GB)) /* Uninitialized structure, 
or reuse overflow */ - LZ4_resetStream(LZ4_dict); - - if (dictSize < (int)HASH_UNIT) - { - dict->dictionary = NULL; - dict->dictSize = 0; - return 0; - } - - if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB; - dict->currentOffset += 64 KB; - base = p - dict->currentOffset; - dict->dictionary = p; - dict->dictSize = (U32)(dictEnd - p); - dict->currentOffset += dict->dictSize; - - while (p <= dictEnd-HASH_UNIT) - { - LZ4_putPosition(p, dict->hashTable, byU32, base); - p+=3; - } - - return dict->dictSize; -} - - -static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src) -{ - if ((LZ4_dict->currentOffset > 0x80000000) || - ((size_t)LZ4_dict->currentOffset > (size_t)src)) /* address space overflow */ - { - /* rescale hash table */ - U32 delta = LZ4_dict->currentOffset - 64 KB; - const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize; - int i; - for (i=0; ihashTable[i] < delta) LZ4_dict->hashTable[i]=0; - else LZ4_dict->hashTable[i] -= delta; - } - LZ4_dict->currentOffset = 64 KB; - if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB; - LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize; - } -} - - -int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) -{ - LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_stream; - const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize; - - const BYTE* smallest = (const BYTE*) source; - if (streamPtr->initCheck) return 0; /* Uninitialized structure detected */ - if ((streamPtr->dictSize>0) && (smallest>dictEnd)) smallest = dictEnd; - LZ4_renormDictT(streamPtr, smallest); - if (acceleration < 1) acceleration = ACCELERATION_DEFAULT; - - /* Check overlapping input/dictionary space */ - { - const BYTE* sourceEnd = (const BYTE*) source + inputSize; - if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) - { - streamPtr->dictSize = (U32)(dictEnd - sourceEnd); - if 
(streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB; - if (streamPtr->dictSize < 4) streamPtr->dictSize = 0; - streamPtr->dictionary = dictEnd - streamPtr->dictSize; - } - } - - /* prefix mode : source data follows dictionary */ - if (dictEnd == (const BYTE*)source) - { - int result; - if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) - result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, dictSmall, acceleration); - else - result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, noDictIssue, acceleration); - streamPtr->dictSize += (U32)inputSize; - streamPtr->currentOffset += (U32)inputSize; - return result; - } - - /* external dictionary mode */ - { - int result; - if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) - result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, dictSmall, acceleration); - else - result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, noDictIssue, acceleration); - streamPtr->dictionary = (const BYTE*)source; - streamPtr->dictSize = (U32)inputSize; - streamPtr->currentOffset += (U32)inputSize; - return result; - } -} - - -/* Hidden debug function, to force external dictionary mode */ -int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int inputSize) -{ - LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_dict; - int result; - const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize; - - const BYTE* smallest = dictEnd; - if (smallest > (const BYTE*) source) smallest = (const BYTE*) source; - LZ4_renormDictT((LZ4_stream_t_internal*)LZ4_dict, smallest); - - result = LZ4_compress_generic(LZ4_dict, source, dest, inputSize, 0, notLimited, byU32, 
usingExtDict, noDictIssue, 1); - - streamPtr->dictionary = (const BYTE*)source; - streamPtr->dictSize = (U32)inputSize; - streamPtr->currentOffset += (U32)inputSize; - - return result; -} - - -int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) -{ - LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict; - const BYTE* previousDictEnd = dict->dictionary + dict->dictSize; - - if ((U32)dictSize > 64 KB) dictSize = 64 KB; /* useless to define a dictionary > 64 KB */ - if ((U32)dictSize > dict->dictSize) dictSize = dict->dictSize; - - memmove(safeBuffer, previousDictEnd - dictSize, dictSize); - - dict->dictionary = (const BYTE*)safeBuffer; - dict->dictSize = (U32)dictSize; - - return dictSize; -} - - - -/******************************* -* Decompression functions -*******************************/ -/* - * This generic decompression function cover all use cases. - * It shall be instantiated several times, using different sets of directives - * Note that it is essential this generic function is really inlined, - * in order to remove useless branches during compilation optimization. - */ -FORCE_INLINE int LZ4_decompress_generic( - const char* const source, - char* const dest, - int inputSize, - int outputSize, /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. 
*/ - - int endOnInput, /* endOnOutputSize, endOnInputSize */ - int partialDecoding, /* full, partial */ - int targetOutputSize, /* only used if partialDecoding==partial */ - int dict, /* noDict, withPrefix64k, usingExtDict */ - const BYTE* const lowPrefix, /* == dest if dict == noDict */ - const BYTE* const dictStart, /* only if dict==usingExtDict */ - const size_t dictSize /* note : = 0 if noDict */ - ) -{ - /* Local Variables */ - const BYTE* ip = (const BYTE*) source; - const BYTE* const iend = ip + inputSize; - - BYTE* op = (BYTE*) dest; - BYTE* const oend = op + outputSize; - BYTE* cpy; - BYTE* oexit = op + targetOutputSize; - const BYTE* const lowLimit = lowPrefix - dictSize; - - const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize; - const size_t dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4}; - const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; - - const int safeDecode = (endOnInput==endOnInputSize); - const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB))); - - - /* Special cases */ - if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => decode everything */ - if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 
0 : -1; /* Empty output buffer */ - if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1); - - - /* Main Loop */ - while (1) - { - unsigned token; - size_t length; - const BYTE* match; - - /* get literal length */ - token = *ip++; - if ((length=(token>>ML_BITS)) == RUN_MASK) - { - unsigned s; - do - { - s = *ip++; - length += s; - } - while (likely((endOnInput)?ip(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) ) - || ((!endOnInput) && (cpy>oend-COPYLENGTH))) - { - if (partialDecoding) - { - if (cpy > oend) goto _output_error; /* Error : write attempt beyond end of output buffer */ - if ((endOnInput) && (ip+length > iend)) goto _output_error; /* Error : read attempt beyond end of input buffer */ - } - else - { - if ((!endOnInput) && (cpy != oend)) goto _output_error; /* Error : block decoding must stop exactly there */ - if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; /* Error : input must be consumed */ - } - memcpy(op, ip, length); - ip += length; - op += length; - break; /* Necessarily EOF, due to parsing restrictions */ - } - LZ4_wildCopy(op, ip, cpy); - ip += length; op = cpy; - - /* get offset */ - match = cpy - LZ4_readLE16(ip); ip+=2; - if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error; /* Error : offset outside destination buffer */ - - /* get matchlength */ - length = token & ML_MASK; - if (length == ML_MASK) - { - unsigned s; - do - { - if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error; - s = *ip++; - length += s; - } while (s==255); - if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)op)) goto _output_error; /* overflow detection */ - } - length += MINMATCH; - - /* check external dictionary */ - if ((dict==usingExtDict) && (match < lowPrefix)) - { - if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error; /* doesn't respect parsing restriction */ - - if (length <= (size_t)(lowPrefix-match)) - { - /* match can be copied as a single 
segment from external dictionary */ - match = dictEnd - (lowPrefix-match); - memmove(op, match, length); op += length; - } - else - { - /* match encompass external dictionary and current segment */ - size_t copySize = (size_t)(lowPrefix-match); - memcpy(op, dictEnd - copySize, copySize); - op += copySize; - copySize = length - copySize; - if (copySize > (size_t)(op-lowPrefix)) /* overlap within current segment */ - { - BYTE* const endOfMatch = op + copySize; - const BYTE* copyFrom = lowPrefix; - while (op < endOfMatch) *op++ = *copyFrom++; - } - else - { - memcpy(op, lowPrefix, copySize); - op += copySize; - } - } - continue; - } - - /* copy repeated sequence */ - cpy = op + length; - if (unlikely((op-match)<8)) - { - const size_t dec64 = dec64table[op-match]; - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += dec32table[op-match]; - LZ4_copy4(op+4, match); - op += 8; match -= dec64; - } else { LZ4_copy8(op, match); op+=8; match+=8; } - - if (unlikely(cpy>oend-12)) - { - if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last LASTLITERALS bytes must be literals */ - if (op < oend-8) - { - LZ4_wildCopy(op, match, oend-8); - match += (oend-8) - op; - op = oend-8; - } - while (opprefixSize = (size_t) dictSize; - lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize; - lz4sd->externalDict = NULL; - lz4sd->extDictSize = 0; - return 1; -} - -/* -*_continue() : - These decoding functions allow decompression of multiple blocks in "streaming" mode. - Previously decoded blocks must still be available at the memory position where they were decoded. 
- If it's not possible, save the relevant part of decoded data into a safe buffer, - and indicate where it stands using LZ4_setStreamDecode() -*/ -int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) -{ - LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode; - int result; - - if (lz4sd->prefixEnd == (BYTE*)dest) - { - result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, - usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) return result; - lz4sd->prefixSize += result; - lz4sd->prefixEnd += result; - } - else - { - lz4sd->extDictSize = lz4sd->prefixSize; - lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; - result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, - usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) return result; - lz4sd->prefixSize = result; - lz4sd->prefixEnd = (BYTE*)dest + result; - } - - return result; -} - -int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize) -{ - LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode; - int result; - - if (lz4sd->prefixEnd == (BYTE*)dest) - { - result = LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, - usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) return result; - lz4sd->prefixSize += originalSize; - lz4sd->prefixEnd += originalSize; - } - else - { - lz4sd->extDictSize = lz4sd->prefixSize; - lz4sd->externalDict = (BYTE*)dest - lz4sd->extDictSize; - result = LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, - usingExtDict, (BYTE*)dest, lz4sd->externalDict, 
lz4sd->extDictSize); - if (result <= 0) return result; - lz4sd->prefixSize = originalSize; - lz4sd->prefixEnd = (BYTE*)dest + originalSize; - } - - return result; -} - - -/* -Advanced decoding functions : -*_usingDict() : - These decoding functions work the same as "_continue" ones, - the dictionary must be explicitly provided within parameters -*/ - -FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize) -{ - if (dictSize==0) - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest, NULL, 0); - if (dictStart+dictSize == dest) - { - if (dictSize >= (int)(64 KB - 1)) - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, withPrefix64k, (BYTE*)dest-64 KB, NULL, 0); - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest-dictSize, NULL, 0); - } - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize); -} - -int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) -{ - return LZ4_decompress_usingDict_generic(source, dest, compressedSize, maxOutputSize, 1, dictStart, dictSize); -} - -int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) -{ - return LZ4_decompress_usingDict_generic(source, dest, 0, originalSize, 0, dictStart, dictSize); -} - -/* debug function */ -int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) -{ - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize); 
-} - - -/*************************************************** -* Obsolete Functions -***************************************************/ -/* obsolete compression functions */ -int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) { return LZ4_compress_default(source, dest, inputSize, maxOutputSize); } -int LZ4_compress(const char* source, char* dest, int inputSize) { return LZ4_compress_default(source, dest, inputSize, LZ4_compressBound(inputSize)); } -int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); } -int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1); } -int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, maxDstSize, 1); } -int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) { return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1); } - -/* -These function names are deprecated and should no longer be used. -They are only provided here for compatibility with older user programs. 
-- LZ4_uncompress is totally equivalent to LZ4_decompress_fast -- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe -*/ -int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); } -int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); } - - -/* Obsolete Streaming functions */ - -int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; } - -static void LZ4_init(LZ4_stream_t_internal* lz4ds, BYTE* base) -{ - MEM_INIT(lz4ds, 0, LZ4_STREAMSIZE); - lz4ds->bufferStart = base; -} - -int LZ4_resetStreamState(void* state, char* inputBuffer) -{ - if ((((size_t)state) & 3) != 0) return 1; /* Error : pointer is not aligned on 4-bytes boundary */ - LZ4_init((LZ4_stream_t_internal*)state, (BYTE*)inputBuffer); - return 0; -} - -void* LZ4_create (char* inputBuffer) -{ - void* lz4ds = ALLOCATOR(8, LZ4_STREAMSIZE_U64); - LZ4_init ((LZ4_stream_t_internal*)lz4ds, (BYTE*)inputBuffer); - return lz4ds; -} - -char* LZ4_slideInputBuffer (void* LZ4_Data) -{ - LZ4_stream_t_internal* ctx = (LZ4_stream_t_internal*)LZ4_Data; - int dictSize = LZ4_saveDict((LZ4_stream_t*)LZ4_Data, (char*)ctx->bufferStart, 64 KB); - return (char*)(ctx->bufferStart + dictSize); -} - -/* Obsolete streaming decompression functions */ - -int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) -{ - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB); -} - -int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize) -{ - return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB); -} - -#endif /* LZ4_COMMONDEFS_ONLY */ - diff --git a/src/bitshuffle/lz4/lz4.h 
b/src/bitshuffle/lz4/lz4.h deleted file mode 100644 index 3e740022..00000000 --- a/src/bitshuffle/lz4/lz4.h +++ /dev/null @@ -1,360 +0,0 @@ -/* - LZ4 - Fast LZ compression algorithm - Header File - Copyright (C) 2011-2015, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - LZ4 source repository : https://github.com/Cyan4973/lz4 - - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c -*/ -#pragma once - -#if defined (__cplusplus) -extern "C" { -#endif - -/* - * lz4.h provides block compression functions, and gives full buffer control to programmer. 
- * If you need to generate inter-operable compressed data (respecting LZ4 frame specification), - * and can let the library handle its own memory, please use lz4frame.h instead. -*/ - -/************************************** -* Version -**************************************/ -#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */ -#define LZ4_VERSION_MINOR 7 /* for new (non-breaking) interface capabilities */ -#define LZ4_VERSION_RELEASE 1 /* for tweaks, bug-fixes, or development */ -#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE) -int LZ4_versionNumber (void); - -/************************************** -* Tuning parameter -**************************************/ -/* - * LZ4_MEMORY_USAGE : - * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) - * Increasing memory usage improves compression ratio - * Reduced memory usage can improve speed, due to cache effect - * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache - */ -#define LZ4_MEMORY_USAGE 14 - - -/************************************** -* Simple Functions -**************************************/ - -int LZ4_compress_default(const char* source, char* dest, int sourceSize, int maxDestSize); -int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize); - -/* -LZ4_compress_default() : - Compresses 'sourceSize' bytes from buffer 'source' - into already allocated 'dest' buffer of size 'maxDestSize'. - Compression is guaranteed to succeed if 'maxDestSize' >= LZ4_compressBound(sourceSize). - It also runs faster, so it's a recommended setting. - If the function cannot compress 'source' into a more limited 'dest' budget, - compression stops *immediately*, and the function result is zero. - As a consequence, 'dest' content is not valid. - This function never writes outside 'dest' buffer, nor read outside 'source' buffer. 
- sourceSize : Max supported value is LZ4_MAX_INPUT_VALUE - maxDestSize : full or partial size of buffer 'dest' (which must be already allocated) - return : the number of bytes written into buffer 'dest' (necessarily <= maxOutputSize) - or 0 if compression fails - -LZ4_decompress_safe() : - compressedSize : is the precise full size of the compressed block. - maxDecompressedSize : is the size of destination buffer, which must be already allocated. - return : the number of bytes decompressed into destination buffer (necessarily <= maxDecompressedSize) - If destination buffer is not large enough, decoding will stop and output an error code (<0). - If the source stream is detected malformed, the function will stop decoding and return a negative result. - This function is protected against buffer overflow exploits, including malicious data packets. - It never writes outside output buffer, nor reads outside input buffer. -*/ - - -/************************************** -* Advanced Functions -**************************************/ -#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */ -#define LZ4_COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16) - -/* -LZ4_compressBound() : - Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible) - This function is primarily useful for memory allocation purposes (destination buffer size). - Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example). 
- Note that LZ4_compress_default() compress faster when dest buffer size is >= LZ4_compressBound(srcSize) - inputSize : max supported value is LZ4_MAX_INPUT_SIZE - return : maximum output size in a "worst case" scenario - or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE) -*/ -int LZ4_compressBound(int inputSize); - -/* -LZ4_compress_fast() : - Same as LZ4_compress_default(), but allows to select an "acceleration" factor. - The larger the acceleration value, the faster the algorithm, but also the lesser the compression. - It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed. - An acceleration value of "1" is the same as regular LZ4_compress_default() - Values <= 0 will be replaced by ACCELERATION_DEFAULT (see lz4.c), which is 1. -*/ -int LZ4_compress_fast (const char* source, char* dest, int sourceSize, int maxDestSize, int acceleration); - - -/* -LZ4_compress_fast_extState() : - Same compression function, just using an externally allocated memory space to store compression state. - Use LZ4_sizeofState() to know how much memory must be allocated, - and allocate it on 8-bytes boundaries (using malloc() typically). - Then, provide it as 'void* state' to compression function. -*/ -int LZ4_sizeofState(void); -int LZ4_compress_fast_extState (void* state, const char* source, char* dest, int inputSize, int maxDestSize, int acceleration); - - -/* -LZ4_compress_destSize() : - Reverse the logic, by compressing as much data as possible from 'source' buffer - into already allocated buffer 'dest' of size 'targetDestSize'. - This function either compresses the entire 'source' content into 'dest' if it's large enough, - or fill 'dest' buffer completely with as much data as possible from 'source'. - *sourceSizePtr : will be modified to indicate how many bytes where read from 'source' to fill 'dest'. - New value is necessarily <= old value. 
- return : Nb bytes written into 'dest' (necessarily <= targetDestSize) - or 0 if compression fails -*/ -int LZ4_compress_destSize (const char* source, char* dest, int* sourceSizePtr, int targetDestSize); - - -/* -LZ4_decompress_fast() : - originalSize : is the original and therefore uncompressed size - return : the number of bytes read from the source buffer (in other words, the compressed size) - If the source stream is detected malformed, the function will stop decoding and return a negative result. - Destination buffer must be already allocated. Its size must be a minimum of 'originalSize' bytes. - note : This function fully respect memory boundaries for properly formed compressed data. - It is a bit faster than LZ4_decompress_safe(). - However, it does not provide any protection against intentionally modified data stream (malicious input). - Use this function in trusted environment only (data to decode comes from a trusted source). -*/ -int LZ4_decompress_fast (const char* source, char* dest, int originalSize); - -/* -LZ4_decompress_safe_partial() : - This function decompress a compressed block of size 'compressedSize' at position 'source' - into destination buffer 'dest' of size 'maxDecompressedSize'. - The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached, - reducing decompression time. - return : the number of bytes decoded in the destination buffer (necessarily <= maxDecompressedSize) - Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller. - Always control how many bytes were decoded. - If the source stream is detected malformed, the function will stop decoding and return a negative result. - This function never writes outside of output buffer, and never reads outside of input buffer. 
It is therefore protected against malicious data packets -*/ -int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize); - - -/*********************************************** -* Streaming Compression Functions -***********************************************/ -#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4) -#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(long long)) -/* - * LZ4_stream_t - * information structure to track an LZ4 stream. - * important : init this structure content before first use ! - * note : only allocated directly the structure if you are statically linking LZ4 - * If you are using liblz4 as a DLL, please use below construction methods instead. - */ -typedef struct { long long table[LZ4_STREAMSIZE_U64]; } LZ4_stream_t; - -/* - * LZ4_resetStream - * Use this function to init an allocated LZ4_stream_t structure - */ -void LZ4_resetStream (LZ4_stream_t* streamPtr); - -/* - * LZ4_createStream will allocate and initialize an LZ4_stream_t structure - * LZ4_freeStream releases its memory. - * In the context of a DLL (liblz4), please use these methods rather than the static struct. - * They are more future proof, in case of a change of LZ4_stream_t size. - */ -LZ4_stream_t* LZ4_createStream(void); -int LZ4_freeStream (LZ4_stream_t* streamPtr); - -/* - * LZ4_loadDict - * Use this function to load a static dictionary into LZ4_stream. - * Any previous data will be forgotten, only 'dictionary' will remain in memory. - * Loading a size of 0 is allowed. - * Return : dictionary size, in bytes (necessarily <= 64 KB) - */ -int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize); - -/* - * LZ4_compress_fast_continue - * Compress buffer content 'src', using data from previously compressed blocks as dictionary to improve compression ratio. - * Important : Previous data blocks are assumed to still be present and unmodified ! 
- * 'dst' buffer must be already allocated. - * If maxDstSize >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster. - * If not, and if compressed data cannot fit into 'dst' buffer size, compression stops, and function returns a zero. - */ -int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int maxDstSize, int acceleration); - -/* - * LZ4_saveDict - * If previously compressed data block is not guaranteed to remain available at its memory location - * save it into a safer place (char* safeBuffer) - * Note : you don't need to call LZ4_loadDict() afterwards, - * dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue() - * Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error - */ -int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize); - - -/************************************************ -* Streaming Decompression Functions -************************************************/ - -#define LZ4_STREAMDECODESIZE_U64 4 -#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long)) -typedef struct { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; } LZ4_streamDecode_t; -/* - * LZ4_streamDecode_t - * information structure to track an LZ4 stream. - * init this structure content using LZ4_setStreamDecode or memset() before first use ! - * - * In the context of a DLL (liblz4) please prefer usage of construction methods below. - * They are more future proof, in case of a change of LZ4_streamDecode_t size in the future. - * LZ4_createStreamDecode will allocate and initialize an LZ4_streamDecode_t structure - * LZ4_freeStreamDecode releases its memory. - */ -LZ4_streamDecode_t* LZ4_createStreamDecode(void); -int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream); - -/* - * LZ4_setStreamDecode - * Use this function to instruct where to find the dictionary. 
- * Setting a size of 0 is allowed (same effect as reset). - * Return : 1 if OK, 0 if error - */ -int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize); - -/* -*_continue() : - These decoding functions allow decompression of multiple blocks in "streaming" mode. - Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB) - In the case of a ring buffers, decoding buffer must be either : - - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions) - In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB). - - Larger than encoding buffer, by a minimum of maxBlockSize more bytes. - maxBlockSize is implementation dependent. It's the maximum size you intend to compress into a single block. - In which case, encoding and decoding buffers do not need to be synchronized, - and encoding ring buffer can have any size, including small ones ( < 64 KB). - - _At least_ 64 KB + 8 bytes + maxBlockSize. - In which case, encoding and decoding buffers do not need to be synchronized, - and encoding ring buffer can have any size, including larger than decoding buffer. - Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer, - and indicate where it is saved using LZ4_setStreamDecode() -*/ -int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize); -int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize); - - -/* -Advanced decoding functions : -*_usingDict() : - These decoding functions work the same as - a combination of LZ4_setStreamDecode() followed by LZ4_decompress_x_continue() - They are stand-alone. They don't need nor update an LZ4_streamDecode_t structure. 
-*/ -int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize); -int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize); - - - -/************************************** -* Obsolete Functions -**************************************/ -/* Deprecate Warnings */ -/* Should these warnings messages be a problem, - it is generally possible to disable them, - with -Wno-deprecated-declarations for gcc - or _CRT_SECURE_NO_WARNINGS in Visual for example. - You can also define LZ4_DEPRECATE_WARNING_DEFBLOCK. */ -#ifndef LZ4_DEPRECATE_WARNING_DEFBLOCK -# define LZ4_DEPRECATE_WARNING_DEFBLOCK -# define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) -# if (LZ4_GCC_VERSION >= 405) || defined(__clang__) -# define LZ4_DEPRECATED(message) __attribute__((deprecated(message))) -# elif (LZ4_GCC_VERSION >= 301) -# define LZ4_DEPRECATED(message) __attribute__((deprecated)) -# elif defined(_MSC_VER) -# define LZ4_DEPRECATED(message) __declspec(deprecated(message)) -# else -# pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler") -# define LZ4_DEPRECATED(message) -# endif -#endif /* LZ4_DEPRECATE_WARNING_DEFBLOCK */ - -/* Obsolete compression functions */ -/* These functions are planned to start generate warnings by r131 approximately */ -int LZ4_compress (const char* source, char* dest, int sourceSize); -int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize); -int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); -int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); -int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize); -int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, 
char* dest, int inputSize, int maxOutputSize); - -/* Obsolete decompression functions */ -/* These function names are completely deprecated and must no longer be used. - They are only provided here for compatibility with older programs. - - LZ4_uncompress is the same as LZ4_decompress_fast - - LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe - These function prototypes are now disabled; uncomment them only if you really need them. - It is highly recommended to stop using these prototypes and migrate to maintained ones */ -/* int LZ4_uncompress (const char* source, char* dest, int outputSize); */ -/* int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); */ - -/* Obsolete streaming functions; use new streaming interface whenever possible */ -LZ4_DEPRECATED("use LZ4_createStream() instead") void* LZ4_create (char* inputBuffer); -LZ4_DEPRECATED("use LZ4_createStream() instead") int LZ4_sizeofStreamState(void); -LZ4_DEPRECATED("use LZ4_resetStream() instead") int LZ4_resetStreamState(void* state, char* inputBuffer); -LZ4_DEPRECATED("use LZ4_saveDict() instead") char* LZ4_slideInputBuffer (void* state); - -/* Obsolete streaming decoding functions */ -LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize); -LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize); - - -#if defined (__cplusplus) -} -#endif diff --git a/src/bitshuffle/lzf/LICENSE.txt b/src/bitshuffle/lzf/LICENSE.txt deleted file mode 100644 index 3787a007..00000000 --- a/src/bitshuffle/lzf/LICENSE.txt +++ /dev/null @@ -1,34 +0,0 @@ -Copyright Notice and Statement for LZF filter - -Copyright (c) 2008-2009 Andrew Collette -http://h5py.alfven.org -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -a. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -b. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the - distribution. - -c. Neither the name of the author nor the names of contributors may - be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/src/bitshuffle/lzf/README.txt b/src/bitshuffle/lzf/README.txt deleted file mode 100644 index c6ad62c3..00000000 --- a/src/bitshuffle/lzf/README.txt +++ /dev/null @@ -1,84 +0,0 @@ -=============================== -LZF filter for HDF5, revision 3 -=============================== - -The LZF filter provides high-speed compression with acceptable compression -performance, resulting in much faster performance than DEFLATE, at the -cost of a slightly lower compression ratio. 
It's appropriate for large -datasets of low to moderate complexity, for which some compression is -much better than none, but for which the speed of DEFLATE is unacceptable. - -This filter has been tested against HDF5 versions 1.6.5 through 1.8.3. It -is released under the BSD license (see LICENSE.txt for details). - - -Using the filter from HDF5 --------------------------- - -There is exactly one new public function declared in lzf_filter.h, with -the following signature: - - int register_lzf(void) - -Calling this will register the filter with the HDF5 library. A non-negative -return value indicates success. If the registration fails, an error is pushed -onto the current error stack and a negative value is returned. - -It's strongly recommended to use the SHUFFLE filter with LZF, as it's -cheap, supported by all current versions of HDF5, and can significantly -improve the compression ratio. An example C program ("example.c") is included -which demonstrates the proper use of the filter. - - -Compiling ---------- - -The filter consists of a single .c file and header, along with an embedded -version of the LZF compression library. Since the filter is stateless, it's -recommended to statically link the entire thing into your program; for -example: - - $ gcc -O2 -lhdf5 lzf/*.c lzf_filter.c myprog.c -o myprog - -It can also be built as a shared library, although you will have to install -the resulting library somewhere the runtime linker can find it: - - $ gcc -O2 -lhdf5 -fPIC -shared lzf/*.c lzf_filter.c -o liblzf_filter.so - -A similar procedure should be used for building C++ code. As in these -examples, using option -O1 or higher is strongly recommended for increased -performance. - - -Contact -------- - -This filter is maintained as part of the HDF5 for Python (h5py) project. The -goal of h5py is to provide access to the majority of the HDF5 C API and feature -set from Python. The most recent version of h5py (1.1) includes the LZF -filter by default. 
- -* Downloads and bug tracker: http://h5py.googlecode.com - -* Main web site and documentation: http://h5py.alfven.org - -* Contact email: h5py at alfven dot org - - -History of changes ------------------- - -Revision 3 (6/25/09) - Fix issue with changed filter struct definition under HDF5 1.8.3. - -Revision 2 - Minor speed enhancement. - -Revision 1 - Initial release. - - - - - - diff --git a/src/bitshuffle/lzf/README_bitshuffle.txt b/src/bitshuffle/lzf/README_bitshuffle.txt deleted file mode 100644 index d620a925..00000000 --- a/src/bitshuffle/lzf/README_bitshuffle.txt +++ /dev/null @@ -1,3 +0,0 @@ -The LZF filter for HDF5 is part of the h5py project (http://h5py.alfven.org). -The version included with bitshuffle is from version 2.3 of h5py with no -modifications other than the addition of this README. diff --git a/src/bitshuffle/lzf/example.c b/src/bitshuffle/lzf/example.c deleted file mode 100644 index 23dd776c..00000000 --- a/src/bitshuffle/lzf/example.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - Copyright (C) 2009 Andrew Collette - http://h5py.alfven.org - License: BSD (see LICENSE.txt) - - Example program demonstrating use of the LZF filter from C code. - - To compile this program: - - h5cc -DH5_USE_16_API lzf/*.c lzf_filter.c example.c -o example - - To run: - - $ ./example - Success! - $ h5ls -v test_lzf.hdf5 - Opened "test_lzf.hdf5" with sec2 driver. 
- dset Dataset {100/100, 100/100, 100/100} - Location: 0:1:0:976 - Links: 1 - Modified: 2009-02-15 16:35:11 PST - Chunks: {1, 100, 100} 40000 bytes - Storage: 4000000 logical bytes, 174288 allocated bytes, 2295.05% utilization - Filter-0: shuffle-2 OPT {4} - Filter-1: lzf-32000 OPT {1, 261, 40000} - Type: native float -*/ - -#include -#include "hdf5.h" -#include "lzf_filter.h" - -#define SIZE 100*100*100 -#define SHAPE {100,100,100} -#define CHUNKSHAPE {1,100,100} - -int main(){ - - static float data[SIZE]; - static float data_out[SIZE]; - const hsize_t shape[] = SHAPE; - const hsize_t chunkshape[] = CHUNKSHAPE; - int r, i; - int return_code = 1; - - hid_t fid, sid, dset, plist = 0; - - for(i=0; i0) H5Dclose(dset); - if(sid>0) H5Sclose(sid); - if(plist>0) H5Pclose(plist); - if(fid>0) H5Fclose(fid); - - return return_code; -} - diff --git a/src/bitshuffle/lzf/lzf/lzf.h b/src/bitshuffle/lzf/lzf/lzf.h deleted file mode 100644 index 919b6e6b..00000000 --- a/src/bitshuffle/lzf/lzf/lzf.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2000-2008 Marc Alexander Lehmann - * - * Redistribution and use in source and binary forms, with or without modifica- - * tion, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- - * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO - * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- - * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- - * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Alternatively, the contents of this file may be used under the terms of - * the GNU General Public License ("GPL") version 2 or any later version, - * in which case the provisions of the GPL are applicable instead of - * the above. If you wish to allow the use of your version of this file - * only under the terms of the GPL and not to allow others to use your - * version of this file under the BSD license, indicate your decision - * by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL. If you do not delete the - * provisions above, a recipient may use your version of this file under - * either the BSD or the GPL. - */ - -#ifndef LZF_H -#define LZF_H - -/*********************************************************************** -** -** lzf -- an extremely fast/free compression/decompression-method -** http://liblzf.plan9.de/ -** -** This algorithm is believed to be patent-free. -** -***********************************************************************/ - -#define LZF_VERSION 0x0105 /* 1.5, API version */ - -/* - * Compress in_len bytes stored at the memory block starting at - * in_data and write the result to out_data, up to a maximum length - * of out_len bytes. 
- * - * If the output buffer is not large enough or any error occurs return 0, - * otherwise return the number of bytes used, which might be considerably - * more than in_len (but less than 104% of the original size), so it - * makes sense to always use out_len == in_len - 1), to ensure _some_ - * compression, and store the data uncompressed otherwise (with a flag, of - * course. - * - * lzf_compress might use different algorithms on different systems and - * even different runs, thus might result in different compressed strings - * depending on the phase of the moon or similar factors. However, all - * these strings are architecture-independent and will result in the - * original data when decompressed using lzf_decompress. - * - * The buffers must not be overlapping. - * - * If the option LZF_STATE_ARG is enabled, an extra argument must be - * supplied which is not reflected in this header file. Refer to lzfP.h - * and lzf_c.c. - * - */ -unsigned int -lzf_compress (const void *const in_data, unsigned int in_len, - void *out_data, unsigned int out_len); - -/* - * Decompress data compressed with some version of the lzf_compress - * function and stored at location in_data and length in_len. The result - * will be stored at out_data up to a maximum of out_len characters. - * - * If the output buffer is not large enough to hold the decompressed - * data, a 0 is returned and errno is set to E2BIG. Otherwise the number - * of decompressed bytes (i.e. the original length of the data) is - * returned. - * - * If an error in the compressed data is detected, a zero is returned and - * errno is set to EINVAL. - * - * This function is very fast, about as fast as a copying loop. 
- */ -unsigned int -lzf_decompress (const void *const in_data, unsigned int in_len, - void *out_data, unsigned int out_len); - -#endif - diff --git a/src/bitshuffle/lzf/lzf/lzfP.h b/src/bitshuffle/lzf/lzf/lzfP.h deleted file mode 100644 index 8414da4d..00000000 --- a/src/bitshuffle/lzf/lzf/lzfP.h +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2000-2007 Marc Alexander Lehmann - * - * Redistribution and use in source and binary forms, with or without modifica- - * tion, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- - * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO - * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- - * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- - * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Alternatively, the contents of this file may be used under the terms of - * the GNU General Public License ("GPL") version 2 or any later version, - * in which case the provisions of the GPL are applicable instead of - * the above. 
If you wish to allow the use of your version of this file - * only under the terms of the GPL and not to allow others to use your - * version of this file under the BSD license, indicate your decision - * by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL. If you do not delete the - * provisions above, a recipient may use your version of this file under - * either the BSD or the GPL. - */ - -#ifndef LZFP_h -#define LZFP_h - -#define STANDALONE 1 /* at the moment, this is ok. */ - -#ifndef STANDALONE -# include "lzf.h" -#endif - -/* - * Size of hashtable is (1 << HLOG) * sizeof (char *) - * decompression is independent of the hash table size - * the difference between 15 and 14 is very small - * for small blocks (and 14 is usually a bit faster). - * For a low-memory/faster configuration, use HLOG == 13; - * For best compression, use 15 or 16 (or more, up to 23). - */ -#ifndef HLOG -# define HLOG 17 /* Avoid pathological case at HLOG=16 A.C. 2/15/09 */ -#endif - -/* - * Sacrifice very little compression quality in favour of compression speed. - * This gives almost the same compression as the default code, and is - * (very roughly) 15% faster. This is the preferred mode of operation. - */ -#ifndef VERY_FAST -# define VERY_FAST 1 -#endif - -/* - * Sacrifice some more compression quality in favour of compression speed. - * (roughly 1-2% worse compression for large blocks and - * 9-10% for small, redundant, blocks and >>20% better speed in both cases) - * In short: when in need for speed, enable this for binary data, - * possibly disable this for text data. 
- */ -#ifndef ULTRA_FAST -# define ULTRA_FAST 1 -#endif - -/* - * Unconditionally aligning does not cost very much, so do it if unsure - */ -#ifndef STRICT_ALIGN -# define STRICT_ALIGN !(defined(__i386) || defined (__amd64)) -#endif - -/* - * You may choose to pre-set the hash table (might be faster on some - * modern cpus and large (>>64k) blocks, and also makes compression - * deterministic/repeatable when the configuration otherwise is the same). - */ -#ifndef INIT_HTAB -# define INIT_HTAB 0 -#endif - -/* ======================================================================= - Changing things below this line may break the HDF5 LZF filter. - A.C. 2/15/09 - ======================================================================= -*/ - -/* - * Avoid assigning values to errno variable? for some embedding purposes - * (linux kernel for example), this is neccessary. NOTE: this breaks - * the documentation in lzf.h. - */ -#ifndef AVOID_ERRNO -# define AVOID_ERRNO 0 -#endif - -/* - * Wether to pass the LZF_STATE variable as argument, or allocate it - * on the stack. For small-stack environments, define this to 1. - * NOTE: this breaks the prototype in lzf.h. - */ -#ifndef LZF_STATE_ARG -# define LZF_STATE_ARG 0 -#endif - -/* - * Wether to add extra checks for input validity in lzf_decompress - * and return EINVAL if the input stream has been corrupted. This - * only shields against overflowing the input buffer and will not - * detect most corrupted streams. - * This check is not normally noticable on modern hardware - * (<1% slowdown), but might slow down older cpus considerably. - */ - -#ifndef CHECK_INPUT -# define CHECK_INPUT 1 -#endif - -/*****************************************************************************/ -/* nothing should be changed below */ - -typedef unsigned char u8; - -typedef const u8 *LZF_STATE[1 << (HLOG)]; - -#if !STRICT_ALIGN -/* for unaligned accesses we need a 16 bit datatype. 
*/ -# include -# if USHRT_MAX == 65535 - typedef unsigned short u16; -# elif UINT_MAX == 65535 - typedef unsigned int u16; -# else -# undef STRICT_ALIGN -# define STRICT_ALIGN 1 -# endif -#endif - -#if ULTRA_FAST -# if defined(VERY_FAST) -# undef VERY_FAST -# endif -#endif - -#if INIT_HTAB -# ifdef __cplusplus -# include -# else -# include -# endif -#endif - -#endif - diff --git a/src/bitshuffle/lzf/lzf/lzf_c.c b/src/bitshuffle/lzf/lzf/lzf_c.c deleted file mode 100644 index fbfd4cce..00000000 --- a/src/bitshuffle/lzf/lzf/lzf_c.c +++ /dev/null @@ -1,296 +0,0 @@ -/* - * Copyright (c) 2000-2008 Marc Alexander Lehmann - * - * Redistribution and use in source and binary forms, with or without modifica- - * tion, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- - * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO - * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- - * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- - * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * Alternatively, the contents of this file may be used under the terms of - * the GNU General Public License ("GPL") version 2 or any later version, - * in which case the provisions of the GPL are applicable instead of - * the above. If you wish to allow the use of your version of this file - * only under the terms of the GPL and not to allow others to use your - * version of this file under the BSD license, indicate your decision - * by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL. If you do not delete the - * provisions above, a recipient may use your version of this file under - * either the BSD or the GPL. - */ - -#include "lzfP.h" - -#define HSIZE (1 << (HLOG)) - -/* - * don't play with this unless you benchmark! - * decompression is not dependent on the hash function - * the hashing function might seem strange, just believe me - * it works ;) - */ -#ifndef FRST -# define FRST(p) (((p[0]) << 8) | p[1]) -# define NEXT(v,p) (((v) << 8) | p[2]) -# if ULTRA_FAST -# define IDX(h) ((( h >> (3*8 - HLOG)) - h ) & (HSIZE - 1)) -# elif VERY_FAST -# define IDX(h) ((( h >> (3*8 - HLOG)) - h*5) & (HSIZE - 1)) -# else -# define IDX(h) ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1)) -# endif -#endif -/* - * IDX works because it is very similar to a multiplicative hash, e.g. - * ((h * 57321 >> (3*8 - HLOG)) & (HSIZE - 1)) - * the latter is also quite fast on newer CPUs, and compresses similarly. 
- * - * the next one is also quite good, albeit slow ;) - * (int)(cos(h & 0xffffff) * 1e6) - */ - -#if 0 -/* original lzv-like hash function, much worse and thus slower */ -# define FRST(p) (p[0] << 5) ^ p[1] -# define NEXT(v,p) ((v) << 5) ^ p[2] -# define IDX(h) ((h) & (HSIZE - 1)) -#endif - -#define MAX_LIT (1 << 5) -#define MAX_OFF (1 << 13) -#define MAX_REF ((1 << 8) + (1 << 3)) - -#if __GNUC__ >= 3 -# define expect(expr,value) __builtin_expect ((expr),(value)) -# define inline inline -#else -# define expect(expr,value) (expr) -# define inline static -#endif - -#define expect_false(expr) expect ((expr) != 0, 0) -#define expect_true(expr) expect ((expr) != 0, 1) - -/* - * compressed format - * - * 000LLLLL ; literal - * LLLooooo oooooooo ; backref L - * 111ooooo LLLLLLLL oooooooo ; backref L+7 - * - */ - -unsigned int -lzf_compress (const void *const in_data, unsigned int in_len, - void *out_data, unsigned int out_len -#if LZF_STATE_ARG - , LZF_STATE htab -#endif - ) -{ -#if !LZF_STATE_ARG - LZF_STATE htab; -#endif - const u8 **hslot; - const u8 *ip = (const u8 *)in_data; - u8 *op = (u8 *)out_data; - const u8 *in_end = ip + in_len; - u8 *out_end = op + out_len; - const u8 *ref; - - /* off requires a type wide enough to hold a general pointer difference. - * ISO C doesn't have that (size_t might not be enough and ptrdiff_t only - * works for differences within a single object). We also assume that no - * no bit pattern traps. Since the only platform that is both non-POSIX - * and fails to support both assumptions is windows 64 bit, we make a - * special workaround for it. 
- */ -#if ( defined (WIN32) && defined (_M_X64) ) || defined (_WIN64) - unsigned _int64 off; /* workaround for missing POSIX compliance */ -#else - unsigned long off; -#endif - unsigned int hval; - int lit; - - if (!in_len || !out_len) - return 0; - -#if INIT_HTAB - memset (htab, 0, sizeof (htab)); -# if 0 - for (hslot = htab; hslot < htab + HSIZE; hslot++) - *hslot++ = ip; -# endif -#endif - - lit = 0; op++; /* start run */ - - hval = FRST (ip); - while (ip < in_end - 2) - { - hval = NEXT (hval, ip); - hslot = htab + IDX (hval); - ref = *hslot; *hslot = ip; - - if (1 -#if INIT_HTAB - && ref < ip /* the next test will actually take care of this, but this is faster */ -#endif - && (off = ip - ref - 1) < MAX_OFF - && ip + 4 < in_end - && ref > (u8 *)in_data -#if STRICT_ALIGN - && ref[0] == ip[0] - && ref[1] == ip[1] - && ref[2] == ip[2] -#else - && *(u16 *)ref == *(u16 *)ip - && ref[2] == ip[2] -#endif - ) - { - /* match found at *ref++ */ - unsigned int len = 2; - unsigned int maxlen = in_end - ip - len; - maxlen = maxlen > MAX_REF ? 
MAX_REF : maxlen; - - if (expect_false (op + 3 + 1 >= out_end)) /* first a faster conservative test */ - if (op - !lit + 3 + 1 >= out_end) /* second the exact but rare test */ - return 0; - - op [- lit - 1] = lit - 1; /* stop run */ - op -= !lit; /* undo run if length is zero */ - - for (;;) - { - if (expect_true (maxlen > 16)) - { - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - } - - do - len++; - while (len < maxlen && ref[len] == ip[len]); - - break; - } - - len -= 2; /* len is now #octets - 1 */ - ip++; - - if (len < 7) - { - *op++ = (off >> 8) + (len << 5); - } - else - { - *op++ = (off >> 8) + ( 7 << 5); - *op++ = len - 7; - } - - *op++ = off; - lit = 0; op++; /* start run */ - - ip += len + 1; - - if (expect_false (ip >= in_end - 2)) - break; - -#if ULTRA_FAST || VERY_FAST - --ip; -# if VERY_FAST && !ULTRA_FAST - --ip; -# endif - hval = FRST (ip); - - hval = NEXT (hval, ip); - htab[IDX (hval)] = ip; - ip++; - -# if VERY_FAST && !ULTRA_FAST - hval = NEXT (hval, ip); - htab[IDX (hval)] = ip; - ip++; -# endif -#else - ip -= len + 1; - - do - { - hval = NEXT (hval, ip); - htab[IDX (hval)] = ip; - ip++; - } - while (len--); -#endif - } - else - { - /* one more literal byte we must copy */ - if (expect_false (op >= out_end)) - return 0; - - lit++; *op++ = *ip++; - - if (expect_false (lit == MAX_LIT)) - { - op [- lit - 1] = lit - 
1; /* stop run */ - lit = 0; op++; /* start run */ - } - } - } - - if (op + 3 > out_end) /* at most 3 bytes can be missing here */ - return 0; - - while (ip < in_end) - { - lit++; *op++ = *ip++; - - if (expect_false (lit == MAX_LIT)) - { - op [- lit - 1] = lit - 1; /* stop run */ - lit = 0; op++; /* start run */ - } - } - - op [- lit - 1] = lit - 1; /* end run */ - op -= !lit; /* undo run if length is zero */ - - return op - (u8 *)out_data; -} - diff --git a/src/bitshuffle/lzf/lzf/lzf_d.c b/src/bitshuffle/lzf/lzf/lzf_d.c deleted file mode 100644 index 2e2eedaa..00000000 --- a/src/bitshuffle/lzf/lzf/lzf_d.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (c) 2000-2007 Marc Alexander Lehmann - * - * Redistribution and use in source and binary forms, with or without modifica- - * tion, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- - * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO - * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- - * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- - * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * Alternatively, the contents of this file may be used under the terms of - * the GNU General Public License ("GPL") version 2 or any later version, - * in which case the provisions of the GPL are applicable instead of - * the above. If you wish to allow the use of your version of this file - * only under the terms of the GPL and not to allow others to use your - * version of this file under the BSD license, indicate your decision - * by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL. If you do not delete the - * provisions above, a recipient may use your version of this file under - * either the BSD or the GPL. - */ - -#include "lzfP.h" - -#if AVOID_ERRNO -# define SET_ERRNO(n) -#else -# include -# define SET_ERRNO(n) errno = (n) -#endif - -/* ASM is slower than C in HDF5 tests -- A.C. 2/5/09 -#ifndef __STRICT_ANSI__ -#ifndef H5PY_DISABLE_LZF_ASM -#if (__i386 || __amd64) && __GNUC__ >= 3 -# define lzf_movsb(dst, src, len) \ - asm ("rep movsb" \ - : "=D" (dst), "=S" (src), "=c" (len) \ - : "0" (dst), "1" (src), "2" (len)); -#endif -#endif -#endif -*/ - -unsigned int -lzf_decompress (const void *const in_data, unsigned int in_len, - void *out_data, unsigned int out_len) -{ - u8 const *ip = (const u8 *)in_data; - u8 *op = (u8 *)out_data; - u8 const *const in_end = ip + in_len; - u8 *const out_end = op + out_len; - - do - { - unsigned int ctrl = *ip++; - - if (ctrl < (1 << 5)) /* literal run */ - { - ctrl++; - - if (op + ctrl > out_end) - { - SET_ERRNO (E2BIG); - return 0; - } - -#if CHECK_INPUT - if (ip + ctrl > in_end) - { - SET_ERRNO (EINVAL); - return 0; - } -#endif - -#ifdef lzf_movsb - lzf_movsb (op, ip, ctrl); -#else - do - *op++ = *ip++; - while (--ctrl); -#endif - } - else /* back reference */ - { - unsigned int len = ctrl >> 5; - - u8 *ref = op - ((ctrl & 0x1f) << 8) - 1; - -#if CHECK_INPUT - if (ip >= in_end) - { - SET_ERRNO (EINVAL); - return 0; - } -#endif - if (len == 7) - { - len += 
*ip++; -#if CHECK_INPUT - if (ip >= in_end) - { - SET_ERRNO (EINVAL); - return 0; - } -#endif - } - - ref -= *ip++; - - if (op + len + 2 > out_end) - { - SET_ERRNO (E2BIG); - return 0; - } - - if (ref < (u8 *)out_data) - { - SET_ERRNO (EINVAL); - return 0; - } - -#ifdef lzf_movsb - len += 2; - lzf_movsb (op, ref, len); -#else - *op++ = *ref++; - *op++ = *ref++; - - do - *op++ = *ref++; - while (--len); -#endif - } - } - while (ip < in_end); - - return op - (u8 *)out_data; -} - diff --git a/src/bitshuffle/lzf/lzf_filter.c b/src/bitshuffle/lzf/lzf_filter.c deleted file mode 100644 index c6dd4b0e..00000000 --- a/src/bitshuffle/lzf/lzf_filter.c +++ /dev/null @@ -1,261 +0,0 @@ -/***** Preamble block ********************************************************* -* -* This file is part of h5py, a low-level Python interface to the HDF5 library. -* -* Copyright (C) 2008 Andrew Collette -* http://h5py.alfven.org -* License: BSD (See LICENSE.txt for full license) -* -* $Date$ -* -****** End preamble block ****************************************************/ - -/* - Implements an LZF filter module for HDF5, using the BSD-licensed library - by Marc Alexander Lehmann (http://www.goof.com/pcg/marc/liblzf.html). - - No Python-specific code is used. The filter behaves like the DEFLATE - filter, in that it is called for every type and space, and returns 0 - if the data cannot be compressed. - - The only public function is (int) register_lzf(void), which passes on - the result from H5Zregister. 
-*/ - -#include -#include -#include -#include "hdf5.h" -#include "lzf/lzf.h" -#include "lzf_filter.h" - -/* Our own versions of H5Epush_sim, as it changed in 1.8 */ -#if H5_VERS_MAJOR == 1 && H5_VERS_MINOR < 7 - -#define PUSH_ERR(func, minor, str) H5Epush(__FILE__, func, __LINE__, H5E_PLINE, minor, str) -#define H5PY_GET_FILTER H5Pget_filter_by_id - -#else - -#define PUSH_ERR(func, minor, str) H5Epush1(__FILE__, func, __LINE__, H5E_PLINE, minor, str) -#define H5PY_GET_FILTER(a,b,c,d,e,f,g) H5Pget_filter_by_id2(a,b,c,d,e,f,g,NULL) - -#endif - -/* Deal with the mutiple definitions for H5Z_class_t. - Note: Only HDF5 1.6 and 1.8 are supported. - - (1) The old class should always be used for HDF5 1.6 - (2) The new class should always be used for HDF5 1.8 < 1.8.3 - (3) The old class should be used for HDF5 1.8 >= 1.8.3 only if the - macro H5_USE_16_API is set -*/ - -#if H5_VERS_MAJOR == 1 && H5_VERS_MINOR == 8 && (H5_VERS_RELEASE < 3 || !H5_USE_16_API) -#define H5PY_H5Z_NEWCLS 1 -#else -#define H5PY_H5Z_NEWCLS 0 -#endif - -size_t lzf_filter(unsigned flags, size_t cd_nelmts, - const unsigned cd_values[], size_t nbytes, - size_t *buf_size, void **buf); - -herr_t lzf_set_local(hid_t dcpl, hid_t type, hid_t space); - - -/* Try to register the filter, passing on the HDF5 return value */ -int register_lzf(void){ - - int retval; - -#if H5PY_H5Z_NEWCLS - H5Z_class_t filter_class = { - H5Z_CLASS_T_VERS, - (H5Z_filter_t)(H5PY_FILTER_LZF), - 1, 1, - "lzf", - NULL, - (H5Z_set_local_func_t)(lzf_set_local), - (H5Z_func_t)(lzf_filter) - }; -#else - H5Z_class_t filter_class = { - (H5Z_filter_t)(H5PY_FILTER_LZF), - "lzf", - NULL, - (H5Z_set_local_func_t)(lzf_set_local), - (H5Z_func_t)(lzf_filter) - }; -#endif - - retval = H5Zregister(&filter_class); - if(retval<0){ - PUSH_ERR("register_lzf", H5E_CANTREGISTER, "Can't register LZF filter"); - } - return retval; -} - -/* Filter setup. Records the following inside the DCPL: - - 1. 
If version information is not present, set slots 0 and 1 to the filter - revision and LZF API version, respectively. - - 2. Compute the chunk size in bytes and store it in slot 2. -*/ -herr_t lzf_set_local(hid_t dcpl, hid_t type, hid_t space){ - - int ndims; - int i; - herr_t r; - - unsigned int bufsize; - hsize_t chunkdims[32]; - - unsigned int flags; - size_t nelements = 8; - unsigned values[] = {0,0,0,0,0,0,0,0}; - - r = H5PY_GET_FILTER(dcpl, H5PY_FILTER_LZF, &flags, &nelements, values, 0, NULL); - if(r<0) return -1; - - if(nelements < 3) nelements = 3; /* First 3 slots reserved. If any higher - slots are used, preserve the contents. */ - - /* It seems the H5Z_FLAG_REVERSE flag doesn't work here, so we have to be - careful not to clobber any existing version info */ - if(values[0]==0) values[0] = H5PY_FILTER_LZF_VERSION; - if(values[1]==0) values[1] = LZF_VERSION; - - ndims = H5Pget_chunk(dcpl, 32, chunkdims); - if(ndims<0) return -1; - if(ndims>32){ - PUSH_ERR("lzf_set_local", H5E_CALLBACK, "Chunk rank exceeds limit"); - return -1; - } - - bufsize = H5Tget_size(type); - if(bufsize==0) return -1; - - for(i=0;i=3)&&(cd_values[2]!=0)){ - outbuf_size = cd_values[2]; /* Precomputed buffer guess */ - }else{ - outbuf_size = (*buf_size); - } - -#ifdef H5PY_LZF_DEBUG - fprintf(stderr, "Decompress %d chunk w/buffer %d\n", nbytes, outbuf_size); -#endif - - while(!status){ - - free(outbuf); - outbuf = malloc(outbuf_size); - - if(outbuf == NULL){ - PUSH_ERR("lzf_filter", H5E_CALLBACK, "Can't allocate decompression buffer"); - goto failed; - } - - status = lzf_decompress(*buf, nbytes, outbuf, outbuf_size); - - if(!status){ /* compression failed */ - - if(errno == E2BIG){ - outbuf_size += (*buf_size); -#ifdef H5PY_LZF_DEBUG - fprintf(stderr, " Too small: %d\n", outbuf_size); -#endif - } else if(errno == EINVAL) { - - PUSH_ERR("lzf_filter", H5E_CALLBACK, "Invalid data for LZF decompression"); - goto failed; - - } else { - PUSH_ERR("lzf_filter", H5E_CALLBACK, "Unknown LZF 
decompression error"); - goto failed; - } - - } /* if !status */ - - } /* while !status */ - - } /* compressing vs decompressing */ - - if(status != 0){ - - free(*buf); - *buf = outbuf; - *buf_size = outbuf_size; - - return status; /* Size of compressed/decompressed data */ - } - - failed: - - free(outbuf); - return 0; - -} /* End filter function */ - - - - - - - - - - - - - diff --git a/src/bitshuffle/lzf/lzf_filter.h b/src/bitshuffle/lzf/lzf_filter.h deleted file mode 100644 index 27dff83a..00000000 --- a/src/bitshuffle/lzf/lzf_filter.h +++ /dev/null @@ -1,38 +0,0 @@ -/***** Preamble block ********************************************************* -* -* This file is part of h5py, a low-level Python interface to the HDF5 library. -* -* Copyright (C) 2008 Andrew Collette -* http://h5py.alfven.org -* License: BSD (See LICENSE.txt for full license) -* -* $Date$ -* -****** End preamble block ****************************************************/ - - -#ifndef H5PY_LZF_H -#define H5PY_LZF_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* Filter revision number, starting at 1 */ -#define H5PY_FILTER_LZF_VERSION 4 - -/* Filter ID registered with the HDF Group as of 2/6/09. For maintenance - requests, contact the filter author directly. */ -#define H5PY_FILTER_LZF 32000 - -/* Register the filter with the library. Returns a negative value on failure, - and a non-negative value on success. 
-*/ -int register_lzf(void); - -#ifdef __cplusplus -} -#endif - -#endif - diff --git a/src/bitshuffle/requirements.txt b/src/bitshuffle/requirements.txt deleted file mode 100644 index 2f0d0fbb..00000000 --- a/src/bitshuffle/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Order matters -setuptools>=0.7 -Cython>=0.19 -numpy>=1.6.1 -h5py>=2.4.0 --no-binary=h5py diff --git a/src/bitshuffle/setup.cfg.example b/src/bitshuffle/setup.cfg.example deleted file mode 100644 index 6bd2ccfb..00000000 --- a/src/bitshuffle/setup.cfg.example +++ /dev/null @@ -1,10 +0,0 @@ -[install] -# These control the installation of the hdf5 dynamically loaded filter plugin. -h5plugin = 0 -h5plugin-dir = /usr/local/hdf5/lib/plugin - -[build_ext] -# Whether to compile with OpenMP multi-threading. Default is system dependant: -# False on OSX (since the clang compiler does not yet support OpenMP) and True -# otherwise. -omp = 1 diff --git a/src/bitshuffle/setup.py b/src/bitshuffle/setup.py deleted file mode 100644 index 830991cd..00000000 --- a/src/bitshuffle/setup.py +++ /dev/null @@ -1,323 +0,0 @@ -from __future__ import absolute_import, division, print_function -# I didn't import unicode_literals. They break setuptools or Cython in python -# 2.7, but python 3 seems to be happy with them. - -import glob -import os -from os import path -from setuptools import setup, Extension -from setuptools.command.build_ext import build_ext as build_ext_ -from setuptools.command.develop import develop as develop_ -from setuptools.command.install import install as install_ -import shutil -import subprocess -import sys - - -VERSION_MAJOR = 0 -VERSION_MINOR = 3 -VERSION_POINT = 5 - -# Only unset in the 'release' branch and in tags. -VERSION_DEV = 0 - -VERSION = "%d.%d.%d" % (VERSION_MAJOR, VERSION_MINOR, VERSION_POINT) -if VERSION_DEV: - VERSION = VERSION + ".dev%d" % VERSION_DEV - - -COMPILE_FLAGS = ['-O3', '-ffast-math', '-march=native', '-std=c99'] -# Cython breaks strict aliasing rules. 
-COMPILE_FLAGS += ['-fno-strict-aliasing'] -COMPILE_FLAGS += ['-fPIC'] -COMPILE_FLAGS_MSVC = ['/Ox', '/fp:fast'] - -MACROS = [ - ('BSHUF_VERSION_MAJOR', VERSION_MAJOR), - ('BSHUF_VERSION_MINOR', VERSION_MINOR), - ('BSHUF_VERSION_POINT', VERSION_POINT), -] - - -H5PLUGINS_DEFAULT = '/usr/local/hdf5/lib/plugin' - -# OSX's clang compliler does not support OpenMP. -if sys.platform == 'darwin': - OMP_DEFAULT = False -else: - OMP_DEFAULT = True - -FALLBACK_CONFIG = { - 'include_dirs': [], - 'library_dirs': [], - 'libraries': [], - 'extra_compile_args': [], - 'extra_link_args': [], -} - -if 'HDF5_DIR' in os.environ: - FALLBACK_CONFIG['include_dirs'] += [os.environ['HDF5_DIR'] + '/include'] # macports - FALLBACK_CONFIG['library_dirs'] += [os.environ['HDF5_DIR'] + '/lib'] # macports -elif sys.platform == 'darwin': - # putting here both macports and homebrew paths will generate - # "ld: warning: dir not found" at the linking phase - FALLBACK_CONFIG['include_dirs'] += ['/opt/local/include'] # macports - FALLBACK_CONFIG['library_dirs'] += ['/opt/local/lib'] # macports - FALLBACK_CONFIG['include_dirs'] += ['/usr/local/include'] # homebrew - FALLBACK_CONFIG['library_dirs'] += ['/usr/local/lib'] # homebrew -elif sys.platform.startswith('freebsd'): - FALLBACK_CONFIG['include_dirs'] += ['/usr/local/include'] # homebrew - FALLBACK_CONFIG['library_dirs'] += ['/usr/local/lib'] # homebrew - -FALLBACK_CONFIG['include_dirs'] = [d for d in FALLBACK_CONFIG['include_dirs'] - if path.isdir(d)] -FALLBACK_CONFIG['library_dirs'] = [d for d in FALLBACK_CONFIG['library_dirs'] - if path.isdir(d)] - -FALLBACK_CONFIG['extra_compile_args'] = ['-DH5_BUILT_AS_DYNAMIC_LIB'] - - -def pkgconfig(*packages, **kw): - config = kw.setdefault('config', {}) - optional_args = kw.setdefault('optional', '') - flag_map = {'include_dirs': ['--cflags-only-I', 2], - 'library_dirs': ['--libs-only-L', 2], - 'libraries': ['--libs-only-l', 2], - 'extra_compile_args': ['--cflags-only-other', 0], - 'extra_link_args': 
['--libs-only-other', 0], - } - for package in packages: - try: - subprocess.check_output(["pkg-config", package]) - except (subprocess.CalledProcessError, OSError): - print("Can't find %s with pkg-config fallback to " - "static config" % package) - for distutils_key in flag_map: - config.setdefault(distutils_key, []).extend( - FALLBACK_CONFIG[distutils_key]) - config['libraries'].append(package) - else: - for distutils_key, (pkg_option, n) in flag_map.items(): - items = subprocess.check_output( - ['pkg-config', optional_args, pkg_option, package] - ).decode('utf8').split() - opt = config.setdefault(distutils_key, []) - opt.extend([i[n:] for i in items]) - return config - - -ext_bshuf = Extension( - "bitshuffle.ext", - sources=["bitshuffle/ext.pyx", "src/bitshuffle.c", - "src/bitshuffle_core.c", "src/iochain.c", - "lz4/lz4.c"], - include_dirs=["src/", "lz4/"], - depends=["src/bitshuffle.h", "src/bitshuffle_core.h", - "src/iochain.h", "lz4/lz4.h"], - libraries=[], - define_macros=MACROS, -) - -h5filter = Extension( - "bitshuffle.h5", - sources=["bitshuffle/h5.pyx", "src/bshuf_h5filter.c", - "src/bitshuffle.c", "src/bitshuffle_core.c", - "src/iochain.c", "lz4/lz4.c"], - depends=["src/bitshuffle.h", "src/bitshuffle_core.h", - "src/iochain.h", "src/bshuf_h5filter.h", - "lz4/lz4.h"], - define_macros=MACROS, - **pkgconfig("hdf5", config=dict( - include_dirs=["src/", "lz4/"])) -) - -filter_plugin = Extension( - "bitshuffle.plugin.libh5bshuf", - sources=["src/bshuf_h5plugin.c", "src/bshuf_h5filter.c", - "src/bitshuffle.c", "src/bitshuffle_core.c", - "src/iochain.c", "lz4/lz4.c"], - depends=["src/bitshuffle.h", "src/bitshuffle_core.h", - "src/iochain.h", 'src/bshuf_h5filter.h', - "lz4/lz4.h"], - define_macros=MACROS, - **pkgconfig("hdf5", config=dict( - include_dirs=["src/", "lz4/"])) -) - -lzf_plugin = Extension( - "bitshuffle.plugin.libh5LZF", - sources=["src/lzf_h5plugin.c", "lzf/lzf_filter.c", - "lzf/lzf/lzf_c.c", "lzf/lzf/lzf_d.c"], - depends=["lzf/lzf_filter.h", 
"lzf/lzf/lzf.h", - "lzf/lzf/lzfP.h"], - **pkgconfig("hdf5", config=dict( - include_dirs=["lzf/", "lzf/lzf/"])) -) - - -EXTENSIONS = [ext_bshuf, h5filter] -# Check for plugin hdf5 plugin support (hdf5 >= 1.8.11) -HDF5_PLUGIN_SUPPORT = False -CPATHS = os.environ['CPATH'].split(':') if 'CPATH' in os.environ else [] -for p in ["/usr/include"] + pkgconfig("hdf5")["include_dirs"] + CPATHS: - if os.path.exists(os.path.join(p, "H5PLextern.h")): - HDF5_PLUGIN_SUPPORT = True - -if HDF5_PLUGIN_SUPPORT: - EXTENSIONS.extend([filter_plugin, lzf_plugin]) - - -class develop(develop_): - def run(self): - # Dummy directory for copying build plugins. - if not path.isdir('bitshuffle/plugin'): - os.mkdir('bitshuffle/plugin') - develop_.run(self) - - -# Custom installation to include installing dynamic filters. -class install(install_): - user_options = install_.user_options + [ - ('h5plugin', None, - 'Install HDF5 filter plugins for use outside of python.'), - ('h5plugin-dir=', None, - 'Where to install filter plugins. Default %s.' % H5PLUGINS_DEFAULT), - ] - - def initialize_options(self): - install_.initialize_options(self) - self.h5plugin = False - self.h5plugin_dir = H5PLUGINS_DEFAULT - - def finalize_options(self): - install_.finalize_options(self) - if self.h5plugin not in ('0', '1', True, False): - raise ValueError("Invalid h5plugin argument. Mut be '0' or '1'.") - self.h5plugin = int(self.h5plugin) - self.h5plugin_dir = path.abspath(self.h5plugin_dir) - - def run(self): - install_.run(self) - if self.h5plugin: - if not HDF5_PLUGIN_SUPPORT: - print("HDF5 < 1.8.11, not installing filter plugins.") - return - plugin_build = path.join(self.build_lib, "bitshuffle", "plugin") - try: - os.makedirs(self.h5plugin_dir) - except OSError as e: - if e.args[0] == 17: - # Directory already exists, this is fine. 
- pass - else: - raise - plugin_libs = glob.glob(path.join(plugin_build, "*")) - for plugin_lib in plugin_libs: - plugin_name = path.split(plugin_lib)[1] - shutil.copy2(plugin_lib, - path.join(self.h5plugin_dir, plugin_name)) - print("Installed HDF5 filter plugins to %s" % self.h5plugin_dir) - - -# Command line or site.cfg specification of OpenMP. -class build_ext(build_ext_): - user_options = build_ext_.user_options + [ - ('omp=', None, "Whether to compile with OpenMP threading. Default" - " on current system is %s." % str(OMP_DEFAULT)) - ] - boolean_options = build_ext_.boolean_options + ['omp'] - - def initialize_options(self): - build_ext_.initialize_options(self) - self.omp = OMP_DEFAULT - - def finalize_options(self): - # For some reason this gets run twice. Careful to print messages and - # add arguments only one time. - build_ext_.finalize_options(self) - - if self.omp not in ('0', '1', True, False): - raise ValueError("Invalid omp argument. Mut be '0' or '1'.") - self.omp = int(self.omp) - - import numpy as np - ext_bshuf.include_dirs.append(np.get_include()) - - # Required only by old version of setuptools < 18.0 - from Cython.Build import cythonize - self.extensions = cythonize(self.extensions) - for ext in self.extensions: - ext._needs_stub = False - - def build_extensions(self): - c = self.compiler.compiler_type - - if self.omp not in ('0', '1', True, False): - raise ValueError("Invalid omp argument. Mut be '0' or '1'.") - self.omp = int(self.omp) - - if self.omp: - if not hasattr(self, "_printed_omp_message"): - self._printed_omp_message = True - print("\n#################################") - print("# Compiling with OpenMP support #") - print("#################################\n") - # More portable to pass -fopenmp to linker. 
- # self.libraries += ['gomp'] - if self.compiler.compiler_type == 'msvc': - openmpflag = '/openmp' - compileflags = COMPILE_FLAGS_MSVC - else: - openmpflag = '-fopenmp' - compileflags = COMPILE_FLAGS - for e in self.extensions: - e.extra_compile_args = list(set(e.extra_compile_args).union(compileflags)) - if openmpflag not in e.extra_compile_args: - e.extra_compile_args += [openmpflag] - if openmpflag not in e.extra_link_args: - e.extra_link_args += [openmpflag] - - build_ext_.build_extensions(self) - - -# Don't install numpy/cython/hdf5 if not needed -for cmd in ["sdist", "clean", - "--help", "--help-commands", "--version"]: - if cmd in sys.argv: - setup_requires = [] - break -else: - setup_requires = ["Cython>=0.19", "numpy>=1.6.1"] - -with open('requirements.txt') as f: - requires = f.read().splitlines() - requires = [r.split()[0] for r in requires] - -with open('README.rst') as r: - long_description = r.read() - -# TODO hdf5 support should be an "extra". Figure out how to set this up. 
-setup( - name='bitshuffle', - version=VERSION, - - packages=['bitshuffle', 'bitshuffle.tests'], - scripts=[], - ext_modules=EXTENSIONS, - cmdclass={'build_ext': build_ext, 'install': install, 'develop': develop}, - setup_requires=setup_requires, - install_requires=requires, - # extras_require={'H5': ["h5py"]}, - package_data={'': ['data/*']}, - - # metadata for upload to PyPI - author="Kiyoshi Wesley Masui", - author_email="kiyo@physics.ubc.ca", - description="Bitshuffle filter for improving typed data compression.", - long_description=long_description, - license="MIT", - url="https://github.com/kiyo-masui/bitshuffle", - download_url=("https://github.com/kiyo-masui/bitshuffle/tarball/%s" - % VERSION), - keywords=['compression', 'hdf5', 'numpy'], -) diff --git a/src/bitshuffle/src/bitshuffle.c b/src/bitshuffle/src/bitshuffle.c deleted file mode 100644 index 54ff045f..00000000 --- a/src/bitshuffle/src/bitshuffle.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Bitshuffle - Filter for improving compression of typed binary data. - * - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. - * - */ - -#include "bitshuffle.h" -#include "bitshuffle_core.h" -#include "bitshuffle_internals.h" -#include "lz4.h" - -#include -#include - - -// Constants. -// Use fast decompression instead of safe decompression for LZ4. -#define BSHUF_LZ4_DECOMPRESS_FAST - - -// Macros. -#define CHECK_ERR_FREE_LZ(count, buf) if (count < 0) { \ - free(buf); return count - 1000; } - - -/* Bitshuffle and compress a single block. 
*/ -int64_t bshuf_compress_lz4_block(ioc_chain *C_ptr, \ - const size_t size, const size_t elem_size) { - - int64_t nbytes, count; - void *tmp_buf_bshuf; - void *tmp_buf_lz4; - size_t this_iter; - const void *in; - void *out; - - tmp_buf_bshuf = malloc(size * elem_size); - if (tmp_buf_bshuf == NULL) return -1; - - tmp_buf_lz4 = malloc(LZ4_compressBound(size * elem_size)); - if (tmp_buf_lz4 == NULL){ - free(tmp_buf_bshuf); - return -1; - } - - - in = ioc_get_in(C_ptr, &this_iter); - ioc_set_next_in(C_ptr, &this_iter, (void*) ((char*) in + size * elem_size)); - - count = bshuf_trans_bit_elem(in, tmp_buf_bshuf, size, elem_size); - if (count < 0) { - free(tmp_buf_lz4); - free(tmp_buf_bshuf); - return count; - } - nbytes = LZ4_compress((const char*) tmp_buf_bshuf, (char*) tmp_buf_lz4, size * elem_size); - free(tmp_buf_bshuf); - CHECK_ERR_FREE_LZ(nbytes, tmp_buf_lz4); - - out = ioc_get_out(C_ptr, &this_iter); - ioc_set_next_out(C_ptr, &this_iter, (void *) ((char *) out + nbytes + 4)); - - bshuf_write_uint32_BE(out, nbytes); - memcpy((char *) out + 4, tmp_buf_lz4, nbytes); - - free(tmp_buf_lz4); - - return nbytes + 4; -} - - -/* Decompress and bitunshuffle a single block. 
*/ -int64_t bshuf_decompress_lz4_block(ioc_chain *C_ptr, - const size_t size, const size_t elem_size) { - - int64_t nbytes, count; - void *out, *tmp_buf; - const void *in; - size_t this_iter; - int32_t nbytes_from_header; - - in = ioc_get_in(C_ptr, &this_iter); - nbytes_from_header = bshuf_read_uint32_BE(in); - ioc_set_next_in(C_ptr, &this_iter, - (void*) ((char*) in + nbytes_from_header + 4)); - - out = ioc_get_out(C_ptr, &this_iter); - ioc_set_next_out(C_ptr, &this_iter, - (void *) ((char *) out + size * elem_size)); - - tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - -#ifdef BSHUF_LZ4_DECOMPRESS_FAST - nbytes = LZ4_decompress_fast((const char*) in + 4, (char*) tmp_buf, size * elem_size); - CHECK_ERR_FREE_LZ(nbytes, tmp_buf); - if (nbytes != nbytes_from_header) { - free(tmp_buf); - return -91; - } -#else - nbytes = LZ4_decompress_safe((const char*) in + 4, (char *) tmp_buf, nbytes_from_header, - size * elem_size); - CHECK_ERR_FREE_LZ(nbytes, tmp_buf); - if (nbytes != size * elem_size) { - free(tmp_buf); - return -91; - } - nbytes = nbytes_from_header; -#endif - count = bshuf_untrans_bit_elem(tmp_buf, out, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - nbytes += 4; - - free(tmp_buf); - return nbytes; -} - - -/* ---- Public functions ---- - * - * See header file for description and usage. - * - */ - -size_t bshuf_compress_lz4_bound(const size_t size, - const size_t elem_size, size_t block_size) { - - size_t bound, leftover; - - if (block_size == 0) { - block_size = bshuf_default_block_size(elem_size); - } - if (block_size % BSHUF_BLOCKED_MULT) return -81; - - // Note that each block gets a 4 byte header. - // Size of full blocks. - bound = (LZ4_compressBound(block_size * elem_size) + 4) * (size / block_size); - // Size of partial blocks, if any. 
- leftover = ((size % block_size) / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT; - if (leftover) bound += LZ4_compressBound(leftover * elem_size) + 4; - // Size of uncompressed data not fitting into any blocks. - bound += (size % BSHUF_BLOCKED_MULT) * elem_size; - return bound; -} - - -int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size, - const size_t elem_size, size_t block_size) { - return bshuf_blocked_wrap_fun(&bshuf_compress_lz4_block, in, out, size, - elem_size, block_size); -} - - -int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size, - const size_t elem_size, size_t block_size) { - return bshuf_blocked_wrap_fun(&bshuf_decompress_lz4_block, in, out, size, - elem_size, block_size); -} - diff --git a/src/bitshuffle/src/bitshuffle.h b/src/bitshuffle/src/bitshuffle.h deleted file mode 100644 index 3df95f47..00000000 --- a/src/bitshuffle/src/bitshuffle.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Bitshuffle - Filter for improving compression of typed binary data. - * - * This file is part of Bitshuffle - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. - * - * - * Header File - * - * Worker routines return an int64_t which is the number of bytes processed - * if positive or an error code if negative. - * - * Error codes: - * -1 : Failed to allocate memory. - * -11 : Missing SSE. - * -12 : Missing AVX. - * -80 : Input size not a multiple of 8. - * -81 : block_size not multiple of 8. - * -91 : Decompression error, wrong number of bytes processed. - * -1YYY : Error internal to compression routine with error code -YYY. - */ - - -#ifndef BITSHUFFLE_H -#define BITSHUFFLE_H - -#include -#include "bitshuffle_core.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* ---- bshuf_compress_lz4_bound ---- - * - * Bound on size of data compressed with *bshuf_compress_lz4*. 
- * - * Parameters - * ---------- - * size : number of elements in input - * elem_size : element size of typed data - * block_size : Process in blocks of this many elements. Pass 0 to - * select automatically (recommended). - * - * Returns - * ------- - * Bound on compressed data size. - * - */ -size_t bshuf_compress_lz4_bound(const size_t size, - const size_t elem_size, size_t block_size); - - -/* ---- bshuf_compress_lz4 ---- - * - * Bitshuffled and compress the data using LZ4. - * - * Transpose within elements, in blocks of data of *block_size* elements then - * compress the blocks using LZ4. In the output buffer, each block is prefixed - * by a 4 byte integer giving the compressed size of that block. - * - * Output buffer must be large enough to hold the compressed data. This could - * be in principle substantially larger than the input buffer. Use the routine - * *bshuf_compress_lz4_bound* to get an upper limit. - * - * Parameters - * ---------- - * in : input buffer, must be of size * elem_size bytes - * out : output buffer, must be large enough to hold data. - * size : number of elements in input - * elem_size : element size of typed data - * block_size : Process in blocks of this many elements. Pass 0 to - * select automatically (recommended). - * - * Returns - * ------- - * number of bytes used in output buffer, negative error-code if failed. - * - */ -int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size, const size_t - elem_size, size_t block_size); - - -/* ---- bshuf_decompress_lz4 ---- - * - * Undo compression and bitshuffling. - * - * Decompress data then un-bitshuffle it in blocks of *block_size* elements. - * - * To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size* - * must patch the parameters used to compress the data. - * - * NOT TO BE USED WITH UNTRUSTED DATA: This routine uses the function - * LZ4_decompress_fast from LZ4, which does not protect against maliciously - * formed datasets. 
By modifying the compressed data, this function could be - * coerced into leaving the boundaries of the input buffer. - * - * Parameters - * ---------- - * in : input buffer - * out : output buffer, must be of size * elem_size bytes - * size : number of elements in input - * elem_size : element size of typed data - * block_size : Process in blocks of this many elements. Pass 0 to - * select automatically (recommended). - * - * Returns - * ------- - * number of bytes consumed in *input* buffer, negative error-code if failed. - * - */ -int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size, - const size_t elem_size, size_t block_size); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // BITSHUFFLE_H diff --git a/src/bitshuffle/src/bitshuffle_core.c b/src/bitshuffle/src/bitshuffle_core.c deleted file mode 100644 index 8028e3a6..00000000 --- a/src/bitshuffle/src/bitshuffle_core.c +++ /dev/null @@ -1,1862 +0,0 @@ -/* - * Bitshuffle - Filter for improving compression of typed binary data. - * - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. - * - */ - -#include "bitshuffle_core.h" -#include "bitshuffle_internals.h" - -#include -#include - - -#if defined(__AVX2__) && defined (__SSE2__) -#define USEAVX2 -#endif - -#if defined(__SSE2__) -#define USESSE2 -#endif - -#if defined(__ARM_NEON__) || (__ARM_NEON) -#define USEARMNEON -#endif - -// Conditional includes for SSE2 and AVX2. -#ifdef USEAVX2 -#include -#elif defined USESSE2 -#include -#elif defined USEARMNEON -#include -#endif - -#if defined(_OPENMP) && defined(_MSC_VER) -typedef int64_t omp_size_t; -#else -typedef size_t omp_size_t; -#endif - -// Macros. -#define CHECK_MULT_EIGHT(n) if (n % 8) return -80; -#define MAX(X,Y) ((X) > (Y) ? (X) : (Y)) - - -/* ---- Functions indicating compile time instruction set. 
---- */ - -int bshuf_using_NEON(void) { -#ifdef USEARMNEON - return 1; -#else - return 0; -#endif -} - - -int bshuf_using_SSE2(void) { -#ifdef USESSE2 - return 1; -#else - return 0; -#endif -} - - -int bshuf_using_AVX2(void) { -#ifdef USEAVX2 - return 1; -#else - return 0; -#endif -} - - -/* ---- Worker code not requiring special instruction sets. ---- - * - * The following code does not use any x86 specific vectorized instructions - * and should compile on any machine - * - */ - -/* Transpose 8x8 bit array packed into a single quadword *x*. - * *t* is workspace. */ -#define TRANS_BIT_8X8(x, t) { \ - t = (x ^ (x >> 7)) & 0x00AA00AA00AA00AALL; \ - x = x ^ t ^ (t << 7); \ - t = (x ^ (x >> 14)) & 0x0000CCCC0000CCCCLL; \ - x = x ^ t ^ (t << 14); \ - t = (x ^ (x >> 28)) & 0x00000000F0F0F0F0LL; \ - x = x ^ t ^ (t << 28); \ - } - -/* Transpose 8x8 bit array along the diagonal from upper right - to lower left */ -#define TRANS_BIT_8X8_BE(x, t) { \ - t = (x ^ (x >> 9)) & 0x0055005500550055LL; \ - x = x ^ t ^ (t << 9); \ - t = (x ^ (x >> 18)) & 0x0000333300003333LL; \ - x = x ^ t ^ (t << 18); \ - t = (x ^ (x >> 36)) & 0x000000000F0F0F0FLL; \ - x = x ^ t ^ (t << 36); \ - } - -/* Transpose of an array of arbitrarily typed elements. */ -#define TRANS_ELEM_TYPE(in, out, lda, ldb, type_t) { \ - size_t ii, jj, kk; \ - const type_t* in_type = (const type_t*) in; \ - type_t* out_type = (type_t*) out; \ - for(ii = 0; ii + 7 < lda; ii += 8) { \ - for(jj = 0; jj < ldb; jj++) { \ - for(kk = 0; kk < 8; kk++) { \ - out_type[jj*lda + ii + kk] = \ - in_type[ii*ldb + kk * ldb + jj]; \ - } \ - } \ - } \ - for(ii = lda - lda % 8; ii < lda; ii ++) { \ - for(jj = 0; jj < ldb; jj++) { \ - out_type[jj*lda + ii] = in_type[ii*ldb + jj]; \ - } \ - } \ - } - - -/* Memory copy with bshuf call signature. For testing and profiling. 
*/ -int64_t bshuf_copy(const void* in, void* out, const size_t size, - const size_t elem_size) { - - const char* in_b = (const char*) in; - char* out_b = (char*) out; - - memcpy(out_b, in_b, size * elem_size); - return size * elem_size; -} - - -/* Transpose bytes within elements, starting partway through input. */ -int64_t bshuf_trans_byte_elem_remainder(const void* in, void* out, const size_t size, - const size_t elem_size, const size_t start) { - - size_t ii, jj, kk; - const char* in_b = (const char*) in; - char* out_b = (char*) out; - - CHECK_MULT_EIGHT(start); - - if (size > start) { - // ii loop separated into 2 loops so the compiler can unroll - // the inner one. - for (ii = start; ii + 7 < size; ii += 8) { - for (jj = 0; jj < elem_size; jj++) { - for (kk = 0; kk < 8; kk++) { - out_b[jj * size + ii + kk] - = in_b[ii * elem_size + kk * elem_size + jj]; - } - } - } - for (ii = size - size % 8; ii < size; ii ++) { - for (jj = 0; jj < elem_size; jj++) { - out_b[jj * size + ii] = in_b[ii * elem_size + jj]; - } - } - } - return size * elem_size; -} - - -/* Transpose bytes within elements. */ -int64_t bshuf_trans_byte_elem_scal(const void* in, void* out, const size_t size, - const size_t elem_size) { - - return bshuf_trans_byte_elem_remainder(in, out, size, elem_size, 0); -} - - -/* Transpose bits within bytes. */ -int64_t bshuf_trans_bit_byte_remainder(const void* in, void* out, const size_t size, - const size_t elem_size, const size_t start_byte) { - - const uint64_t* in_b = (const uint64_t*) in; - uint8_t* out_b = (uint8_t*) out; - - uint64_t x, t; - - size_t ii, kk; - size_t nbyte = elem_size * size; - size_t nbyte_bitrow = nbyte / 8; - - uint64_t e=1; - const int little_endian = *(uint8_t *) &e == 1; - const size_t bit_row_skip = little_endian ? nbyte_bitrow : -nbyte_bitrow; - const int64_t bit_row_offset = little_endian ? 
0 : 7 * nbyte_bitrow; - - CHECK_MULT_EIGHT(nbyte); - CHECK_MULT_EIGHT(start_byte); - - for (ii = start_byte / 8; ii < nbyte_bitrow; ii ++) { - x = in_b[ii]; - if (little_endian) { - TRANS_BIT_8X8(x, t); - } else { - TRANS_BIT_8X8_BE(x, t); - } - for (kk = 0; kk < 8; kk ++) { - out_b[bit_row_offset + kk * bit_row_skip + ii] = x; - x = x >> 8; - } - } - return size * elem_size; -} - - -/* Transpose bits within bytes. */ -int64_t bshuf_trans_bit_byte_scal(const void* in, void* out, const size_t size, - const size_t elem_size) { - - return bshuf_trans_bit_byte_remainder(in, out, size, elem_size, 0); -} - - -/* General transpose of an array, optimized for large element sizes. */ -int64_t bshuf_trans_elem(const void* in, void* out, const size_t lda, - const size_t ldb, const size_t elem_size) { - - size_t ii, jj; - const char* in_b = (const char*) in; - char* out_b = (char*) out; - for(ii = 0; ii < lda; ii++) { - for(jj = 0; jj < ldb; jj++) { - memcpy(&out_b[(jj*lda + ii) * elem_size], - &in_b[(ii*ldb + jj) * elem_size], elem_size); - } - } - return lda * ldb * elem_size; -} - - -/* Transpose rows of shuffled bits (size / 8 bytes) within groups of 8. */ -int64_t bshuf_trans_bitrow_eight(const void* in, void* out, const size_t size, - const size_t elem_size) { - - size_t nbyte_bitrow = size / 8; - - CHECK_MULT_EIGHT(size); - - return bshuf_trans_elem(in, out, 8, elem_size, nbyte_bitrow); -} - - -/* Transpose bits within elements. 
*/ -int64_t bshuf_trans_bit_elem_scal(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; - void *tmp_buf; - - CHECK_MULT_EIGHT(size); - - tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - - count = bshuf_trans_byte_elem_scal(in, out, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_trans_bit_byte_scal(out, tmp_buf, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); - - free(tmp_buf); - - return count; -} - - -/* For data organized into a row for each bit (8 * elem_size rows), transpose - * the bytes. */ -int64_t bshuf_trans_byte_bitrow_scal(const void* in, void* out, const size_t size, - const size_t elem_size) { - size_t ii, jj, kk, nbyte_row; - const char *in_b; - char *out_b; - - - in_b = (const char*) in; - out_b = (char*) out; - - nbyte_row = size / 8; - - CHECK_MULT_EIGHT(size); - - for (jj = 0; jj < elem_size; jj++) { - for (ii = 0; ii < nbyte_row; ii++) { - for (kk = 0; kk < 8; kk++) { - out_b[ii * 8 * elem_size + jj * 8 + kk] = \ - in_b[(jj * 8 + kk) * nbyte_row + ii]; - } - } - } - return size * elem_size; -} - - -/* Shuffle bits within the bytes of eight element blocks. */ -int64_t bshuf_shuffle_bit_eightelem_scal(const void* in, void* out, \ - const size_t size, const size_t elem_size) { - - const char *in_b; - char *out_b; - uint64_t x, t; - size_t ii, jj, kk; - size_t nbyte, out_index; - - uint64_t e=1; - const int little_endian = *(uint8_t *) &e == 1; - const size_t elem_skip = little_endian ? elem_size : -elem_size; - const uint64_t elem_offset = little_endian ? 
0 : 7 * elem_size; - - CHECK_MULT_EIGHT(size); - - in_b = (const char*) in; - out_b = (char*) out; - - nbyte = elem_size * size; - - for (jj = 0; jj < 8 * elem_size; jj += 8) { - for (ii = 0; ii + 8 * elem_size - 1 < nbyte; ii += 8 * elem_size) { - x = *((uint64_t*) &in_b[ii + jj]); - if (little_endian) { - TRANS_BIT_8X8(x, t); - } else { - TRANS_BIT_8X8_BE(x, t); - } - for (kk = 0; kk < 8; kk++) { - out_index = ii + jj / 8 + elem_offset + kk * elem_skip; - *((uint8_t*) &out_b[out_index]) = x; - x = x >> 8; - } - } - } - return size * elem_size; -} - - -/* Untranspose bits within elements. */ -int64_t bshuf_untrans_bit_elem_scal(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; - void *tmp_buf; - - CHECK_MULT_EIGHT(size); - - tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - - count = bshuf_trans_byte_bitrow_scal(in, tmp_buf, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_shuffle_bit_eightelem_scal(tmp_buf, out, size, elem_size); - - free(tmp_buf); - - return count; -} - - -/* ---- Worker code that uses Arm NEON ---- - * - * The following code makes use of the Arm NEON instruction set. - * NEON technology is the implementation of the ARM Advanced Single - * Instruction Multiple Data (SIMD) extension. - * The NEON unit is the component of the processor that executes SIMD instructions. - * It is also called the NEON Media Processing Engine (MPE). - * - */ - -#ifdef USEARMNEON - -/* Transpose bytes within elements for 16 bit elements. 
*/ -int64_t bshuf_trans_byte_elem_NEON_16(const void* in, void* out, const size_t size) { - - size_t ii; - const char *in_b = (const char*) in; - char *out_b = (char*) out; - int8x16_t a0, b0, a1, b1; - - for (ii=0; ii + 15 < size; ii += 16) { - a0 = vld1q_s8(in_b + 2*ii + 0*16); - b0 = vld1q_s8(in_b + 2*ii + 1*16); - - a1 = vzip1q_s8(a0, b0); - b1 = vzip2q_s8(a0, b0); - - a0 = vzip1q_s8(a1, b1); - b0 = vzip2q_s8(a1, b1); - - a1 = vzip1q_s8(a0, b0); - b1 = vzip2q_s8(a0, b0); - - a0 = vzip1q_s8(a1, b1); - b0 = vzip2q_s8(a1, b1); - - vst1q_s8(out_b + 0*size + ii, a0); - vst1q_s8(out_b + 1*size + ii, b0); - } - - return bshuf_trans_byte_elem_remainder(in, out, size, 2, - size - size % 16); -} - - -/* Transpose bytes within elements for 32 bit elements. */ -int64_t bshuf_trans_byte_elem_NEON_32(const void* in, void* out, const size_t size) { - - size_t ii; - const char *in_b; - char *out_b; - in_b = (const char*) in; - out_b = (char*) out; - int8x16_t a0, b0, c0, d0, a1, b1, c1, d1; - int64x2_t a2, b2, c2, d2; - - for (ii=0; ii + 15 < size; ii += 16) { - a0 = vld1q_s8(in_b + 4*ii + 0*16); - b0 = vld1q_s8(in_b + 4*ii + 1*16); - c0 = vld1q_s8(in_b + 4*ii + 2*16); - d0 = vld1q_s8(in_b + 4*ii + 3*16); - - a1 = vzip1q_s8(a0, b0); - b1 = vzip2q_s8(a0, b0); - c1 = vzip1q_s8(c0, d0); - d1 = vzip2q_s8(c0, d0); - - a0 = vzip1q_s8(a1, b1); - b0 = vzip2q_s8(a1, b1); - c0 = vzip1q_s8(c1, d1); - d0 = vzip2q_s8(c1, d1); - - a1 = vzip1q_s8(a0, b0); - b1 = vzip2q_s8(a0, b0); - c1 = vzip1q_s8(c0, d0); - d1 = vzip2q_s8(c0, d0); - - a2 = vzip1q_s64(vreinterpretq_s64_s8(a1), vreinterpretq_s64_s8(c1)); - b2 = vzip2q_s64(vreinterpretq_s64_s8(a1), vreinterpretq_s64_s8(c1)); - c2 = vzip1q_s64(vreinterpretq_s64_s8(b1), vreinterpretq_s64_s8(d1)); - d2 = vzip2q_s64(vreinterpretq_s64_s8(b1), vreinterpretq_s64_s8(d1)); - - vst1q_s64((int64_t *) (out_b + 0*size + ii), a2); - vst1q_s64((int64_t *) (out_b + 1*size + ii), b2); - vst1q_s64((int64_t *) (out_b + 2*size + ii), c2); - vst1q_s64((int64_t *) 
(out_b + 3*size + ii), d2); - } - - return bshuf_trans_byte_elem_remainder(in, out, size, 4, - size - size % 16); -} - - -/* Transpose bytes within elements for 64 bit elements. */ -int64_t bshuf_trans_byte_elem_NEON_64(const void* in, void* out, const size_t size) { - - size_t ii; - const char* in_b = (const char*) in; - char* out_b = (char*) out; - int8x16_t a0, b0, c0, d0, e0, f0, g0, h0; - int8x16_t a1, b1, c1, d1, e1, f1, g1, h1; - - for (ii=0; ii + 15 < size; ii += 16) { - a0 = vld1q_s8(in_b + 8*ii + 0*16); - b0 = vld1q_s8(in_b + 8*ii + 1*16); - c0 = vld1q_s8(in_b + 8*ii + 2*16); - d0 = vld1q_s8(in_b + 8*ii + 3*16); - e0 = vld1q_s8(in_b + 8*ii + 4*16); - f0 = vld1q_s8(in_b + 8*ii + 5*16); - g0 = vld1q_s8(in_b + 8*ii + 6*16); - h0 = vld1q_s8(in_b + 8*ii + 7*16); - - a1 = vzip1q_s8 (a0, b0); - b1 = vzip2q_s8 (a0, b0); - c1 = vzip1q_s8 (c0, d0); - d1 = vzip2q_s8 (c0, d0); - e1 = vzip1q_s8 (e0, f0); - f1 = vzip2q_s8 (e0, f0); - g1 = vzip1q_s8 (g0, h0); - h1 = vzip2q_s8 (g0, h0); - - a0 = vzip1q_s8 (a1, b1); - b0 = vzip2q_s8 (a1, b1); - c0 = vzip1q_s8 (c1, d1); - d0 = vzip2q_s8 (c1, d1); - e0 = vzip1q_s8 (e1, f1); - f0 = vzip2q_s8 (e1, f1); - g0 = vzip1q_s8 (g1, h1); - h0 = vzip2q_s8 (g1, h1); - - a1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (c0)); - b1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (c0)); - c1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (b0), vreinterpretq_s32_s8 (d0)); - d1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (b0), vreinterpretq_s32_s8 (d0)); - e1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (g0)); - f1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (g0)); - g1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (f0), vreinterpretq_s32_s8 (h0)); - h1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (f0), vreinterpretq_s32_s8 (h0)); - - a0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (a1), vreinterpretq_s64_s8 (e1)); - b0 = 
(int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (a1), vreinterpretq_s64_s8 (e1)); - c0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (b1), vreinterpretq_s64_s8 (f1)); - d0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (b1), vreinterpretq_s64_s8 (f1)); - e0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (c1), vreinterpretq_s64_s8 (g1)); - f0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (c1), vreinterpretq_s64_s8 (g1)); - g0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (d1), vreinterpretq_s64_s8 (h1)); - h0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (d1), vreinterpretq_s64_s8 (h1)); - - vst1q_s8(out_b + 0*size + ii, a0); - vst1q_s8(out_b + 1*size + ii, b0); - vst1q_s8(out_b + 2*size + ii, c0); - vst1q_s8(out_b + 3*size + ii, d0); - vst1q_s8(out_b + 4*size + ii, e0); - vst1q_s8(out_b + 5*size + ii, f0); - vst1q_s8(out_b + 6*size + ii, g0); - vst1q_s8(out_b + 7*size + ii, h0); - } - - return bshuf_trans_byte_elem_remainder(in, out, size, 8, - size - size % 16); -} - - -/* Transpose bytes within elements using best NEON algorithm available. */ -int64_t bshuf_trans_byte_elem_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; - - // Trivial cases: power of 2 bytes. - switch (elem_size) { - case 1: - count = bshuf_copy(in, out, size, elem_size); - return count; - case 2: - count = bshuf_trans_byte_elem_NEON_16(in, out, size); - return count; - case 4: - count = bshuf_trans_byte_elem_NEON_32(in, out, size); - return count; - case 8: - count = bshuf_trans_byte_elem_NEON_64(in, out, size); - return count; - } - - // Worst case: odd number of bytes. Turns out that this is faster for - // (odd * 2) byte elements as well (hence % 4). - if (elem_size % 4) { - count = bshuf_trans_byte_elem_scal(in, out, size, elem_size); - return count; - } - - // Multiple of power of 2: transpose hierarchically. 
- { - size_t nchunk_elem; - void* tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - - if ((elem_size % 8) == 0) { - nchunk_elem = elem_size / 8; - TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int64_t); - count = bshuf_trans_byte_elem_NEON_64(out, tmp_buf, - size * nchunk_elem); - bshuf_trans_elem(tmp_buf, out, 8, nchunk_elem, size); - } else if ((elem_size % 4) == 0) { - nchunk_elem = elem_size / 4; - TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int32_t); - count = bshuf_trans_byte_elem_NEON_32(out, tmp_buf, - size * nchunk_elem); - bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size); - } else { - // Not used since scalar algorithm is faster. - nchunk_elem = elem_size / 2; - TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t); - count = bshuf_trans_byte_elem_NEON_16(out, tmp_buf, - size * nchunk_elem); - bshuf_trans_elem(tmp_buf, out, 2, nchunk_elem, size); - } - - free(tmp_buf); - return count; - } -} - - -/* Creates a mask made up of the most significant - * bit of each byte of 'input' - */ -int32_t move_byte_mask_neon(uint8x16_t input) { - - return ( ((input[0] & 0x80) >> 7) | (((input[1] & 0x80) >> 7) << 1) | (((input[2] & 0x80) >> 7) << 2) | (((input[3] & 0x80) >> 7) << 3) - | (((input[4] & 0x80) >> 7) << 4) | (((input[5] & 0x80) >> 7) << 5) | (((input[6] & 0x80) >> 7) << 6) | (((input[7] & 0x80) >> 7) << 7) - | (((input[8] & 0x80) >> 7) << 8) | (((input[9] & 0x80) >> 7) << 9) | (((input[10] & 0x80) >> 7) << 10) | (((input[11] & 0x80) >> 7) << 11) - | (((input[12] & 0x80) >> 7) << 12) | (((input[13] & 0x80) >> 7) << 13) | (((input[14] & 0x80) >> 7) << 14) | (((input[15] & 0x80) >> 7) << 15) - ); -} - -/* Transpose bits within bytes. 
*/ -int64_t bshuf_trans_bit_byte_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - - size_t ii, kk; - const char* in_b = (const char*) in; - char* out_b = (char*) out; - uint16_t* out_ui16; - - int64_t count; - - size_t nbyte = elem_size * size; - - CHECK_MULT_EIGHT(nbyte); - - int16x8_t xmm; - int32_t bt; - - for (ii = 0; ii + 15 < nbyte; ii += 16) { - xmm = vld1q_s16((int16_t *) (in_b + ii)); - for (kk = 0; kk < 8; kk++) { - bt = move_byte_mask_neon((uint8x16_t) xmm); - xmm = vshlq_n_s16(xmm, 1); - out_ui16 = (uint16_t*) &out_b[((7 - kk) * nbyte + ii) / 8]; - *out_ui16 = bt; - } - } - count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size, - nbyte - nbyte % 16); - return count; -} - - -/* Transpose bits within elements. */ -int64_t bshuf_trans_bit_elem_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; - - CHECK_MULT_EIGHT(size); - - void* tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - - count = bshuf_trans_byte_elem_NEON(in, out, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_trans_bit_byte_NEON(out, tmp_buf, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); - - free(tmp_buf); - - return count; -} - - -/* For data organized into a row for each bit (8 * elem_size rows), transpose - * the bytes. 
*/ -int64_t bshuf_trans_byte_bitrow_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - - size_t ii, jj; - const char* in_b = (const char*) in; - char* out_b = (char*) out; - - CHECK_MULT_EIGHT(size); - - size_t nrows = 8 * elem_size; - size_t nbyte_row = size / 8; - - int8x16_t a0, b0, c0, d0, e0, f0, g0, h0; - int8x16_t a1, b1, c1, d1, e1, f1, g1, h1; - int64x1_t *as, *bs, *cs, *ds, *es, *fs, *gs, *hs; - - for (ii = 0; ii + 7 < nrows; ii += 8) { - for (jj = 0; jj + 15 < nbyte_row; jj += 16) { - a0 = vld1q_s8(in_b + (ii + 0)*nbyte_row + jj); - b0 = vld1q_s8(in_b + (ii + 1)*nbyte_row + jj); - c0 = vld1q_s8(in_b + (ii + 2)*nbyte_row + jj); - d0 = vld1q_s8(in_b + (ii + 3)*nbyte_row + jj); - e0 = vld1q_s8(in_b + (ii + 4)*nbyte_row + jj); - f0 = vld1q_s8(in_b + (ii + 5)*nbyte_row + jj); - g0 = vld1q_s8(in_b + (ii + 6)*nbyte_row + jj); - h0 = vld1q_s8(in_b + (ii + 7)*nbyte_row + jj); - - a1 = vzip1q_s8(a0, b0); - b1 = vzip1q_s8(c0, d0); - c1 = vzip1q_s8(e0, f0); - d1 = vzip1q_s8(g0, h0); - e1 = vzip2q_s8(a0, b0); - f1 = vzip2q_s8(c0, d0); - g1 = vzip2q_s8(e0, f0); - h1 = vzip2q_s8(g0, h0); - - a0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (a1), vreinterpretq_s16_s8 (b1)); - b0= (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (c1), vreinterpretq_s16_s8 (d1)); - c0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (a1), vreinterpretq_s16_s8 (b1)); - d0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (c1), vreinterpretq_s16_s8 (d1)); - e0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (e1), vreinterpretq_s16_s8 (f1)); - f0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (g1), vreinterpretq_s16_s8 (h1)); - g0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (e1), vreinterpretq_s16_s8 (f1)); - h0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (g1), vreinterpretq_s16_s8 (h1)); - - a1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (b0)); - b1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (b0)); - c1 
= (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (c0), vreinterpretq_s32_s8 (d0)); - d1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (c0), vreinterpretq_s32_s8 (d0)); - e1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (f0)); - f1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (f0)); - g1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (g0), vreinterpretq_s32_s8 (h0)); - h1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (g0), vreinterpretq_s32_s8 (h0)); - - as = (int64x1_t *) &a1; - bs = (int64x1_t *) &b1; - cs = (int64x1_t *) &c1; - ds = (int64x1_t *) &d1; - es = (int64x1_t *) &e1; - fs = (int64x1_t *) &f1; - gs = (int64x1_t *) &g1; - hs = (int64x1_t *) &h1; - - vst1_s64((int64_t *)(out_b + (jj + 0) * nrows + ii), *as); - vst1_s64((int64_t *)(out_b + (jj + 1) * nrows + ii), *(as + 1)); - vst1_s64((int64_t *)(out_b + (jj + 2) * nrows + ii), *bs); - vst1_s64((int64_t *)(out_b + (jj + 3) * nrows + ii), *(bs + 1)); - vst1_s64((int64_t *)(out_b + (jj + 4) * nrows + ii), *cs); - vst1_s64((int64_t *)(out_b + (jj + 5) * nrows + ii), *(cs + 1)); - vst1_s64((int64_t *)(out_b + (jj + 6) * nrows + ii), *ds); - vst1_s64((int64_t *)(out_b + (jj + 7) * nrows + ii), *(ds + 1)); - vst1_s64((int64_t *)(out_b + (jj + 8) * nrows + ii), *es); - vst1_s64((int64_t *)(out_b + (jj + 9) * nrows + ii), *(es + 1)); - vst1_s64((int64_t *)(out_b + (jj + 10) * nrows + ii), *fs); - vst1_s64((int64_t *)(out_b + (jj + 11) * nrows + ii), *(fs + 1)); - vst1_s64((int64_t *)(out_b + (jj + 12) * nrows + ii), *gs); - vst1_s64((int64_t *)(out_b + (jj + 13) * nrows + ii), *(gs + 1)); - vst1_s64((int64_t *)(out_b + (jj + 14) * nrows + ii), *hs); - vst1_s64((int64_t *)(out_b + (jj + 15) * nrows + ii), *(hs + 1)); - } - for (jj = nbyte_row - nbyte_row % 16; jj < nbyte_row; jj ++) { - out_b[jj * nrows + ii + 0] = in_b[(ii + 0)*nbyte_row + jj]; - out_b[jj * nrows + ii + 1] = in_b[(ii + 1)*nbyte_row + jj]; - out_b[jj * nrows + ii + 2] = in_b[(ii + 
2)*nbyte_row + jj]; - out_b[jj * nrows + ii + 3] = in_b[(ii + 3)*nbyte_row + jj]; - out_b[jj * nrows + ii + 4] = in_b[(ii + 4)*nbyte_row + jj]; - out_b[jj * nrows + ii + 5] = in_b[(ii + 5)*nbyte_row + jj]; - out_b[jj * nrows + ii + 6] = in_b[(ii + 6)*nbyte_row + jj]; - out_b[jj * nrows + ii + 7] = in_b[(ii + 7)*nbyte_row + jj]; - } - } - return size * elem_size; -} - - -/* Shuffle bits within the bytes of eight element blocks. */ -int64_t bshuf_shuffle_bit_eightelem_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - - CHECK_MULT_EIGHT(size); - - // With a bit of care, this could be written such that such that it is - // in_buf = out_buf safe. - const char* in_b = (const char*) in; - uint16_t* out_ui16 = (uint16_t*) out; - - size_t ii, jj, kk; - size_t nbyte = elem_size * size; - - int16x8_t xmm; - int32_t bt; - - if (elem_size % 2) { - bshuf_shuffle_bit_eightelem_scal(in, out, size, elem_size); - } else { - for (ii = 0; ii + 8 * elem_size - 1 < nbyte; - ii += 8 * elem_size) { - for (jj = 0; jj + 15 < 8 * elem_size; jj += 16) { - xmm = vld1q_s16((int16_t *) &in_b[ii + jj]); - for (kk = 0; kk < 8; kk++) { - bt = move_byte_mask_neon((uint8x16_t) xmm); - xmm = vshlq_n_s16(xmm, 1); - size_t ind = (ii + jj / 8 + (7 - kk) * elem_size); - out_ui16[ind / 2] = bt; - } - } - } - } - return size * elem_size; -} - - -/* Untranspose bits within elements. 
*/ -int64_t bshuf_untrans_bit_elem_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; - - CHECK_MULT_EIGHT(size); - - void* tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - - count = bshuf_trans_byte_bitrow_NEON(in, tmp_buf, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_shuffle_bit_eightelem_NEON(tmp_buf, out, size, elem_size); - - free(tmp_buf); - - return count; -} - -#else // #ifdef USEARMNEON - -int64_t bshuf_untrans_bit_elem_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -13; -} - - -int64_t bshuf_trans_bit_elem_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -13; -} - - -int64_t bshuf_trans_byte_bitrow_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -13; -} - - -int64_t bshuf_trans_bit_byte_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -13; -} - - -int64_t bshuf_trans_byte_elem_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -13; -} - - -int64_t bshuf_trans_byte_elem_NEON_64(const void* in, void* out, const size_t size) { - return -13; -} - - -int64_t bshuf_trans_byte_elem_NEON_32(const void* in, void* out, const size_t size) { - return -13; -} - - -int64_t bshuf_trans_byte_elem_NEON_16(const void* in, void* out, const size_t size) { - return -13; -} - - -int64_t bshuf_shuffle_bit_eightelem_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -13; -} - - -#endif - - - - - -/* ---- Worker code that uses SSE2 ---- - * - * The following code makes use of the SSE2 instruction set and specialized - * 16 byte registers. The SSE2 instructions are present on modern x86 - * processors. The first Intel processor microarchitecture supporting SSE2 was - * Pentium 4 (2000). 
- * - */ - -#ifdef USESSE2 - -/* Transpose bytes within elements for 16 bit elements. */ -int64_t bshuf_trans_byte_elem_SSE_16(const void* in, void* out, const size_t size) { - - size_t ii; - const char *in_b = (const char*) in; - char *out_b = (char*) out; - __m128i a0, b0, a1, b1; - - for (ii=0; ii + 15 < size; ii += 16) { - a0 = _mm_loadu_si128((__m128i *) &in_b[2*ii + 0*16]); - b0 = _mm_loadu_si128((__m128i *) &in_b[2*ii + 1*16]); - - a1 = _mm_unpacklo_epi8(a0, b0); - b1 = _mm_unpackhi_epi8(a0, b0); - - a0 = _mm_unpacklo_epi8(a1, b1); - b0 = _mm_unpackhi_epi8(a1, b1); - - a1 = _mm_unpacklo_epi8(a0, b0); - b1 = _mm_unpackhi_epi8(a0, b0); - - a0 = _mm_unpacklo_epi8(a1, b1); - b0 = _mm_unpackhi_epi8(a1, b1); - - _mm_storeu_si128((__m128i *) &out_b[0*size + ii], a0); - _mm_storeu_si128((__m128i *) &out_b[1*size + ii], b0); - } - return bshuf_trans_byte_elem_remainder(in, out, size, 2, - size - size % 16); -} - - -/* Transpose bytes within elements for 32 bit elements. */ -int64_t bshuf_trans_byte_elem_SSE_32(const void* in, void* out, const size_t size) { - - size_t ii; - const char *in_b; - char *out_b; - in_b = (const char*) in; - out_b = (char*) out; - __m128i a0, b0, c0, d0, a1, b1, c1, d1; - - for (ii=0; ii + 15 < size; ii += 16) { - a0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 0*16]); - b0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 1*16]); - c0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 2*16]); - d0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 3*16]); - - a1 = _mm_unpacklo_epi8(a0, b0); - b1 = _mm_unpackhi_epi8(a0, b0); - c1 = _mm_unpacklo_epi8(c0, d0); - d1 = _mm_unpackhi_epi8(c0, d0); - - a0 = _mm_unpacklo_epi8(a1, b1); - b0 = _mm_unpackhi_epi8(a1, b1); - c0 = _mm_unpacklo_epi8(c1, d1); - d0 = _mm_unpackhi_epi8(c1, d1); - - a1 = _mm_unpacklo_epi8(a0, b0); - b1 = _mm_unpackhi_epi8(a0, b0); - c1 = _mm_unpacklo_epi8(c0, d0); - d1 = _mm_unpackhi_epi8(c0, d0); - - a0 = _mm_unpacklo_epi64(a1, c1); - b0 = _mm_unpackhi_epi64(a1, c1); - c0 = _mm_unpacklo_epi64(b1, 
d1); - d0 = _mm_unpackhi_epi64(b1, d1); - - _mm_storeu_si128((__m128i *) &out_b[0*size + ii], a0); - _mm_storeu_si128((__m128i *) &out_b[1*size + ii], b0); - _mm_storeu_si128((__m128i *) &out_b[2*size + ii], c0); - _mm_storeu_si128((__m128i *) &out_b[3*size + ii], d0); - } - return bshuf_trans_byte_elem_remainder(in, out, size, 4, - size - size % 16); -} - - -/* Transpose bytes within elements for 64 bit elements. */ -int64_t bshuf_trans_byte_elem_SSE_64(const void* in, void* out, const size_t size) { - - size_t ii; - const char* in_b = (const char*) in; - char* out_b = (char*) out; - __m128i a0, b0, c0, d0, e0, f0, g0, h0; - __m128i a1, b1, c1, d1, e1, f1, g1, h1; - - for (ii=0; ii + 15 < size; ii += 16) { - a0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 0*16]); - b0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 1*16]); - c0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 2*16]); - d0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 3*16]); - e0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 4*16]); - f0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 5*16]); - g0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 6*16]); - h0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 7*16]); - - a1 = _mm_unpacklo_epi8(a0, b0); - b1 = _mm_unpackhi_epi8(a0, b0); - c1 = _mm_unpacklo_epi8(c0, d0); - d1 = _mm_unpackhi_epi8(c0, d0); - e1 = _mm_unpacklo_epi8(e0, f0); - f1 = _mm_unpackhi_epi8(e0, f0); - g1 = _mm_unpacklo_epi8(g0, h0); - h1 = _mm_unpackhi_epi8(g0, h0); - - a0 = _mm_unpacklo_epi8(a1, b1); - b0 = _mm_unpackhi_epi8(a1, b1); - c0 = _mm_unpacklo_epi8(c1, d1); - d0 = _mm_unpackhi_epi8(c1, d1); - e0 = _mm_unpacklo_epi8(e1, f1); - f0 = _mm_unpackhi_epi8(e1, f1); - g0 = _mm_unpacklo_epi8(g1, h1); - h0 = _mm_unpackhi_epi8(g1, h1); - - a1 = _mm_unpacklo_epi32(a0, c0); - b1 = _mm_unpackhi_epi32(a0, c0); - c1 = _mm_unpacklo_epi32(b0, d0); - d1 = _mm_unpackhi_epi32(b0, d0); - e1 = _mm_unpacklo_epi32(e0, g0); - f1 = _mm_unpackhi_epi32(e0, g0); - g1 = _mm_unpacklo_epi32(f0, h0); - h1 = _mm_unpackhi_epi32(f0, h0); - - 
a0 = _mm_unpacklo_epi64(a1, e1); - b0 = _mm_unpackhi_epi64(a1, e1); - c0 = _mm_unpacklo_epi64(b1, f1); - d0 = _mm_unpackhi_epi64(b1, f1); - e0 = _mm_unpacklo_epi64(c1, g1); - f0 = _mm_unpackhi_epi64(c1, g1); - g0 = _mm_unpacklo_epi64(d1, h1); - h0 = _mm_unpackhi_epi64(d1, h1); - - _mm_storeu_si128((__m128i *) &out_b[0*size + ii], a0); - _mm_storeu_si128((__m128i *) &out_b[1*size + ii], b0); - _mm_storeu_si128((__m128i *) &out_b[2*size + ii], c0); - _mm_storeu_si128((__m128i *) &out_b[3*size + ii], d0); - _mm_storeu_si128((__m128i *) &out_b[4*size + ii], e0); - _mm_storeu_si128((__m128i *) &out_b[5*size + ii], f0); - _mm_storeu_si128((__m128i *) &out_b[6*size + ii], g0); - _mm_storeu_si128((__m128i *) &out_b[7*size + ii], h0); - } - return bshuf_trans_byte_elem_remainder(in, out, size, 8, - size - size % 16); -} - - -/* Transpose bytes within elements using best SSE algorithm available. */ -int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; - - // Trivial cases: power of 2 bytes. - switch (elem_size) { - case 1: - count = bshuf_copy(in, out, size, elem_size); - return count; - case 2: - count = bshuf_trans_byte_elem_SSE_16(in, out, size); - return count; - case 4: - count = bshuf_trans_byte_elem_SSE_32(in, out, size); - return count; - case 8: - count = bshuf_trans_byte_elem_SSE_64(in, out, size); - return count; - } - - // Worst case: odd number of bytes. Turns out that this is faster for - // (odd * 2) byte elements as well (hence % 4). - if (elem_size % 4) { - count = bshuf_trans_byte_elem_scal(in, out, size, elem_size); - return count; - } - - // Multiple of power of 2: transpose hierarchically. 
- { - size_t nchunk_elem; - void* tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - - if ((elem_size % 8) == 0) { - nchunk_elem = elem_size / 8; - TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int64_t); - count = bshuf_trans_byte_elem_SSE_64(out, tmp_buf, - size * nchunk_elem); - bshuf_trans_elem(tmp_buf, out, 8, nchunk_elem, size); - } else if ((elem_size % 4) == 0) { - nchunk_elem = elem_size / 4; - TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int32_t); - count = bshuf_trans_byte_elem_SSE_32(out, tmp_buf, - size * nchunk_elem); - bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size); - } else { - // Not used since scalar algorithm is faster. - nchunk_elem = elem_size / 2; - TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t); - count = bshuf_trans_byte_elem_SSE_16(out, tmp_buf, - size * nchunk_elem); - bshuf_trans_elem(tmp_buf, out, 2, nchunk_elem, size); - } - - free(tmp_buf); - return count; - } -} - - -/* Transpose bits within bytes. */ -int64_t bshuf_trans_bit_byte_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - - size_t ii, kk; - const char* in_b = (const char*) in; - char* out_b = (char*) out; - uint16_t* out_ui16; - - int64_t count; - - size_t nbyte = elem_size * size; - - CHECK_MULT_EIGHT(nbyte); - - __m128i xmm; - int32_t bt; - - for (ii = 0; ii + 15 < nbyte; ii += 16) { - xmm = _mm_loadu_si128((__m128i *) &in_b[ii]); - for (kk = 0; kk < 8; kk++) { - bt = _mm_movemask_epi8(xmm); - xmm = _mm_slli_epi16(xmm, 1); - out_ui16 = (uint16_t*) &out_b[((7 - kk) * nbyte + ii) / 8]; - *out_ui16 = bt; - } - } - count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size, - nbyte - nbyte % 16); - return count; -} - - -/* Transpose bits within elements. 
*/ -int64_t bshuf_trans_bit_elem_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; - - CHECK_MULT_EIGHT(size); - - void* tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - - count = bshuf_trans_byte_elem_SSE(in, out, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_trans_bit_byte_SSE(out, tmp_buf, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); - - free(tmp_buf); - - return count; -} - - -/* For data organized into a row for each bit (8 * elem_size rows), transpose - * the bytes. */ -int64_t bshuf_trans_byte_bitrow_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - - size_t ii, jj; - const char* in_b = (const char*) in; - char* out_b = (char*) out; - - CHECK_MULT_EIGHT(size); - - size_t nrows = 8 * elem_size; - size_t nbyte_row = size / 8; - - __m128i a0, b0, c0, d0, e0, f0, g0, h0; - __m128i a1, b1, c1, d1, e1, f1, g1, h1; - __m128 *as, *bs, *cs, *ds, *es, *fs, *gs, *hs; - - for (ii = 0; ii + 7 < nrows; ii += 8) { - for (jj = 0; jj + 15 < nbyte_row; jj += 16) { - a0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 0)*nbyte_row + jj]); - b0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 1)*nbyte_row + jj]); - c0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 2)*nbyte_row + jj]); - d0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 3)*nbyte_row + jj]); - e0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 4)*nbyte_row + jj]); - f0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 5)*nbyte_row + jj]); - g0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 6)*nbyte_row + jj]); - h0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 7)*nbyte_row + jj]); - - - a1 = _mm_unpacklo_epi8(a0, b0); - b1 = _mm_unpacklo_epi8(c0, d0); - c1 = _mm_unpacklo_epi8(e0, f0); - d1 = _mm_unpacklo_epi8(g0, h0); - e1 = _mm_unpackhi_epi8(a0, b0); - f1 = _mm_unpackhi_epi8(c0, d0); - g1 = _mm_unpackhi_epi8(e0, f0); - h1 = _mm_unpackhi_epi8(g0, h0); - - - a0 = 
_mm_unpacklo_epi16(a1, b1); - b0 = _mm_unpacklo_epi16(c1, d1); - c0 = _mm_unpackhi_epi16(a1, b1); - d0 = _mm_unpackhi_epi16(c1, d1); - - e0 = _mm_unpacklo_epi16(e1, f1); - f0 = _mm_unpacklo_epi16(g1, h1); - g0 = _mm_unpackhi_epi16(e1, f1); - h0 = _mm_unpackhi_epi16(g1, h1); - - - a1 = _mm_unpacklo_epi32(a0, b0); - b1 = _mm_unpackhi_epi32(a0, b0); - - c1 = _mm_unpacklo_epi32(c0, d0); - d1 = _mm_unpackhi_epi32(c0, d0); - - e1 = _mm_unpacklo_epi32(e0, f0); - f1 = _mm_unpackhi_epi32(e0, f0); - - g1 = _mm_unpacklo_epi32(g0, h0); - h1 = _mm_unpackhi_epi32(g0, h0); - - // We don't have a storeh instruction for integers, so interpret - // as a float. Have a storel (_mm_storel_epi64). - as = (__m128 *) &a1; - bs = (__m128 *) &b1; - cs = (__m128 *) &c1; - ds = (__m128 *) &d1; - es = (__m128 *) &e1; - fs = (__m128 *) &f1; - gs = (__m128 *) &g1; - hs = (__m128 *) &h1; - - _mm_storel_pi((__m64 *) &out_b[(jj + 0) * nrows + ii], *as); - _mm_storel_pi((__m64 *) &out_b[(jj + 2) * nrows + ii], *bs); - _mm_storel_pi((__m64 *) &out_b[(jj + 4) * nrows + ii], *cs); - _mm_storel_pi((__m64 *) &out_b[(jj + 6) * nrows + ii], *ds); - _mm_storel_pi((__m64 *) &out_b[(jj + 8) * nrows + ii], *es); - _mm_storel_pi((__m64 *) &out_b[(jj + 10) * nrows + ii], *fs); - _mm_storel_pi((__m64 *) &out_b[(jj + 12) * nrows + ii], *gs); - _mm_storel_pi((__m64 *) &out_b[(jj + 14) * nrows + ii], *hs); - - _mm_storeh_pi((__m64 *) &out_b[(jj + 1) * nrows + ii], *as); - _mm_storeh_pi((__m64 *) &out_b[(jj + 3) * nrows + ii], *bs); - _mm_storeh_pi((__m64 *) &out_b[(jj + 5) * nrows + ii], *cs); - _mm_storeh_pi((__m64 *) &out_b[(jj + 7) * nrows + ii], *ds); - _mm_storeh_pi((__m64 *) &out_b[(jj + 9) * nrows + ii], *es); - _mm_storeh_pi((__m64 *) &out_b[(jj + 11) * nrows + ii], *fs); - _mm_storeh_pi((__m64 *) &out_b[(jj + 13) * nrows + ii], *gs); - _mm_storeh_pi((__m64 *) &out_b[(jj + 15) * nrows + ii], *hs); - } - for (jj = nbyte_row - nbyte_row % 16; jj < nbyte_row; jj ++) { - out_b[jj * nrows + ii + 0] = in_b[(ii + 
0)*nbyte_row + jj]; - out_b[jj * nrows + ii + 1] = in_b[(ii + 1)*nbyte_row + jj]; - out_b[jj * nrows + ii + 2] = in_b[(ii + 2)*nbyte_row + jj]; - out_b[jj * nrows + ii + 3] = in_b[(ii + 3)*nbyte_row + jj]; - out_b[jj * nrows + ii + 4] = in_b[(ii + 4)*nbyte_row + jj]; - out_b[jj * nrows + ii + 5] = in_b[(ii + 5)*nbyte_row + jj]; - out_b[jj * nrows + ii + 6] = in_b[(ii + 6)*nbyte_row + jj]; - out_b[jj * nrows + ii + 7] = in_b[(ii + 7)*nbyte_row + jj]; - } - } - return size * elem_size; -} - - -/* Shuffle bits within the bytes of eight element blocks. */ -int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - - CHECK_MULT_EIGHT(size); - - // With a bit of care, this could be written such that such that it is - // in_buf = out_buf safe. - const char* in_b = (const char*) in; - uint16_t* out_ui16 = (uint16_t*) out; - - size_t ii, jj, kk; - size_t nbyte = elem_size * size; - - __m128i xmm; - int32_t bt; - - if (elem_size % 2) { - bshuf_shuffle_bit_eightelem_scal(in, out, size, elem_size); - } else { - for (ii = 0; ii + 8 * elem_size - 1 < nbyte; - ii += 8 * elem_size) { - for (jj = 0; jj + 15 < 8 * elem_size; jj += 16) { - xmm = _mm_loadu_si128((__m128i *) &in_b[ii + jj]); - for (kk = 0; kk < 8; kk++) { - bt = _mm_movemask_epi8(xmm); - xmm = _mm_slli_epi16(xmm, 1); - size_t ind = (ii + jj / 8 + (7 - kk) * elem_size); - out_ui16[ind / 2] = bt; - } - } - } - } - return size * elem_size; -} - - -/* Untranspose bits within elements. 
*/ -int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; - - CHECK_MULT_EIGHT(size); - - void* tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - - count = bshuf_trans_byte_bitrow_SSE(in, tmp_buf, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_shuffle_bit_eightelem_SSE(tmp_buf, out, size, elem_size); - - free(tmp_buf); - - return count; -} - -#else // #ifdef USESSE2 - - -int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -11; -} - - -int64_t bshuf_trans_bit_elem_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -11; -} - - -int64_t bshuf_trans_byte_bitrow_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -11; -} - - -int64_t bshuf_trans_bit_byte_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -11; -} - - -int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -11; -} - - -int64_t bshuf_trans_byte_elem_SSE_64(const void* in, void* out, const size_t size) { - return -11; -} - - -int64_t bshuf_trans_byte_elem_SSE_32(const void* in, void* out, const size_t size) { - return -11; -} - - -int64_t bshuf_trans_byte_elem_SSE_16(const void* in, void* out, const size_t size) { - return -11; -} - - -int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -11; -} - - -#endif // #ifdef USESSE2 - - -/* ---- Code that requires AVX2. Intel Haswell (2013) and later. ---- */ - -/* ---- Worker code that uses AVX2 ---- - * - * The following code makes use of the AVX2 instruction set and specialized - * 32 byte registers. The AVX2 instructions are present on newer x86 - * processors. The first Intel processor microarchitecture supporting AVX2 was - * Haswell (2013). 
- * - */ - -#ifdef USEAVX2 - -/* Transpose bits within bytes. */ -int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size, - const size_t elem_size) { - - size_t ii, kk; - const char* in_b = (const char*) in; - char* out_b = (char*) out; - int32_t* out_i32; - - size_t nbyte = elem_size * size; - - int64_t count; - - __m256i ymm; - int32_t bt; - - for (ii = 0; ii + 31 < nbyte; ii += 32) { - ymm = _mm256_loadu_si256((__m256i *) &in_b[ii]); - for (kk = 0; kk < 8; kk++) { - bt = _mm256_movemask_epi8(ymm); - ymm = _mm256_slli_epi16(ymm, 1); - out_i32 = (int32_t*) &out_b[((7 - kk) * nbyte + ii) / 8]; - *out_i32 = bt; - } - } - count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size, - nbyte - nbyte % 32); - return count; -} - - -/* Transpose bits within elements. */ -int64_t bshuf_trans_bit_elem_AVX(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; - - CHECK_MULT_EIGHT(size); - - void* tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - - count = bshuf_trans_byte_elem_SSE(in, out, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_trans_bit_byte_AVX(out, tmp_buf, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); - - free(tmp_buf); - - return count; -} - - -/* For data organized into a row for each bit (8 * elem_size rows), transpose - * the bytes. 
*/ -int64_t bshuf_trans_byte_bitrow_AVX(const void* in, void* out, const size_t size, - const size_t elem_size) { - - size_t hh, ii, jj, kk, mm; - const char* in_b = (const char*) in; - char* out_b = (char*) out; - - CHECK_MULT_EIGHT(size); - - size_t nrows = 8 * elem_size; - size_t nbyte_row = size / 8; - - if (elem_size % 4) return bshuf_trans_byte_bitrow_SSE(in, out, size, - elem_size); - - __m256i ymm_0[8]; - __m256i ymm_1[8]; - __m256i ymm_storeage[8][4]; - - for (jj = 0; jj + 31 < nbyte_row; jj += 32) { - for (ii = 0; ii + 3 < elem_size; ii += 4) { - for (hh = 0; hh < 4; hh ++) { - - for (kk = 0; kk < 8; kk ++){ - ymm_0[kk] = _mm256_loadu_si256((__m256i *) &in_b[ - (ii * 8 + hh * 8 + kk) * nbyte_row + jj]); - } - - for (kk = 0; kk < 4; kk ++){ - ymm_1[kk] = _mm256_unpacklo_epi8(ymm_0[kk * 2], - ymm_0[kk * 2 + 1]); - ymm_1[kk + 4] = _mm256_unpackhi_epi8(ymm_0[kk * 2], - ymm_0[kk * 2 + 1]); - } - - for (kk = 0; kk < 2; kk ++){ - for (mm = 0; mm < 2; mm ++){ - ymm_0[kk * 4 + mm] = _mm256_unpacklo_epi16( - ymm_1[kk * 4 + mm * 2], - ymm_1[kk * 4 + mm * 2 + 1]); - ymm_0[kk * 4 + mm + 2] = _mm256_unpackhi_epi16( - ymm_1[kk * 4 + mm * 2], - ymm_1[kk * 4 + mm * 2 + 1]); - } - } - - for (kk = 0; kk < 4; kk ++){ - ymm_1[kk * 2] = _mm256_unpacklo_epi32(ymm_0[kk * 2], - ymm_0[kk * 2 + 1]); - ymm_1[kk * 2 + 1] = _mm256_unpackhi_epi32(ymm_0[kk * 2], - ymm_0[kk * 2 + 1]); - } - - for (kk = 0; kk < 8; kk ++){ - ymm_storeage[kk][hh] = ymm_1[kk]; - } - } - - for (mm = 0; mm < 8; mm ++) { - - for (kk = 0; kk < 4; kk ++){ - ymm_0[kk] = ymm_storeage[mm][kk]; - } - - ymm_1[0] = _mm256_unpacklo_epi64(ymm_0[0], ymm_0[1]); - ymm_1[1] = _mm256_unpacklo_epi64(ymm_0[2], ymm_0[3]); - ymm_1[2] = _mm256_unpackhi_epi64(ymm_0[0], ymm_0[1]); - ymm_1[3] = _mm256_unpackhi_epi64(ymm_0[2], ymm_0[3]); - - ymm_0[0] = _mm256_permute2x128_si256(ymm_1[0], ymm_1[1], 32); - ymm_0[1] = _mm256_permute2x128_si256(ymm_1[2], ymm_1[3], 32); - ymm_0[2] = _mm256_permute2x128_si256(ymm_1[0], ymm_1[1], 49); - 
ymm_0[3] = _mm256_permute2x128_si256(ymm_1[2], ymm_1[3], 49); - - _mm256_storeu_si256((__m256i *) &out_b[ - (jj + mm * 2 + 0 * 16) * nrows + ii * 8], ymm_0[0]); - _mm256_storeu_si256((__m256i *) &out_b[ - (jj + mm * 2 + 0 * 16 + 1) * nrows + ii * 8], ymm_0[1]); - _mm256_storeu_si256((__m256i *) &out_b[ - (jj + mm * 2 + 1 * 16) * nrows + ii * 8], ymm_0[2]); - _mm256_storeu_si256((__m256i *) &out_b[ - (jj + mm * 2 + 1 * 16 + 1) * nrows + ii * 8], ymm_0[3]); - } - } - } - for (ii = 0; ii < nrows; ii ++ ) { - for (jj = nbyte_row - nbyte_row % 32; jj < nbyte_row; jj ++) { - out_b[jj * nrows + ii] = in_b[ii * nbyte_row + jj]; - } - } - return size * elem_size; -} - - -/* Shuffle bits within the bytes of eight element blocks. */ -int64_t bshuf_shuffle_bit_eightelem_AVX(const void* in, void* out, const size_t size, - const size_t elem_size) { - - CHECK_MULT_EIGHT(size); - - // With a bit of care, this could be written such that such that it is - // in_buf = out_buf safe. - const char* in_b = (const char*) in; - char* out_b = (char*) out; - - size_t ii, jj, kk; - size_t nbyte = elem_size * size; - - __m256i ymm; - int32_t bt; - - if (elem_size % 4) { - return bshuf_shuffle_bit_eightelem_SSE(in, out, size, elem_size); - } else { - for (jj = 0; jj + 31 < 8 * elem_size; jj += 32) { - for (ii = 0; ii + 8 * elem_size - 1 < nbyte; - ii += 8 * elem_size) { - ymm = _mm256_loadu_si256((__m256i *) &in_b[ii + jj]); - for (kk = 0; kk < 8; kk++) { - bt = _mm256_movemask_epi8(ymm); - ymm = _mm256_slli_epi16(ymm, 1); - size_t ind = (ii + jj / 8 + (7 - kk) * elem_size); - * (int32_t *) &out_b[ind] = bt; - } - } - } - } - return size * elem_size; -} - - -/* Untranspose bits within elements. 
*/ -int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; - - CHECK_MULT_EIGHT(size); - - void* tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - - count = bshuf_trans_byte_bitrow_AVX(in, tmp_buf, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_shuffle_bit_eightelem_AVX(tmp_buf, out, size, elem_size); - - free(tmp_buf); - return count; -} - - -#else // #ifdef USEAVX2 - -int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -12; -} - - -int64_t bshuf_trans_bit_elem_AVX(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -12; -} - - -int64_t bshuf_trans_byte_bitrow_AVX(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -12; -} - - -int64_t bshuf_shuffle_bit_eightelem_AVX(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -12; -} - - -int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -12; -} - -#endif // #ifdef USEAVX2 - - -/* ---- Drivers selecting best instruction set at compile time. 
---- */ - -int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; -#ifdef USEAVX2 - count = bshuf_trans_bit_elem_AVX(in, out, size, elem_size); -#elif defined(USESSE2) - count = bshuf_trans_bit_elem_SSE(in, out, size, elem_size); -#elif defined(USEARMNEON) - count = bshuf_trans_bit_elem_NEON(in, out, size, elem_size); -#else - count = bshuf_trans_bit_elem_scal(in, out, size, elem_size); -#endif - return count; -} - - -int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; -#ifdef USEAVX2 - count = bshuf_untrans_bit_elem_AVX(in, out, size, elem_size); -#elif defined(USESSE2) - count = bshuf_untrans_bit_elem_SSE(in, out, size, elem_size); -#elif defined(USEARMNEON) - count = bshuf_untrans_bit_elem_NEON(in, out, size, elem_size); -#else - count = bshuf_untrans_bit_elem_scal(in, out, size, elem_size); -#endif - return count; -} - - -/* ---- Wrappers for implementing blocking ---- */ - -/* Wrap a function for processing a single block to process an entire buffer in - * parallel. 
*/ -int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out, \ - const size_t size, const size_t elem_size, size_t block_size) { - - omp_size_t ii = 0; - int64_t err = 0; - int64_t count, cum_count=0; - size_t last_block_size; - size_t leftover_bytes; - size_t this_iter; - char *last_in; - char *last_out; - - - ioc_chain C; - ioc_init(&C, in, out); - - - if (block_size == 0) { - block_size = bshuf_default_block_size(elem_size); - } - if (block_size % BSHUF_BLOCKED_MULT) return -81; - -#if defined(_OPENMP) - #pragma omp parallel for schedule(dynamic, 1) \ - private(count) reduction(+ : cum_count) -#endif - for (ii = 0; ii < (omp_size_t)( size / block_size ); ii ++) { - count = fun(&C, block_size, elem_size); - if (count < 0) err = count; - cum_count += count; - } - - last_block_size = size % block_size; - last_block_size = last_block_size - last_block_size % BSHUF_BLOCKED_MULT; - if (last_block_size) { - count = fun(&C, last_block_size, elem_size); - if (count < 0) err = count; - cum_count += count; - } - - if (err < 0) return err; - - leftover_bytes = size % BSHUF_BLOCKED_MULT * elem_size; - //this_iter; - last_in = (char *) ioc_get_in(&C, &this_iter); - ioc_set_next_in(&C, &this_iter, (void *) (last_in + leftover_bytes)); - last_out = (char *) ioc_get_out(&C, &this_iter); - ioc_set_next_out(&C, &this_iter, (void *) (last_out + leftover_bytes)); - - memcpy(last_out, last_in, leftover_bytes); - - ioc_destroy(&C); - - return cum_count + leftover_bytes; -} - - -/* Bitshuffle a single block. 
*/ -int64_t bshuf_bitshuffle_block(ioc_chain *C_ptr, \ - const size_t size, const size_t elem_size) { - - size_t this_iter; - const void *in; - void *out; - int64_t count; - - - - in = ioc_get_in(C_ptr, &this_iter); - ioc_set_next_in(C_ptr, &this_iter, - (void*) ((char*) in + size * elem_size)); - out = ioc_get_out(C_ptr, &this_iter); - ioc_set_next_out(C_ptr, &this_iter, - (void *) ((char *) out + size * elem_size)); - - count = bshuf_trans_bit_elem(in, out, size, elem_size); - return count; -} - - -/* Bitunshuffle a single block. */ -int64_t bshuf_bitunshuffle_block(ioc_chain* C_ptr, \ - const size_t size, const size_t elem_size) { - - - size_t this_iter; - const void *in; - void *out; - int64_t count; - - - - - in = ioc_get_in(C_ptr, &this_iter); - ioc_set_next_in(C_ptr, &this_iter, - (void*) ((char*) in + size * elem_size)); - out = ioc_get_out(C_ptr, &this_iter); - ioc_set_next_out(C_ptr, &this_iter, - (void *) ((char *) out + size * elem_size)); - - count = bshuf_untrans_bit_elem(in, out, size, elem_size); - return count; -} - - -/* Write a 64 bit unsigned integer to a buffer in big endian order. */ -void bshuf_write_uint64_BE(void* buf, uint64_t num) { - int ii; - uint8_t* b = (uint8_t*) buf; - uint64_t pow28 = 1 << 8; - for (ii = 7; ii >= 0; ii--) { - b[ii] = num % pow28; - num = num / pow28; - } -} - - -/* Read a 64 bit unsigned integer from a buffer big endian order. */ -uint64_t bshuf_read_uint64_BE(void* buf) { - int ii; - uint8_t* b = (uint8_t*) buf; - uint64_t num = 0, pow28 = 1 << 8, cp = 1; - for (ii = 7; ii >= 0; ii--) { - num += b[ii] * cp; - cp *= pow28; - } - return num; -} - - -/* Write a 32 bit unsigned integer to a buffer in big endian order. */ -void bshuf_write_uint32_BE(void* buf, uint32_t num) { - int ii; - uint8_t* b = (uint8_t*) buf; - uint32_t pow28 = 1 << 8; - for (ii = 3; ii >= 0; ii--) { - b[ii] = num % pow28; - num = num / pow28; - } -} - - -/* Read a 32 bit unsigned integer from a buffer big endian order. 
*/ -uint32_t bshuf_read_uint32_BE(const void* buf) { - int ii; - uint8_t* b = (uint8_t*) buf; - uint32_t num = 0, pow28 = 1 << 8, cp = 1; - for (ii = 3; ii >= 0; ii--) { - num += b[ii] * cp; - cp *= pow28; - } - return num; -} - - -/* ---- Public functions ---- - * - * See header file for description and usage. - * - */ - -size_t bshuf_default_block_size(const size_t elem_size) { - // This function needs to be absolutely stable between versions. - // Otherwise encoded data will not be decodable. - - size_t block_size = BSHUF_TARGET_BLOCK_SIZE_B / elem_size; - // Ensure it is a required multiple. - block_size = (block_size / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT; - return MAX(block_size, BSHUF_MIN_RECOMMEND_BLOCK); -} - - -int64_t bshuf_bitshuffle(const void* in, void* out, const size_t size, - const size_t elem_size, size_t block_size) { - - return bshuf_blocked_wrap_fun(&bshuf_bitshuffle_block, in, out, size, - elem_size, block_size); -} - - -int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size, - const size_t elem_size, size_t block_size) { - - return bshuf_blocked_wrap_fun(&bshuf_bitunshuffle_block, in, out, size, - elem_size, block_size); -} - - -#undef TRANS_BIT_8X8 -#undef TRANS_ELEM_TYPE -#undef MAX -#undef CHECK_MULT_EIGHT -#undef CHECK_ERR_FREE - -#undef USESSE2 -#undef USEAVX2 diff --git a/src/bitshuffle/src/bitshuffle_core.h b/src/bitshuffle/src/bitshuffle_core.h deleted file mode 100644 index 7f66b6d3..00000000 --- a/src/bitshuffle/src/bitshuffle_core.h +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Bitshuffle - Filter for improving compression of typed binary data. - * - * This file is part of Bitshuffle - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. - * - * - * Header File - * - * Worker routines return an int64_t which is the number of bytes processed - * if positive or an error code if negative. 
- * - * Error codes: - * -1 : Failed to allocate memory. - * -11 : Missing SSE. - * -12 : Missing AVX. - * -13 : Missing Arm Neon. - * -80 : Input size not a multiple of 8. - * -81 : block_size not multiple of 8. - * -91 : Decompression error, wrong number of bytes processed. - * -1YYY : Error internal to compression routine with error code -YYY. - */ - - -#ifndef BITSHUFFLE_CORE_H -#define BITSHUFFLE_CORE_H - -// We assume GNU g++ defining `__cplusplus` has stdint.h -#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus) -#include -#else - typedef unsigned char uint8_t; - typedef unsigned short uint16_t; - typedef unsigned int uint32_t; - typedef signed int int32_t; - typedef unsigned long long uint64_t; - typedef long long int64_t; -#endif - -#include - - -// These are usually set in the setup.py. -#ifndef BSHUF_VERSION_MAJOR -#define BSHUF_VERSION_MAJOR 0 -#define BSHUF_VERSION_MINOR 3 -#define BSHUF_VERSION_POINT 5 -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/* --- bshuf_using_SSE2 ---- - * - * Whether routines where compiled with the SSE2 instruction set. - * - * Returns - * ------- - * 1 if using SSE2, 0 otherwise. - * - */ -int bshuf_using_SSE2(void); - - -/* ---- bshuf_using_AVX2 ---- - * - * Whether routines where compiled with the AVX2 instruction set. - * - * Returns - * ------- - * 1 if using AVX2, 0 otherwise. - * - */ -int bshuf_using_AVX2(void); - - -/* ---- bshuf_default_block_size ---- - * - * The default block size as function of element size. - * - * This is the block size used by the blocked routines (any routine - * taking a *block_size* argument) when the block_size is not provided - * (zero is passed). - * - * The results of this routine are guaranteed to be stable such that - * shuffled/compressed data can always be decompressed. - * - * Parameters - * ---------- - * elem_size : element size of data to be shuffled/compressed. 
- * - */ -size_t bshuf_default_block_size(const size_t elem_size); - - -/* ---- bshuf_bitshuffle ---- - * - * Bitshuffle the data. - * - * Transpose the bits within elements, in blocks of *block_size* - * elements. - * - * Parameters - * ---------- - * in : input buffer, must be of size * elem_size bytes - * out : output buffer, must be of size * elem_size bytes - * size : number of elements in input - * elem_size : element size of typed data - * block_size : Do transpose in blocks of this many elements. Pass 0 to - * select automatically (recommended). - * - * Returns - * ------- - * number of bytes processed, negative error-code if failed. - * - */ -int64_t bshuf_bitshuffle(const void* in, void* out, const size_t size, - const size_t elem_size, size_t block_size); - - -/* ---- bshuf_bitunshuffle ---- - * - * Unshuffle bitshuffled data. - * - * Untranspose the bits within elements, in blocks of *block_size* - * elements. - * - * To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size* - * must match the parameters used to shuffle the data. - * - * Parameters - * ---------- - * in : input buffer, must be of size * elem_size bytes - * out : output buffer, must be of size * elem_size bytes - * size : number of elements in input - * elem_size : element size of typed data - * block_size : Do transpose in blocks of this many elements. Pass 0 to - * select automatically (recommended). - * - * Returns - * ------- - * number of bytes processed, negative error-code if failed. 
- * - */ -int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size, - const size_t elem_size, size_t block_size); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // BITSHUFFLE_CORE_H diff --git a/src/bitshuffle/src/bitshuffle_internals.h b/src/bitshuffle/src/bitshuffle_internals.h deleted file mode 100644 index e039925c..00000000 --- a/src/bitshuffle/src/bitshuffle_internals.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Bitshuffle - Filter for improving compression of typed binary data. - * - * This file is part of Bitshuffle - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. - */ - - -#ifndef BITSHUFFLE_INTERNALS_H -#define BITSHUFFLE_INTERNALS_H - -// We assume GNU g++ defining `__cplusplus` has stdint.h -#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus) -#include -#else - typedef unsigned char uint8_t; - typedef unsigned short uint16_t; - typedef unsigned int uint32_t; - typedef signed int int32_t; - typedef unsigned long long uint64_t; - typedef long long int64_t; -#endif - -#include -#include "iochain.h" - - -// Constants. -#ifndef BSHUF_MIN_RECOMMEND_BLOCK -#define BSHUF_MIN_RECOMMEND_BLOCK 128 -#define BSHUF_BLOCKED_MULT 8 // Block sizes must be multiple of this. -#define BSHUF_TARGET_BLOCK_SIZE_B 8192 -#endif - - -// Macros. -#define CHECK_ERR_FREE(count, buf) if (count < 0) { free(buf); return count; } - - -#ifdef __cplusplus -extern "C" { -#endif - -/* ---- Utility functions for internal use only ---- */ - -int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size, - const size_t elem_size); - -/* Read a 32 bit unsigned integer from a buffer big endian order. */ -uint32_t bshuf_read_uint32_BE(const void* buf); - -/* Write a 32 bit unsigned integer to a buffer in big endian order. 
*/ -void bshuf_write_uint32_BE(void* buf, uint32_t num); - -int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size, - const size_t elem_size); - -/* Function definition for worker functions that process a single block. */ -typedef int64_t (*bshufBlockFunDef)(ioc_chain* C_ptr, - const size_t size, const size_t elem_size); - -/* Wrap a function for processing a single block to process an entire buffer in - * parallel. */ -int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out, - const size_t size, const size_t elem_size, size_t block_size); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // BITSHUFFLE_INTERNALS_H diff --git a/src/bitshuffle/src/bshuf_h5filter.c b/src/bitshuffle/src/bshuf_h5filter.c deleted file mode 100644 index f67a4a2b..00000000 --- a/src/bitshuffle/src/bshuf_h5filter.c +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Bitshuffle HDF5 filter - * - * This file is part of Bitshuffle - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. - * - */ - -#include "bitshuffle.h" -#include "bshuf_h5filter.h" - - -#define PUSH_ERR(func, minor, str) \ - H5Epush1(__FILE__, func, __LINE__, H5E_PLINE, minor, str) - - -// Prototypes from bitshuffle.c -void bshuf_write_uint64_BE(void* buf, uint64_t num); -uint64_t bshuf_read_uint64_BE(void* buf); -void bshuf_write_uint32_BE(void* buf, uint32_t num); -uint32_t bshuf_read_uint32_BE(const void* buf); - - -// Only called on compresion, not on reverse. 
-herr_t bshuf_h5_set_local(hid_t dcpl, hid_t type, hid_t space){ - - herr_t r; - size_t ii; - - unsigned int elem_size; - - unsigned int flags; - size_t nelements = 8; - size_t nelem_max = 11; - unsigned values[] = {0,0,0,0,0,0,0,0,0,0,0}; - unsigned tmp_values[] = {0,0,0,0,0,0,0,0}; - char msg[80]; - - r = H5Pget_filter_by_id2(dcpl, BSHUF_H5FILTER, &flags, &nelements, - tmp_values, 0, NULL, NULL); - if(r<0) return -1; - - // First 3 slots reserved. Move any passed options to higher addresses. - for (ii=0; ii < nelements && ii + 3 < nelem_max; ii++) { - values[ii + 3] = tmp_values[ii]; - } - - nelements = 3 + nelements; - - values[0] = BSHUF_VERSION_MAJOR; - values[1] = BSHUF_VERSION_MINOR; - - elem_size = H5Tget_size(type); - if(elem_size <= 0) { - PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, - "Invalid element size."); - return -1; - } - - values[2] = elem_size; - - // Validate user supplied arguments. - if (nelements > 3) { - if (values[3] % 8 || values[3] < 0) { - sprintf(msg, "Error in bitshuffle. Invalid block size: %d.", - values[3]); - PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, msg); - return -1; - } - } - if (nelements > 4) { - switch (values[4]) { - case 0: - break; - case BSHUF_H5_COMPRESS_LZ4: - break; - default: - PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, - "Invalid bitshuffle compression."); - } - } - - r = H5Pmodify_filter(dcpl, BSHUF_H5FILTER, flags, nelements, values); - if(r<0) return -1; - - return 1; -} - - -size_t bshuf_h5_filter(unsigned int flags, size_t cd_nelmts, - const unsigned int cd_values[], size_t nbytes, - size_t *buf_size, void **buf) { - - size_t size, elem_size; - int err; - char msg[80]; - size_t block_size = 0; - size_t buf_size_out, nbytes_uncomp, nbytes_out; - char* in_buf = *buf; - void *out_buf; - - if (cd_nelmts < 3) { - PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, - "Not enough parameters."); - return 0; - } - elem_size = cd_values[2]; - - // User specified block size. 
- if (cd_nelmts > 3) block_size = cd_values[3]; - - if (block_size == 0) block_size = bshuf_default_block_size(elem_size); - - // Compression in addition to bitshiffle. - if (cd_nelmts > 4 && cd_values[4] == BSHUF_H5_COMPRESS_LZ4) { - if (flags & H5Z_FLAG_REVERSE) { - // First eight bytes is the number of bytes in the output buffer, - // little endian. - nbytes_uncomp = bshuf_read_uint64_BE(in_buf); - // Override the block size with the one read from the header. - block_size = bshuf_read_uint32_BE((const char*) in_buf + 8) / elem_size; - // Skip over the header. - in_buf += 12; - buf_size_out = nbytes_uncomp; - } else { - nbytes_uncomp = nbytes; - buf_size_out = bshuf_compress_lz4_bound(nbytes_uncomp / elem_size, - elem_size, block_size) + 12; - } - } else { - nbytes_uncomp = nbytes; - buf_size_out = nbytes; - } - - // TODO, remove this restriction by memcopying the extra. - if (nbytes_uncomp % elem_size) { - PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, - "Non integer number of elements."); - return 0; - } - size = nbytes_uncomp / elem_size; - - out_buf = malloc(buf_size_out); - if (out_buf == NULL) { - PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, - "Could not allocate output buffer."); - return 0; - } - - if (cd_nelmts > 4 && cd_values[4] == BSHUF_H5_COMPRESS_LZ4) { - if (flags & H5Z_FLAG_REVERSE) { - // Bit unshuffle/decompress. - err = bshuf_decompress_lz4(in_buf, out_buf, size, elem_size, block_size); - nbytes_out = nbytes_uncomp; - } else { - // Bit shuffle/compress. - // Write the header, described in - // http://www.hdfgroup.org/services/filters/HDF5_LZ4.pdf. - // Techincally we should be using signed integers instead of - // unsigned ones, however for valid inputs (positive numbers) these - // have the same representation. 
- bshuf_write_uint64_BE(out_buf, nbytes_uncomp); - bshuf_write_uint32_BE((char*) out_buf + 8, block_size * elem_size); - err = bshuf_compress_lz4(in_buf, (char*) out_buf + 12, size, - elem_size, block_size); nbytes_out = err + 12; } } else { - if (flags & H5Z_FLAG_REVERSE) { - // Bit unshuffle. - err = bshuf_bitunshuffle(in_buf, out_buf, size, elem_size, - block_size); } else { - // Bit shuffle. - err = bshuf_bitshuffle(in_buf, out_buf, size, elem_size, - block_size); } nbytes_out = nbytes; } - //printf("nb_in %d, nb_uncomp %d, nb_out %d, buf_out %d, block %d\n", - //nbytes, nbytes_uncomp, nbytes_out, buf_size_out, block_size); - - if (err < 0) { - sprintf(msg, "Error in bitshuffle with error code %d.", err); - PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, msg); - free(out_buf); - return 0; - } else { - free(*buf); - *buf = out_buf; - *buf_size = buf_size_out; - - return nbytes_out; - } -} - - - -H5Z_class_t bshuf_H5Filter[1] = {{ - H5Z_CLASS_T_VERS, - (H5Z_filter_t)(BSHUF_H5FILTER), - 1, 1, - "bitshuffle; see https://github.com/kiyo-masui/bitshuffle", - NULL, - (H5Z_set_local_func_t)(bshuf_h5_set_local), - (H5Z_func_t)(bshuf_h5_filter) -}}; - - -int bshuf_register_h5filter(void){ - - int retval; - - retval = H5Zregister(bshuf_H5Filter); - if(retval<0){ - PUSH_ERR("bshuf_register_h5filter", - H5E_CANTREGISTER, "Can't register bitshuffle filter"); - } - return retval; -} - diff --git a/src/bitshuffle/src/bshuf_h5filter.h b/src/bitshuffle/src/bshuf_h5filter.h deleted file mode 100644 index 0a8fa6a3..00000000 --- a/src/bitshuffle/src/bshuf_h5filter.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Bitshuffle HDF5 filter - * - * This file is part of Bitshuffle - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. 
- * - * - * Header File - * - * Filter Options - * -------------- - * block_size (option slot 0) : interger (optional) - * What block size to use (in elements not bytes). Default is 0, - * for which bitshuffle will pick a block size with a target of 8kb. - * Compression (option slot 1) : 0 or BSHUF_H5_COMPRESS_LZ4 - * Whether to apply LZ4 compression to the data after bitshuffling. - * This is much faster than applying compression as a second filter - * because it is done when the small block of data is already in the - * L1 cache. - * - * For LZ4 compression, the compressed format of the data is the same as - * for the normal LZ4 filter described in - * http://www.hdfgroup.org/services/filters/HDF5_LZ4.pdf. - * - */ - - -#ifndef BSHUF_H5FILTER_H -#define BSHUF_H5FILTER_H - -#define H5Z_class_t_vers 2 -#include "hdf5.h" - - -#define BSHUF_H5FILTER 32008 - - -#define BSHUF_H5_COMPRESS_LZ4 2 - - -extern H5Z_class_t bshuf_H5Filter[1]; - - -/* ---- bshuf_register_h5filter ---- - * - * Register the bitshuffle HDF5 filter within the HDF5 library. - * - * Call this before using the bitshuffle HDF5 filter from C unless - * using dynamically loaded filters. - * - */ -int bshuf_register_h5filter(void); - - -#endif // BSHUF_H5FILTER_H diff --git a/src/bitshuffle/src/bshuf_h5plugin.c b/src/bitshuffle/src/bshuf_h5plugin.c deleted file mode 100644 index 22e99929..00000000 --- a/src/bitshuffle/src/bshuf_h5plugin.c +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Dynamically loaded filter plugin for HDF5 Bitshuffle filter. - * - * This file is part of Bitshuffle - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. 
- * - */ - - -#include "bshuf_h5filter.h" -#include "H5PLextern.h" - -H5PL_type_t H5PLget_plugin_type(void) {return H5PL_TYPE_FILTER;} -const void* H5PLget_plugin_info(void) {return bshuf_H5Filter;} - diff --git a/src/bitshuffle/src/iochain.c b/src/bitshuffle/src/iochain.c deleted file mode 100644 index baa97296..00000000 --- a/src/bitshuffle/src/iochain.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * IOchain - Distribute a chain of dependant IO events amoung threads. - * - * This file is part of Bitshuffle - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. - * - */ - -#include -#include "iochain.h" - - -void ioc_init(ioc_chain *C, const void *in_ptr_0, void *out_ptr_0) { -#ifdef _OPENMP - omp_init_lock(&C->next_lock); - for (size_t ii = 0; ii < IOC_SIZE; ii ++) { - omp_init_lock(&(C->in_pl[ii].lock)); - omp_init_lock(&(C->out_pl[ii].lock)); - } -#endif - C->next = 0; - C->in_pl[0].ptr = in_ptr_0; - C->out_pl[0].ptr = out_ptr_0; -} - - -void ioc_destroy(ioc_chain *C) { -#ifdef _OPENMP - omp_destroy_lock(&C->next_lock); - for (size_t ii = 0; ii < IOC_SIZE; ii ++) { - omp_destroy_lock(&(C->in_pl[ii].lock)); - omp_destroy_lock(&(C->out_pl[ii].lock)); - } -#endif -} - - -const void * ioc_get_in(ioc_chain *C, size_t *this_iter) { -#ifdef _OPENMP - omp_set_lock(&C->next_lock); - #pragma omp flush -#endif - *this_iter = C->next; - C->next ++; -#ifdef _OPENMP - omp_set_lock(&(C->in_pl[*this_iter % IOC_SIZE].lock)); - omp_set_lock(&(C->in_pl[(*this_iter + 1) % IOC_SIZE].lock)); - omp_set_lock(&(C->out_pl[(*this_iter + 1) % IOC_SIZE].lock)); - omp_unset_lock(&C->next_lock); -#endif - return C->in_pl[*this_iter % IOC_SIZE].ptr; -} - - -void ioc_set_next_in(ioc_chain *C, size_t* this_iter, void* in_ptr) { - C->in_pl[(*this_iter + 1) % IOC_SIZE].ptr = in_ptr; -#ifdef _OPENMP - omp_unset_lock(&(C->in_pl[(*this_iter + 1) % IOC_SIZE].lock)); -#endif -} - - -void * 
ioc_get_out(ioc_chain *C, size_t *this_iter) { -#ifdef _OPENMP - omp_set_lock(&(C->out_pl[(*this_iter) % IOC_SIZE].lock)); - #pragma omp flush -#endif - void *out_ptr = C->out_pl[*this_iter % IOC_SIZE].ptr; -#ifdef _OPENMP - omp_unset_lock(&(C->out_pl[(*this_iter) % IOC_SIZE].lock)); -#endif - return out_ptr; -} - - -void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr) { - C->out_pl[(*this_iter + 1) % IOC_SIZE].ptr = out_ptr; -#ifdef _OPENMP - omp_unset_lock(&(C->out_pl[(*this_iter + 1) % IOC_SIZE].lock)); - // *in_pl[this_iter]* lock released at the end of the iteration to avoid being - // overtaken by previous threads and having *out_pl[this_iter]* corrupted. - // Especially worried about thread 0, iteration 0. - omp_unset_lock(&(C->in_pl[(*this_iter) % IOC_SIZE].lock)); -#endif -} - diff --git a/src/bitshuffle/src/iochain.h b/src/bitshuffle/src/iochain.h deleted file mode 100644 index 4e225d1b..00000000 --- a/src/bitshuffle/src/iochain.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * IOchain - Distribute a chain of dependant IO events amoung threads. - * - * This file is part of Bitshuffle - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. - * - * - * Header File - * - * Similar in concept to a queue. Each task includes reading an input - * and writing output, but the location of the input/output (the pointers) - * depend on the previous item in the chain. - * - * This is designed for parallelizing blocked compression/decompression IO, - * where the destination of a compressed block depends on the compressed size - * of all previous blocks. - * - * Implemented with OpenMP locks. - * - * - * Usage - * ----- - * - Call `ioc_init` in serial block. - * - Each thread should create a local variable *size_t this_iter* and - * pass its address to all function calls. 
Its value will be set - * inside the functions and is used to identify the thread. - * - Each thread must call each of the `ioc_get*` and `ioc_set*` methods - * exactly once per iteration, starting with `ioc_get_in` and ending - * with `ioc_set_next_out`. - * - The order (`ioc_get_in`, `ioc_set_next_in`, *work*, `ioc_get_out`, - * `ioc_set_next_out`, *work*) is most efficient. - * - Have each thread call `ioc_end_pop`. - * - `ioc_get_in` is blocked until the previous entry's - * `ioc_set_next_in` is called. - * - `ioc_get_out` is blocked until the previous entry's - * `ioc_set_next_out` is called. - * - There are no blocks on the very first iteration. - * - Call `ioc_destroy` in serial block. - * - Safe for num_threads >= IOC_SIZE (but less efficient). - * - */ - - -#ifndef IOCHAIN_H -#define IOCHAIN_H - - -#include -#ifdef _OPENMP -#include -#endif - - -#define IOC_SIZE 33 - - -typedef struct ioc_ptr_and_lock { -#ifdef _OPENMP - omp_lock_t lock; -#endif - void *ptr; -} ptr_and_lock; - -typedef struct ioc_const_ptr_and_lock { -#ifdef _OPENMP - omp_lock_t lock; -#endif - const void *ptr; -} const_ptr_and_lock; - - -typedef struct ioc_chain { -#ifdef _OPENMP - omp_lock_t next_lock; -#endif - size_t next; - const_ptr_and_lock in_pl[IOC_SIZE]; - ptr_and_lock out_pl[IOC_SIZE]; -} ioc_chain; - - -void ioc_init(ioc_chain *C, const void *in_ptr_0, void *out_ptr_0); -void ioc_destroy(ioc_chain *C); -const void * ioc_get_in(ioc_chain *C, size_t *this_iter); -void ioc_set_next_in(ioc_chain *C, size_t* this_iter, void* in_ptr); -void * ioc_get_out(ioc_chain *C, size_t *this_iter); -void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr); - -#endif // IOCHAIN_H - diff --git a/src/bitshuffle/src/lzf_h5plugin.c b/src/bitshuffle/src/lzf_h5plugin.c deleted file mode 100644 index cbf7e3d8..00000000 --- a/src/bitshuffle/src/lzf_h5plugin.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Dynamically loaded filter plugin for HDF5 LZF filter. 
- * - * This file is part of Bitshuffle - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. - * - */ - - -#define H5Z_class_t_vers 2 -#include "lzf_filter.h" -#include "H5PLextern.h" - -#include - - -size_t lzf_filter(unsigned flags, size_t cd_nelmts, - const unsigned cd_values[], size_t nbytes, - size_t *buf_size, void **buf); - - -herr_t lzf_set_local(hid_t dcpl, hid_t type, hid_t space); - - -H5Z_class_t lzf_H5Filter[1] = {{ - H5Z_CLASS_T_VERS, - (H5Z_filter_t)(H5PY_FILTER_LZF), - 1, 1, - "lzf", - NULL, - (H5Z_set_local_func_t)(lzf_set_local), - (H5Z_func_t)(lzf_filter) -}}; - - -H5PL_type_t H5PLget_plugin_type(void) {return H5PL_TYPE_FILTER;} -const void* H5PLget_plugin_info(void) {return lzf_H5Filter;} - From 3db00d4d3691551ff33b4068a5b9e900d9140ec5 Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Tue, 8 Nov 2022 11:43:35 +0100 Subject: [PATCH 5/7] Squashed 'src/bitshuffle/' content from commit a60471d3 git-subtree-dir: src/bitshuffle git-subtree-split: a60471d37a8cbbd8265dc8cfa83a9320abdcb590 --- .github/workflows/flake8_cython.cfg | 4 + .github/workflows/flake8_python.cfg | 3 + .github/workflows/install_hdf5.sh | 10 + .github/workflows/lint.yml | 32 + .github/workflows/main.yml | 58 + .github/workflows/wheels.yml | 98 ++ .gitignore | 79 + .gitmodules | 3 + LICENSE | 21 + MANIFEST.in | 10 + README.rst | 246 +++ bitshuffle/__init__.py | 54 + bitshuffle/ext.pyx | 569 ++++++ bitshuffle/h5.pyx | 235 +++ conda-recipe/bld.bat | 3 + conda-recipe/build.sh | 2 + conda-recipe/meta.yaml | 27 + conda-recipe/setup.py.patch | 13 + lz4/LICENSE | 24 + lz4/README.md | 21 + lz4/lz4.c | 2495 +++++++++++++++++++++++++++ lz4/lz4.h | 774 +++++++++ lzf/LICENSE.txt | 34 + lzf/README.txt | 84 + lzf/README_bitshuffle.txt | 3 + lzf/example.c | 106 ++ lzf/lzf/lzf.h | 100 ++ lzf/lzf/lzfP.h | 166 ++ lzf/lzf/lzf_c.c | 296 ++++ lzf/lzf/lzf_d.c | 154 ++ lzf/lzf_filter.c 
| 261 +++ lzf/lzf_filter.h | 38 + pyproject.toml | 10 + requirements.txt | 5 + setup.cfg.example | 10 + setup.py | 419 +++++ src/bitshuffle.c | 279 +++ src/bitshuffle.h | 205 +++ src/bitshuffle_core.c | 1864 ++++++++++++++++++++ src/bitshuffle_core.h | 169 ++ src/bitshuffle_internals.h | 75 + src/bshuf_h5filter.c | 260 +++ src/bshuf_h5filter.h | 67 + src/bshuf_h5plugin.c | 19 + src/hdf5_dl.c | 358 ++++ src/iochain.c | 90 + src/iochain.h | 94 + src/lzf_h5plugin.c | 42 + tests/data/regression_0.1.3.h5 | Bin 0 -> 114447 bytes tests/data/regression_0.4.0.h5 | Bin 0 -> 194482 bytes tests/make_regression_tdata.py | 69 + tests/test_ext.py | 627 +++++++ tests/test_h5filter.py | 138 ++ tests/test_h5plugin.py | 66 + tests/test_regression.py | 46 + zstd | 1 + 56 files changed, 10936 insertions(+) create mode 100644 .github/workflows/flake8_cython.cfg create mode 100644 .github/workflows/flake8_python.cfg create mode 100644 .github/workflows/install_hdf5.sh create mode 100644 .github/workflows/lint.yml create mode 100644 .github/workflows/main.yml create mode 100644 .github/workflows/wheels.yml create mode 100644 .gitignore create mode 100644 .gitmodules create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 README.rst create mode 100644 bitshuffle/__init__.py create mode 100644 bitshuffle/ext.pyx create mode 100644 bitshuffle/h5.pyx create mode 100644 conda-recipe/bld.bat create mode 100644 conda-recipe/build.sh create mode 100644 conda-recipe/meta.yaml create mode 100644 conda-recipe/setup.py.patch create mode 100644 lz4/LICENSE create mode 100644 lz4/README.md create mode 100644 lz4/lz4.c create mode 100644 lz4/lz4.h create mode 100644 lzf/LICENSE.txt create mode 100644 lzf/README.txt create mode 100644 lzf/README_bitshuffle.txt create mode 100644 lzf/example.c create mode 100644 lzf/lzf/lzf.h create mode 100644 lzf/lzf/lzfP.h create mode 100644 lzf/lzf/lzf_c.c create mode 100644 lzf/lzf/lzf_d.c create mode 100644 lzf/lzf_filter.c create mode 100644 
lzf/lzf_filter.h create mode 100644 pyproject.toml create mode 100644 requirements.txt create mode 100644 setup.cfg.example create mode 100644 setup.py create mode 100644 src/bitshuffle.c create mode 100644 src/bitshuffle.h create mode 100644 src/bitshuffle_core.c create mode 100644 src/bitshuffle_core.h create mode 100644 src/bitshuffle_internals.h create mode 100644 src/bshuf_h5filter.c create mode 100644 src/bshuf_h5filter.h create mode 100644 src/bshuf_h5plugin.c create mode 100644 src/hdf5_dl.c create mode 100644 src/iochain.c create mode 100644 src/iochain.h create mode 100644 src/lzf_h5plugin.c create mode 100644 tests/data/regression_0.1.3.h5 create mode 100644 tests/data/regression_0.4.0.h5 create mode 100644 tests/make_regression_tdata.py create mode 100644 tests/test_ext.py create mode 100644 tests/test_h5filter.py create mode 100644 tests/test_h5plugin.py create mode 100644 tests/test_regression.py create mode 160000 zstd diff --git a/.github/workflows/flake8_cython.cfg b/.github/workflows/flake8_cython.cfg new file mode 100644 index 00000000..9e5b5389 --- /dev/null +++ b/.github/workflows/flake8_cython.cfg @@ -0,0 +1,4 @@ +[flake8] +filename=*.pyx,*.pxd +select=E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411 +show_source=True diff --git a/.github/workflows/flake8_python.cfg b/.github/workflows/flake8_python.cfg new file mode 100644 index 00000000..b0760928 --- /dev/null +++ b/.github/workflows/flake8_python.cfg @@ -0,0 +1,3 @@ +[flake8] +ignore=E501,E203,W503,E266 +show_source=True diff --git a/.github/workflows/install_hdf5.sh b/.github/workflows/install_hdf5.sh new file mode 100644 index 00000000..58b2bdb4 --- /dev/null +++ b/.github/workflows/install_hdf5.sh @@ -0,0 +1,10 @@ +HDF5_VERSION=$1 + +# Download and install HDF5 $HDF5_VERSION from source for building wheels +curl 
https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-${HDF5_VERSION%.*}/hdf5-$HDF5_VERSION/src/hdf5-$HDF5_VERSION.tar.gz -O -s +tar -xzf hdf5-$HDF5_VERSION.tar.gz +cd hdf5-$HDF5_VERSION +./configure --prefix=/usr/local +make -j 2 +make install +cd .. diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..6d828a1c --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,32 @@ +name: bitshuffle-ci-build +on: + pull_request: + branches: + - master + push: + branches: + - master + +jobs: + + lint-code: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Set up Python 3.10 + uses: actions/setup-python@v2 + with: + python-version: "3.10" + + - name: Install pip dependencies + run: | + pip install black flake8 + + - name: Run flake8 + run: | + flake8 --config $GITHUB_WORKSPACE/.github/workflows/flake8_python.cfg bitshuffle tests + flake8 --config $GITHUB_WORKSPACE/.github/workflows/flake8_cython.cfg bitshuffle tests + + - name: Check code with black + run: black --check . 
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 00000000..8ec96b64 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,58 @@ +name: bitshuffle-ci-build +on: + pull_request: + branches: + - master + push: + branches: + - master + +jobs: + run-tests: + + strategy: + matrix: + python-version: ["3.6", "3.7", "3.10"] + os: [ubuntu-latest, macos-latest] + exclude: + - os: macos-latest + python-version: "3.6" + + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + + - name: Install apt dependencies + if: ${{ matrix.os == 'ubuntu-latest' }} + run: | + sudo apt-get install -y libhdf5-serial-dev hdf5-tools pkg-config + + - name: Install homebrew dependencies + if: ${{ matrix.os == 'macos-latest' }} + run: | + brew install hdf5 pkg-config + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install h5py + if: ${{ matrix.os == 'macos-latest' }} + run: | + pip install h5py + + - name: Install pip dependencies + run: | + pip install Cython + pip install -r requirements.txt + pip install pytest + + # Pull in ZSTD repo + git submodule update --init + + # Installing the plugin to arbitrary directory to check the install script. + python setup.py install --h5plugin --h5plugin-dir ~/hdf5/lib --zstd + + - name: Run tests + run: pytest -v . 
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml new file mode 100644 index 00000000..def84e0b --- /dev/null +++ b/.github/workflows/wheels.yml @@ -0,0 +1,98 @@ +name: Build bitshuffle wheels and upload to PyPI + +on: + workflow_dispatch: + release: + types: + - published + +jobs: + build_wheels: + name: Build wheels on ${{ matrix.os }} and hdf5-${{ matrix.hdf5 }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + hdf5: ["1.10.7"] + + steps: + # Checkout bitshuffle + - uses: actions/checkout@v2 + + # Build wheels for linux and x86 platforms + - name: Build wheels + uses: pypa/cibuildwheel@v2.3.1 + with: + output-dir: ./wheelhouse-hdf5-${{ matrix.hdf5}} + env: + CIBW_SKIP: "pp* *musllinux*" + CIBW_ARCHS_LINUX: "x86_64" + CIBW_BEFORE_ALL: | + chmod +x .github/workflows/install_hdf5.sh + .github/workflows/install_hdf5.sh ${{ matrix.hdf5 }} + git submodule update --init + CIBW_ENVIRONMENT: | + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib ENABLE_ZSTD=1 + CIBW_TEST_REQUIRES: pytest + # Install different version of HDF5 for unit tests to ensure the + # wheels are independent of HDF5 installation + # CIBW_BEFORE_TEST: | + # chmod +x .github/workflows/install_hdf5.sh + # .github/workflows/install_hdf5.sh 1.8.11 + # Run units tests but disable test_h5plugin.py + CIBW_TEST_COMMAND: pytest {package}/tests + + # Package wheels and host on CI + - uses: actions/upload-artifact@v2 + with: + path: ./wheelhouse-hdf5-${{ matrix.hdf5 }}/*.whl + + build_sdist: + name: Build source distribution + strategy: + matrix: + python-version: ["3.8"] + + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Install apt dependencies + run: | + sudo apt-get install -y libhdf5-serial-dev hdf5-tools pkg-config + + - name: Install Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install pip dependencies + run: | + pip install -r requirements.txt + + - name: 
Build sdist + run: python setup.py sdist + + - uses: actions/upload-artifact@v2 + with: + path: dist/*.tar.gz + + # Upload to PyPI + upload_pypi: + needs: [build_wheels, build_sdist] + runs-on: ubuntu-latest + # Upload to PyPI on every tag + # if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') + # Alternatively, to publish when a GitHub Release is created, use the following rule: + if: github.event_name == 'release' && github.event.action == 'published' + steps: + - uses: actions/download-artifact@v2 + with: + name: artifact + path: dist + + - uses: pypa/gh-action-pypi-publish@v1.4.2 + with: + user: __token__ + password: ${{ secrets.pypi_password }} + # To test: repository_url: https://test.pypi.org/legacy/ diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..f4a98eab --- /dev/null +++ b/.gitignore @@ -0,0 +1,79 @@ +## C + +# Object files +*.o +*.ko +*.obj +*.elf + +# Libraries +*.lib +*.a + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + + +## Python +*.py[cod] + +# C extensions +*.so + +# Packages +*.egg +*.egg-info +dist +build +eggs +parts +bin +var +sdist +develop-eggs +.installed.cfg +lib +lib64 +__pycache__ + +# Installer logs +pip-log.txt + +# Unit test / coverage reports +.coverage +.tox +nosetests.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# Documentation builds +doc/_build +doc/generated + +## Editor files and backups. 
+*.swp +*.swo + +# Generated files +bitshuffle/ext.c +bitshuffle/h5.c + +# ItelliJ +.idea diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..5ebea353 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "zstd"] + path = zstd + url = https://github.com/facebook/zstd diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..1365ed69 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +Bitshuffle - Filter for improving compression of typed binary data. + +Copyright (c) 2014 Kiyoshi Masui (kiyo@physics.ubc.ca) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..00746c64 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,10 @@ +recursive-include src *.h *.c +recursive-include bitshuffle *.pyx +recursive-include lz4 *.h *.c +recursive-include lzf *.h *.c +include setup.cfg.example +include LICENSE +include README.rst +include requirements.txt +exclude setup.cfg + diff --git a/README.rst b/README.rst new file mode 100644 index 00000000..7e4be25f --- /dev/null +++ b/README.rst @@ -0,0 +1,246 @@ +========== +Bitshuffle +========== + +Filter for improving compression of typed binary data. + +Bitshuffle is an algorithm that rearranges typed, binary data for improving +compression, as well as a python/C package that implements this algorithm +within the Numpy framework. + +The library can be used along side HDF5 to compress and decompress datasets and +is integrated through the `dynamically loaded filters`_ framework. Bitshuffle +is HDF5 filter number ``32008``. + +Algorithmically, Bitshuffle is closely related to HDF5's `Shuffle filter`_ +except it operates at the bit level instead of the byte level. Arranging a +typed data array in to a matrix with the elements as the rows and the bits +within the elements as the columns, Bitshuffle "transposes" the matrix, +such that all the least-significant-bits are in a row, etc. This transpose +is performed within blocks of data roughly 8kB long [1]_. + +This does not in itself compress data, only rearranges it for more efficient +compression. To perform the actual compression you will need a compression +library. Bitshuffle has been designed to be well matched to Marc Lehmann's +LZF_ as well as LZ4_ and ZSTD_. Note that because Bitshuffle modifies the data at the bit +level, sophisticated entropy reducing compression libraries such as GZIP and +BZIP are unlikely to achieve significantly better compression than simpler and +faster duplicate-string-elimination algorithms such as LZF, LZ4 and ZSTD. 
Bitshuffle +thus includes routines (and HDF5 filter options) to apply LZ4 and ZSTD compression to +each block after shuffling [2]_. + +The Bitshuffle algorithm relies on neighbouring elements of a dataset being +highly correlated to improve data compression. Any correlations that span at +least 24 elements of the dataset may be exploited to improve compression. + +Bitshuffle was designed with performance in mind. On most machines the +time required for Bitshuffle+LZ4 is insignificant compared to the time required +to read or write the compressed data to disk. Because it is able to exploit the +SSE and AVX instruction sets present on modern Intel and AMD processors, on +these machines compression is only marginally slower than an out-of-cache +memory copy. On modern x86 processors you can expect Bitshuffle to have a +throughput of roughly 1 byte per clock cycle, and on the Haswell generation of +Intel processors (2013) and later, you can expect up to 2 bytes per clock +cycle. In addition, Bitshuffle is parallelized using OpenMP. + +As a bonus, Bitshuffle ships with a dynamically loaded version of +`h5py`'s LZF compression filter, such that the filter can be transparently +used outside of python and in command line utilities such as ``h5dump``. + +.. [1] Chosen to fit comfortably within L1 cache as well as be well matched + window of the LZF compression library. + +.. [2] Over applying bitshuffle to the full dataset then applying LZ4/ZSTD + compression, this has the tremendous advantage that the block is + already in the L1 cache. + +.. _`dynamically loaded filters`: http://www.hdfgroup.org/HDF5/doc/Advanced/DynamicallyLoadedFilters/HDF5DynamicallyLoadedFilters.pdf + +.. _`Shuffle filter`: http://www.hdfgroup.org/HDF5/doc_resource/H5Shuffle_Perf.pdf + +.. _LZF: http://oldhome.schmorp.de/marc/liblzf.html + +.. _LZ4: https://code.google.com/p/lz4/ + +.. 
_ZSTD: https://github.com/facebook/zstd + + +Applications +------------ + +Bitshuffle might be right for your application if: + +- You need to compress typed binary data. +- Your data is arranged such that adjacent elements over the fastest varying + index of your dataset are similar (highly correlated). +- A special case of the previous point is if you are only exercising a subset + of the bits in your data-type, as is often true of integer data. +- You need both high compression ratios and high performance. + + +Comparing Bitshuffle to other compression algorithms and HDF5 filters: + +- Bitshuffle is less general than many other compression algorithms. + To achieve good compression ratios, consecutive elements of your data must + be highly correlated. +- For the right datasets, Bitshuffle is one of the few compression + algorithms that promises both high throughput and high compression ratios. +- Bitshuffle should have roughly the same throughput as Shuffle, but + may obtain higher compression ratios. +- The MAFISC_ filter actually includes something similar to Bitshuffle as one of + its prefilters. However, MAFISC's emphasis is on obtaining high compression + ratios at all costs, sacrificing throughput. + +.. _MAFISC: http://wr.informatik.uni-hamburg.de/research/projects/icomex/mafisc + + +Installation for Python +----------------------- + +Installation requires python 2.7+ or 3.3+, HDF5 1.8.4 or later, HDF5 for python +(h5py), Numpy and Cython. Bitshuffle is linked against HDF5. To use the dynamically +loaded HDF5 filter requires HDF5 1.8.11 or later. If ZSTD support is enabled the ZSTD +repo needs to be pulled into bitshuffle before installation with:: + + git submodule update --init + +To install bitshuffle:: + + python setup.py install [--h5plugin [--h5plugin-dir=spam] --zstd] + +To get finer control of installation options, including whether to compile +with OpenMP multi-threading, copy the ``setup.cfg.example`` to ``setup.cfg`` +and edit the values therein. 
+ +If using the dynamically loaded HDF5 filter (which gives you access to the +Bitshuffle and LZF filters outside of python), set the environment variable +``HDF5_PLUGIN_PATH`` to the value of ``--h5plugin-dir`` or use HDF5's default +search location of ``/usr/local/hdf5/lib/plugin``. + +ZSTD support is enabled with ``--zstd``. + +If you get an error about missing source files when building the extensions, +try upgrading setuptools. There is a weird bug where setuptools prior to 0.7 +doesn't work properly with Cython in some cases. + +.. _source: http://docs.h5py.org/en/latest/build.html#source-installation + + +Usage from Python +----------------- + +The `bitshuffle` module contains routines for shuffling and unshuffling +Numpy arrays. + +If installed with the dynamically loaded filter plugins, Bitshuffle can be used +in conjunction with HDF5 both inside and outside of python, in the same way as +any other filter; simply by specifying the filter number ``32008``. Otherwise +the filter will be available only within python and only after importing +`bitshuffle.h5`. Reading Bitshuffle encoded datasets will be transparent. +The filter can be added to new datasets either through the `h5py` low level +interface or through the convenience functions provided in +`bitshuffle.h5`. See the docstrings and unit tests for examples. For `h5py` +version 2.5.0 and later Bitshuffle can be added to new datasets through the +high level interface, as in the example below. + +The compression algorithm can be configured using the `filter_opts` in +`bitshuffle.h5.create_dataset()`. LZ4 is chosen with: +`(BLOCK_SIZE, h5.H5_COMPRESS_LZ4)` and ZSTD with: +`(BLOCK_SIZE, h5.H5_COMPRESS_ZSTD, COMP_LVL)`. See `test_h5filter.py` for an example. 
+ +Example h5py +------------ +:: + + import h5py + import numpy + import bitshuffle.h5 + + print(h5py.__version__) # >= '2.5.0' + + f = h5py.File(filename, "w") + + # block_size = 0 lets Bitshuffle choose its value + block_size = 0 + + dataset = f.create_dataset( + "data", + (100, 100, 100), + compression=bitshuffle.h5.H5FILTER, + compression_opts=(block_size, bitshuffle.h5.H5_COMPRESS_LZ4), + dtype='float32', + ) + + # create some random data + array = numpy.random.rand(100, 100, 100) + array = array.astype('float32') + + dataset[:] = array + + f.close() + + +Usage from C +------------ + +If you wish to use Bitshuffle in your C program and would prefer not to use the +HDF5 dynamically loaded filter, the C library in the ``src/`` directory is +self-contained and complete. + + +Usage from Java +--------------- + +You can use Bitshuffle even in Java and the routines for shuffling and unshuffling +are ported into `snappy-java`_. To use the routines, you need to add the following +dependency to your pom.xml:: + + + org.xerial.snappy + snappy-java + 1.1.3-M1 + + +First, import org.xerial.snappy.BitShuffle in your Java code:: + + import org.xerial.snappy.BitShuffle; + +Then, you use them like this:: + + int[] data = new int[] {1, 3, 34, 43, 34}; + byte[] shuffledData = BitShuffle.bitShuffle(data); + int[] result = BitShuffle.bitUnShuffleIntArray(shuffledData); + +.. _`snappy-java`: https://github.com/xerial/snappy-java + + +Anaconda +-------- + +The conda package can be built via:: + + conda build conda-recipe + + +For Best Results +---------------- + +Here are a few tips to help you get the most out of Bitshuffle: + +- For multi-dimensional datasets, order your data such that the fastest varying + dimension is the one over which your data is most correlated (have + values that change the least), or fake this using chunks. +- To achieve the highest throughput, use a data type that is 64 *bytes* or + smaller. 
If you have a very large compound data type, consider adding a + dimension to your datasets instead. +- To make full use of the SSE2 instruction set, use a data type whose size + is a multiple of 2 bytes. For the AVX2 instruction set, use a data type whose + size is a multiple of 4 bytes. + + +Citing Bitshuffle +----------------- + +Bitshuffle was initially described in +http://dx.doi.org/10.1016/j.ascom.2015.07.002, pre-print available at +http://arxiv.org/abs/1503.00638. diff --git a/bitshuffle/__init__.py b/bitshuffle/__init__.py new file mode 100644 index 00000000..3f7c0380 --- /dev/null +++ b/bitshuffle/__init__.py @@ -0,0 +1,54 @@ +# flake8: noqa +""" +Filter for improving compression of typed binary data. + +Functions +========= + + using_NEON + using_SSE2 + using_AVX2 + bitshuffle + bitunshuffle + compress_lz4 + decompress_lz4 + compress_zstd + decompress_zstd + +""" + +from __future__ import absolute_import + + +from bitshuffle.ext import ( + __version__, + __zstd__, + bitshuffle, + bitunshuffle, + using_NEON, + using_SSE2, + using_AVX2, + compress_lz4, + decompress_lz4, +) + +# Import ZSTD API if enabled +zstd_api = [] +if __zstd__: + from bitshuffle.ext import ( + compress_zstd, + decompress_zstd, + ) + + zstd_api += ["compress_zstd", "decompress_zstd"] + +__all__ = [ + "__version__", + "bitshuffle", + "bitunshuffle", + "using_NEON", + "using_SSE2", + "using_AVX2", + "compress_lz4", + "decompress_lz4", +] + zstd_api diff --git a/bitshuffle/ext.pyx b/bitshuffle/ext.pyx new file mode 100644 index 00000000..edc9c588 --- /dev/null +++ b/bitshuffle/ext.pyx @@ -0,0 +1,569 @@ +""" +Wrappers for public and private bitshuffle routines + +""" + +from __future__ import absolute_import, division, print_function, unicode_literals + +import numpy as np + +cimport numpy as np +cimport cython + + +np.import_array() + + +# Repeat each calculation this many times. For timing. 
+cdef int REPEATC = 1 +# cdef int REPEATC = 32 + +REPEAT = REPEATC + +cdef extern from b"bitshuffle.h": + int bshuf_using_NEON() + int bshuf_using_SSE2() + int bshuf_using_AVX2() + int bshuf_bitshuffle(void *A, void *B, int size, int elem_size, + int block_size) nogil + int bshuf_bitunshuffle(void *A, void *B, int size, int elem_size, + int block_size) nogil + int bshuf_compress_lz4_bound(int size, int elem_size, int block_size) + int bshuf_compress_lz4(void *A, void *B, int size, int elem_size, + int block_size) nogil + int bshuf_decompress_lz4(void *A, void *B, int size, int elem_size, + int block_size) nogil + IF ZSTD_SUPPORT: + int bshuf_compress_zstd_bound(int size, int elem_size, int block_size) + int bshuf_compress_zstd(void *A, void *B, int size, int elem_size, + int block_size, const int comp_lvl) nogil + int bshuf_decompress_zstd(void *A, void *B, int size, int elem_size, + int block_size) nogil + int BSHUF_VERSION_MAJOR + int BSHUF_VERSION_MINOR + int BSHUF_VERSION_POINT + +__version__ = "%d.%d.%d" % (BSHUF_VERSION_MAJOR, BSHUF_VERSION_MINOR, + BSHUF_VERSION_POINT) + +IF ZSTD_SUPPORT: + __zstd__ = True +ELSE: + __zstd__ = False + +# Prototypes from bitshuffle.c +cdef extern int bshuf_copy(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_byte_elem_scal(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_byte_elem_SSE(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_byte_elem_NEON(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_bit_byte_scal(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_bit_byte_SSE(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_bit_byte_NEON(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_bit_byte_AVX(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_bitrow_eight(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_bit_elem_AVX(void *A, 
void *B, int size, int elem_size) +cdef extern int bshuf_trans_bit_elem_SSE(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_bit_elem_NEON(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_bit_elem_scal(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_byte_bitrow_SSE(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_byte_bitrow_NEON(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_byte_bitrow_AVX(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_byte_bitrow_scal(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_shuffle_bit_eightelem_scal(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_shuffle_bit_eightelem_SSE(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_shuffle_bit_eightelem_NEON(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_shuffle_bit_eightelem_AVX(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_untrans_bit_elem_SSE(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_untrans_bit_elem_NEON(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_untrans_bit_elem_AVX(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_untrans_bit_elem_scal(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_bit_elem(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_untrans_bit_elem(void *A, void *B, int size, int elem_size) + + +ctypedef int (*Cfptr) (void *A, void *B, int size, int elem_size) + + +def using_NEON(): + """Whether compiled using Arm NEON instructions.""" + if bshuf_using_NEON(): + return True + else: + return False + + +def using_SSE2(): + """Whether compiled using SSE2 instructions.""" + if bshuf_using_SSE2(): + return True + else: + return False + + +def using_AVX2(): + """Whether compiled using AVX2 instructions.""" + if bshuf_using_AVX2(): + return True + else: + 
return False + + +def _setup_arr(arr): + shape = tuple(arr.shape) + if not arr.flags['C_CONTIGUOUS']: + msg = "Input array must be C-contiguous." + raise ValueError(msg) + size = arr.size + dtype = arr.dtype + itemsize = dtype.itemsize + out = np.empty(shape, dtype=dtype) + return out, size, itemsize + + +@cython.boundscheck(False) +@cython.wraparound(False) +cdef _wrap_C_fun(Cfptr fun, np.ndarray arr): + """Wrap a C function with standard call signature.""" + + cdef int ii, size, itemsize, count=0 + cdef np.ndarray out + out, size, itemsize = _setup_arr(arr) + + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat + arr_flat = arr.view(np.uint8).ravel() + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat + out_flat = out.view(np.uint8).ravel() + cdef void* arr_ptr = &arr_flat[0] + cdef void* out_ptr = &out_flat[0] + + for ii in range(REPEATC): + count = fun(arr_ptr, out_ptr, size, itemsize) + if count < 0: + msg = "Failed. Error code %d." + excp = RuntimeError(msg % count, count) + raise excp + return out + + +def copy(np.ndarray arr not None): + """Copies the data. + + For testing and profiling purposes. + + """ + return _wrap_C_fun(&bshuf_copy, arr) + + +def trans_byte_elem_scal(np.ndarray arr not None): + """Transpose bytes within words but not bits. + + """ + return _wrap_C_fun(&bshuf_trans_byte_elem_scal, arr) + + +def trans_byte_elem_SSE(np.ndarray arr not None): + """Transpose bytes within array elements. 
+ + """ + return _wrap_C_fun(&bshuf_trans_byte_elem_SSE, arr) + + +def trans_byte_elem_NEON(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_byte_elem_NEON, arr) + + +def trans_bit_byte_scal(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_bit_byte_scal, arr) + + +def trans_bit_byte_SSE(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_bit_byte_SSE, arr) + + +def trans_bit_byte_NEON(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_bit_byte_NEON, arr) + + +def trans_bit_byte_AVX(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_bit_byte_AVX, arr) + + +def trans_bitrow_eight(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_bitrow_eight, arr) + + +def trans_bit_elem_AVX(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_bit_elem_AVX, arr) + + +def trans_bit_elem_scal(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_bit_elem_scal, arr) + + +def trans_bit_elem_SSE(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_bit_elem_SSE, arr) + + +def trans_bit_elem_NEON(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_bit_elem_NEON, arr) + + +def trans_byte_bitrow_SSE(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_byte_bitrow_SSE, arr) + + +def trans_byte_bitrow_NEON(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_byte_bitrow_NEON, arr) + + +def trans_byte_bitrow_AVX(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_byte_bitrow_AVX, arr) + + +def trans_byte_bitrow_scal(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_byte_bitrow_scal, arr) + + +def shuffle_bit_eightelem_scal(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_scal, arr) + + +def shuffle_bit_eightelem_SSE(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_SSE, arr) + + +def shuffle_bit_eightelem_NEON(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_NEON, arr) + + +def 
shuffle_bit_eightelem_AVX(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_AVX, arr) + + +def untrans_bit_elem_SSE(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_untrans_bit_elem_SSE, arr) + + +def untrans_bit_elem_NEON(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_untrans_bit_elem_NEON, arr) + + +def untrans_bit_elem_AVX(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_untrans_bit_elem_AVX, arr) + + +def untrans_bit_elem_scal(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_untrans_bit_elem_scal, arr) + + +def trans_bit_elem(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_bit_elem, arr) + + +def untrans_bit_elem(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_untrans_bit_elem, arr) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def bitshuffle(np.ndarray arr not None, int block_size=0): + """Bitshuffle an array. + + Output array is the same shape and data type as input array but underlying + buffer has been bitshuffled. + + Parameters + ---------- + arr : numpy array + Data to be processed. + block_size : positive integer + Block size in number of elements. By default, block size is chosen + automatically. + + Returns + ------- + out : numpy array + Array with the same shape as input but underlying data has been + bitshuffled. + + """ + + cdef int ii, size, itemsize, count=0 + cdef np.ndarray out + out, size, itemsize = _setup_arr(arr) + + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat + arr_flat = arr.view(np.uint8).ravel() + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat + out_flat = out.view(np.uint8).ravel() + cdef void* arr_ptr = &arr_flat[0] + cdef void* out_ptr = &out_flat[0] + + with nogil: + for ii in range(REPEATC): + count = bshuf_bitshuffle(arr_ptr, out_ptr, size, itemsize, block_size) + if count < 0: + msg = "Failed. Error code %d." 
+ excp = RuntimeError(msg % count, count) + raise excp + return out + + +@cython.boundscheck(False) +@cython.wraparound(False) +def bitunshuffle(np.ndarray arr not None, int block_size=0): + """Bitunshuffle an array. + + Output array is the same shape and data type as input array but underlying + buffer has been un-bitshuffled. + + Parameters + ---------- + arr : numpy array + Data to be processed. + block_size : positive integer + Block size in number of elements. Must match value used for shuffling. + + Returns + ------- + out : numpy array + Array with the same shape as input but underlying data has been + un-bitshuffled. + + """ + + cdef int ii, size, itemsize, count=0 + cdef np.ndarray out + out, size, itemsize = _setup_arr(arr) + + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat + arr_flat = arr.view(np.uint8).ravel() + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat + out_flat = out.view(np.uint8).ravel() + cdef void* arr_ptr = &arr_flat[0] + cdef void* out_ptr = &out_flat[0] + + with nogil: + for ii in range(REPEATC): + count = bshuf_bitunshuffle(arr_ptr, out_ptr, size, itemsize, block_size) + if count < 0: + msg = "Failed. Error code %d." + excp = RuntimeError(msg % count, count) + raise excp + return out + + +@cython.boundscheck(False) +@cython.wraparound(False) +def compress_lz4(np.ndarray arr not None, int block_size=0): + """Bitshuffle then compress an array using LZ4. + + Parameters + ---------- + arr : numpy array + Data to be processed. + block_size : positive integer + Block size in number of elements. By default, block size is chosen + automatically. + + Returns + ------- + out : array with np.uint8 data type + Buffer holding compressed data. + + """ + + cdef int ii, size, itemsize, count=0 + shape = (arr.shape[i] for i in range(arr.ndim)) + if not arr.flags['C_CONTIGUOUS']: + msg = "Input array must be C-contiguous." 
+ raise ValueError(msg) + size = arr.size + dtype = arr.dtype + itemsize = dtype.itemsize + + max_out_size = bshuf_compress_lz4_bound(size, itemsize, block_size) + + cdef np.ndarray out + out = np.empty(max_out_size, dtype=np.uint8) + + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat + arr_flat = arr.view(np.uint8).ravel() + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat + out_flat = out.view(np.uint8).ravel() + cdef void* arr_ptr = &arr_flat[0] + cdef void* out_ptr = &out_flat[0] + with nogil: + for ii in range(REPEATC): + count = bshuf_compress_lz4(arr_ptr, out_ptr, size, itemsize, block_size) + if count < 0: + msg = "Failed. Error code %d." + excp = RuntimeError(msg % count, count) + raise excp + return out[:count] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def decompress_lz4(np.ndarray arr not None, shape, dtype, int block_size=0): + """Decompress a buffer using LZ4 then bitunshuffle it yielding an array. + + Parameters + ---------- + arr : numpy array + Input data to be decompressed. + shape : tuple of integers + Shape of the output (decompressed array). Must match the shape of the + original data array before compression. + dtype : numpy dtype + Datatype of the output array. Must match the data type of the original + data array before compression. + block_size : positive integer + Block size in number of elements. Must match value used for + compression. + + Returns + ------- + out : numpy array with shape *shape* and data type *dtype* + Decompressed data. + + """ + + cdef int ii, size, itemsize, count=0 + if not arr.flags['C_CONTIGUOUS']: + msg = "Input array must be C-contiguous." 
+ raise ValueError(msg) + size = np.prod(shape) + itemsize = dtype.itemsize + + cdef np.ndarray out + out = np.empty(tuple(shape), dtype=dtype) + + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat + arr_flat = arr.view(np.uint8).ravel() + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat + out_flat = out.view(np.uint8).ravel() + cdef void* arr_ptr = &arr_flat[0] + cdef void* out_ptr = &out_flat[0] + with nogil: + for ii in range(REPEATC): + count = bshuf_decompress_lz4(arr_ptr, out_ptr, size, itemsize, + block_size) + if count < 0: + msg = "Failed. Error code %d." + excp = RuntimeError(msg % count, count) + raise excp + if count != arr.size: + msg = "Decompressed different number of bytes than input buffer size." + msg += "Input buffer %d, decompressed %d." % (arr.size, count) + raise RuntimeError(msg, count) + return out + + +IF ZSTD_SUPPORT: + @cython.boundscheck(False) + @cython.wraparound(False) + def compress_zstd(np.ndarray arr not None, int block_size=0, int comp_lvl=1): + """Bitshuffle then compress an array using ZSTD. + + Parameters + ---------- + arr : numpy array + Data to be processed. + block_size : positive integer + Block size in number of elements. By default, block size is chosen + automatically. + comp_lvl : positive integer + Compression level applied by ZSTD + + Returns + ------- + out : array with np.uint8 data type + Buffer holding compressed data. + + """ + + cdef int ii, size, itemsize, count=0 + shape = (arr.shape[i] for i in range(arr.ndim)) + if not arr.flags['C_CONTIGUOUS']: + msg = "Input array must be C-contiguous." 
+ raise ValueError(msg) + size = arr.size + dtype = arr.dtype + itemsize = dtype.itemsize + + max_out_size = bshuf_compress_zstd_bound(size, itemsize, block_size) + + cdef np.ndarray out + out = np.empty(max_out_size, dtype=np.uint8) + + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat + arr_flat = arr.view(np.uint8).ravel() + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat + out_flat = out.view(np.uint8).ravel() + cdef void* arr_ptr = &arr_flat[0] + cdef void* out_ptr = &out_flat[0] + with nogil: + for ii in range(REPEATC): + count = bshuf_compress_zstd(arr_ptr, out_ptr, size, itemsize, block_size, comp_lvl) + if count < 0: + msg = "Failed. Error code %d." + excp = RuntimeError(msg % count, count) + raise excp + return out[:count] + + @cython.boundscheck(False) + @cython.wraparound(False) + def decompress_zstd(np.ndarray arr not None, shape, dtype, int block_size=0): + """Decompress a buffer using ZSTD then bitunshuffle it yielding an array. + + Parameters + ---------- + arr : numpy array + Input data to be decompressed. + shape : tuple of integers + Shape of the output (decompressed array). Must match the shape of the + original data array before compression. + dtype : numpy dtype + Datatype of the output array. Must match the data type of the original + data array before compression. + block_size : positive integer + Block size in number of elements. Must match value used for + compression. + + Returns + ------- + out : numpy array with shape *shape* and data type *dtype* + Decompressed data. + + """ + + cdef int ii, size, itemsize, count=0 + if not arr.flags['C_CONTIGUOUS']: + msg = "Input array must be C-contiguous." 
+ raise ValueError(msg) + size = np.prod(shape) + itemsize = dtype.itemsize + + cdef np.ndarray out + out = np.empty(tuple(shape), dtype=dtype) + + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat + arr_flat = arr.view(np.uint8).ravel() + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat + out_flat = out.view(np.uint8).ravel() + cdef void* arr_ptr = &arr_flat[0] + cdef void* out_ptr = &out_flat[0] + with nogil: + for ii in range(REPEATC): + count = bshuf_decompress_zstd(arr_ptr, out_ptr, size, itemsize, + block_size) + if count < 0: + msg = "Failed. Error code %d." + excp = RuntimeError(msg % count, count) + raise excp + if count != arr.size: + msg = "Decompressed different number of bytes than input buffer size." + msg += "Input buffer %d, decompressed %d." % (arr.size, count) + raise RuntimeError(msg, count) + return out diff --git a/bitshuffle/h5.pyx b/bitshuffle/h5.pyx new file mode 100644 index 00000000..c92e24c8 --- /dev/null +++ b/bitshuffle/h5.pyx @@ -0,0 +1,235 @@ +""" +HDF5 support for Bitshuffle. + +To read a dataset that uses the Bitshuffle filter using h5py, simply import +this module (unless you have installed the Bitshuffle dynamically loaded +filter, in which case importing this module is unnecessary). + +To create a new dataset that includes the Bitshuffle filter, use one of the +convenience functions provided. + + +Constants +========= + + H5FILTER : The Bitshuffle HDF5 filter integer identifier. + H5_COMPRESS_LZ4 : Filter option flag for LZ4 compression. + H5_COMPRESS_ZSTD : Filter option flag for ZSTD compression. 
+ +Functions +========= + + create_dataset + create_bitshuffle_lzf_dataset + create_bitshuffle_compressed_dataset + +Examples +======== + + >>> import numpy as np + >>> import h5py + >>> import bitshuffle.h5 + + >>> shape = (123, 456) + >>> chunks = (10, 456) + >>> dtype = np.float64 + + >>> f = h5py.File("tmp_test.h5") + >>> bitshuffle.h5.create_bitshuffle_compressed_dataset( + f, "some_data", shape, dtype, chunks) + >>> f["some_data"][:] = 42 + +""" + +from __future__ import absolute_import, division, print_function, unicode_literals + +import sys +import numpy +import h5py +from h5py import h5d, h5fd, h5s, h5t, h5p, h5z, defs, filters + +cimport cython + + +cdef extern from b"bshuf_h5filter.h": + int bshuf_register_h5filter() + int BSHUF_H5FILTER + int BSHUF_H5_COMPRESS_LZ4 + int BSHUF_H5_COMPRESS_ZSTD + +cdef extern int init_filter(const char* libname) + +cdef int LZF_FILTER = 32000 + +H5FILTER = BSHUF_H5FILTER +H5_COMPRESS_LZ4 = BSHUF_H5_COMPRESS_LZ4 +H5_COMPRESS_ZSTD = BSHUF_H5_COMPRESS_ZSTD + +# Init HDF5 dynamic loading with HDF5 library used by h5py +if not sys.platform.startswith('win'): + if sys.version_info[0] >= 3: + libs = [bytes(h5d.__file__, encoding='utf-8'), + bytes(h5fd.__file__, encoding='utf-8'), + bytes(h5s.__file__, encoding='utf-8'), + bytes(h5t.__file__, encoding='utf-8'), + bytes(h5p.__file__, encoding='utf-8'), + bytes(h5z.__file__, encoding='utf-8'), + bytes(defs.__file__, encoding='utf-8')] + else: + libs = [h5d.__file__, h5fd.__file__, h5s.__file__, h5t.__file__, + h5p.__file__, h5z.__file__, defs.__file__] + + # Ensure all symbols are loaded + success = -1 + for lib in libs: + success = init_filter(lib) + if success == 0: + break + + if success == -1: + raise RuntimeError("Failed to load all HDF5 symbols using these libs: {}".format(libs)) + + +def register_h5_filter(): + ret = bshuf_register_h5filter() + if ret < 0: + raise RuntimeError("Failed to register bitshuffle HDF5 filter.", ret) + + +register_h5_filter() + + +def 
create_dataset(parent, name, shape, dtype, chunks=None, maxshape=None, + fillvalue=None, track_times=None, + filter_pipeline=(), filter_flags=None, filter_opts=None): + """Create a dataset with an arbitrary filter pipeline. + + Return a new low-level dataset identifier. + + Much of this code is copied from h5py, but couldn't reuse much code due to + unstable API. + + """ + + if hasattr(filter_pipeline, "__getitem__"): + filter_pipeline = list(filter_pipeline) + else: + filter_pipeline = [filter_pipeline] + filter_flags = [filter_flags] + filter_opts = [filter_opts] + nfilters = len(filter_pipeline) + if filter_flags is None: + filter_flags = [None] * nfilters + if filter_opts is None: + filter_opts = [None] * nfilters + if not len(filter_flags) == nfilters or not len(filter_opts) == nfilters: + msg = "Supplied incompatible number of filters, flags, and options." + raise ValueError(msg) + + shape = tuple(shape) + + tmp_shape = maxshape if maxshape is not None else shape + # Validate chunk shape + chunks_larger = (numpy.array([ not i>=j + for i, j in zip(tmp_shape, chunks) if i is not None])).any() + if isinstance(chunks, tuple) and chunks_larger: + errmsg = ("Chunk shape must not be greater than data shape in any " + "dimension. 
{} is not compatible with {}".format(chunks, shape)) + raise ValueError(errmsg) + + if isinstance(dtype, h5py.Datatype): + # Named types are used as-is + tid = dtype.id + dtype = tid.dtype # Following code needs this + else: + # Validate dtype + dtype = numpy.dtype(dtype) + tid = h5t.py_create(dtype, logical=1) + + if shape == (): + if any((chunks, filter_pipeline)): + raise TypeError("Scalar datasets don't support chunk/filter options") + if maxshape and maxshape != (): + raise TypeError("Scalar datasets cannot be extended") + return h5p.create(h5p.DATASET_CREATE) + + def rq_tuple(tpl, name): + """Check if chunks/maxshape match dataset rank""" + if tpl in (None, True): + return + try: + tpl = tuple(tpl) + except TypeError: + raise TypeError('"%s" argument must be None or a sequence object' % name) + if len(tpl) != len(shape): + raise ValueError('"%s" must have same rank as dataset shape' % name) + + rq_tuple(chunks, 'chunks') + rq_tuple(maxshape, 'maxshape') + + if (chunks is True) or (chunks is None and filter_pipeline): + chunks = filters.guess_chunk(shape, maxshape, dtype.itemsize) + + if maxshape is True: + maxshape = (None,)*len(shape) + + dcpl = h5p.create(h5p.DATASET_CREATE) + if chunks is not None: + dcpl.set_chunk(chunks) + dcpl.set_fill_time(h5d.FILL_TIME_ALLOC) # prevent resize glitch + + if fillvalue is not None: + fillvalue = numpy.array(fillvalue) + dcpl.set_fill_value(fillvalue) + + if track_times in (True, False): + dcpl.set_obj_track_times(track_times) + elif track_times is not None: + raise TypeError("track_times must be either True or False") + + for ii in range(nfilters): + this_filter = filter_pipeline[ii] + this_flags = filter_flags[ii] + this_opts = filter_opts[ii] + if this_flags is None: + this_flags = 0 + if this_opts is None: + this_opts = () + dcpl.set_filter(this_filter, this_flags, this_opts) + + if maxshape is not None: + maxshape = tuple(m if m is not None else h5s.UNLIMITED + for m in maxshape) + sid = h5s.create_simple(shape, 
maxshape) + + dset_id = h5d.create(parent.id, name, tid, sid, dcpl=dcpl) + + return dset_id + + +def create_bitshuffle_lzf_dataset(parent, name, shape, dtype, chunks=None, + maxshape=None, fillvalue=None, + track_times=None): + """Create dataset with a filter pipeline including bitshuffle and LZF""" + + filter_pipeline = [H5FILTER, LZF_FILTER] + dset_id = create_dataset(parent, name, shape, dtype, chunks=chunks, + filter_pipeline=filter_pipeline, maxshape=maxshape, + fillvalue=fillvalue, track_times=track_times) + return dset_id + + +def create_bitshuffle_compressed_dataset(parent, name, shape, dtype, + chunks=None, maxshape=None, + fillvalue=None, track_times=None): + """Create dataset with bitshuffle+internal LZ4 compression.""" + + filter_pipeline = [H5FILTER, ] + filter_opts = [(0, H5_COMPRESS_LZ4)] + dset_id = create_dataset(parent, name, shape, dtype, chunks=chunks, + filter_pipeline=filter_pipeline, + filter_opts=filter_opts, maxshape=maxshape, + fillvalue=fillvalue, track_times=track_times) + return dset_id + + diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat new file mode 100644 index 00000000..ccbb10f9 --- /dev/null +++ b/conda-recipe/bld.bat @@ -0,0 +1,3 @@ +SET CONDA_HOME=%PREFIX% +"%PYTHON%" setup.py install +if errorlevel 1 exit 1 diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh new file mode 100644 index 00000000..34c3a689 --- /dev/null +++ b/conda-recipe/build.sh @@ -0,0 +1,2 @@ +export CONDA_HOME=$PREFIX +$PYTHON setup.py install # Python command to install the script diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml new file mode 100644 index 00000000..ac227e2b --- /dev/null +++ b/conda-recipe/meta.yaml @@ -0,0 +1,27 @@ +package: + name: bitshuffle + version: 0.2.1 +source: + # git_url: https://github.com/kiyo-masui/bitshuffle.git + # git_rev: 0.2.1 + path: .. 
+ patches: + - setup.py.patch + +requirements: + build: + - python + - setuptools + - cython + - numpy + - h5py + - hdf5 + run: + - python + - numpy + - h5py + - cython + +about: + home: https://github.com/kiyo-masui/bitshuffle/blob/master/setup.py + summary: "bitshuffle library." diff --git a/conda-recipe/setup.py.patch b/conda-recipe/setup.py.patch new file mode 100644 index 00000000..437a5ffa --- /dev/null +++ b/conda-recipe/setup.py.patch @@ -0,0 +1,13 @@ +--- setup.py 2016-01-19 16:56:12.954563000 +0100 ++++ xxx.py 2016-01-19 16:56:00.817087000 +0100 +@@ -40,8 +40,8 @@ + + # Copied from h5py. + # TODO, figure out what the canonacal way to do this should be. +-INCLUDE_DIRS = [] +-LIBRARY_DIRS = [] ++INCLUDE_DIRS = [os.environ['CONDA_HOME'] + '/include'] ++LIBRARY_DIRS = [os.environ['CONDA_HOME'] + '/lib'] + if sys.platform == 'darwin': + # putting here both macports and homebrew paths will generate + # "ld: warning: dir not found" at the linking phase diff --git a/lz4/LICENSE b/lz4/LICENSE new file mode 100644 index 00000000..74c2cdd7 --- /dev/null +++ b/lz4/LICENSE @@ -0,0 +1,24 @@ +LZ4 Library +Copyright (c) 2011-2016, Yann Collet +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/lz4/README.md b/lz4/README.md new file mode 100644 index 00000000..f6ebf5e1 --- /dev/null +++ b/lz4/README.md @@ -0,0 +1,21 @@ +LZ4 - Library Files +================================ + +The __lib__ directory contains several files, but you don't necessarily need them all. + +To integrate fast LZ4 compression/decompression into your program, you basically just need "**lz4.c**" and "**lz4.h**". + +For more compression at the cost of compression speed (while preserving decompression speed), use **lz4hc** on top of regular lz4. `lz4hc` only provides compression functions. It also needs `lz4` to compile properly. + +If you want to produce files or data streams compatible with `lz4` command line utility, use **lz4frame**. This library encapsulates lz4-compressed blocks into the [official interoperable frame format]. In order to work properly, lz4frame needs lz4 and lz4hc, and also **xxhash**, which provides error detection algorithm. +(_Advanced stuff_ : It's possible to hide xxhash symbols into a local namespace. This is what `liblz4` does, to avoid symbol duplication in case a user program would link to several libraries containing xxhash symbols.) + +A more complex "lz4frame_static.h" is also provided, although its usage is not recommended. It contains definitions which are not guaranteed to remain stable within future versions. Use for static linking ***only***. + +The other files are not source code. 
There are : + + - LICENSE : contains the BSD license text + - Makefile : script to compile or install lz4 library (static or dynamic) + - liblz4.pc.in : for pkg-config (make install) + +[official interoperable frame format]: ../lz4_Frame_format.md diff --git a/lz4/lz4.c b/lz4/lz4.c new file mode 100644 index 00000000..9f5e9bfa --- /dev/null +++ b/lz4/lz4.c @@ -0,0 +1,2495 @@ +/* + LZ4 - Fast LZ compression algorithm + Copyright (C) 2011-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - LZ4 homepage : http://www.lz4.org + - LZ4 source repository : https://github.com/lz4/lz4 +*/ + +/*-************************************ +* Tuning parameters +**************************************/ +/* + * LZ4_HEAPMODE : + * Select how default compression functions will allocate memory for their hash table, + * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()). + */ +#ifndef LZ4_HEAPMODE +# define LZ4_HEAPMODE 0 +#endif + +/* + * LZ4_ACCELERATION_DEFAULT : + * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0 + */ +#define LZ4_ACCELERATION_DEFAULT 1 +/* + * LZ4_ACCELERATION_MAX : + * Any "acceleration" value higher than this threshold + * get treated as LZ4_ACCELERATION_MAX instead (fix #876) + */ +#define LZ4_ACCELERATION_MAX 65537 + + +/*-************************************ +* CPU Feature Detection +**************************************/ +/* LZ4_FORCE_MEMORY_ACCESS + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets which assembly generation depends on alignment. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */ +# if defined(__GNUC__) && \ + ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \ + || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define LZ4_FORCE_MEMORY_ACCESS 2 +# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) +# define LZ4_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/* + * LZ4_FORCE_SW_BITCOUNT + * Define this parameter if your target system or compiler does not support hardware bit count + */ +#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for WinCE doesn't support Hardware bit count */ +# undef LZ4_FORCE_SW_BITCOUNT /* avoid double def */ +# define LZ4_FORCE_SW_BITCOUNT +#endif + + + +/*-************************************ +* Dependency +**************************************/ +/* + * LZ4_SRC_INCLUDED: + * Amalgamation flag, whether lz4.c is included + */ +#ifndef LZ4_SRC_INCLUDED +# define LZ4_SRC_INCLUDED 1 +#endif + +#ifndef LZ4_STATIC_LINKING_ONLY +#define LZ4_STATIC_LINKING_ONLY +#endif + +#ifndef LZ4_DISABLE_DEPRECATE_WARNINGS +#define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */ +#endif + +#define LZ4_STATIC_LINKING_ONLY /* LZ4_DISTANCE_MAX */ +#include "lz4.h" +/* see also "memory routines" below */ + + +/*-************************************ +* Compiler Options +**************************************/ +#if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Visual Studio 2005+ */ +# include /* only present in VS2005+ */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif /* _MSC_VER */ + +#ifndef LZ4_FORCE_INLINE +# ifdef _MSC_VER /* Visual Studio */ +# define LZ4_FORCE_INLINE static __forceinline +# else +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# ifdef __GNUC__ +# define 
LZ4_FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define LZ4_FORCE_INLINE static inline +# endif +# else +# define LZ4_FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +# endif /* _MSC_VER */ +#endif /* LZ4_FORCE_INLINE */ + +/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE + * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8, + * together with a simple 8-byte copy loop as a fall-back path. + * However, this optimization hurts the decompression speed by >30%, + * because the execution does not go to the optimized loop + * for typical compressible data, and all of the preamble checks + * before going to the fall-back path become useless overhead. + * This optimization happens only with the -O3 flag, and -O2 generates + * a simple 8-byte copy loop. + * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8 + * functions are annotated with __attribute__((optimize("O2"))), + * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute + * of LZ4_wildCopy8 does not affect the compression speed. 
+ */ +#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__) +# define LZ4_FORCE_O2 __attribute__((optimize("O2"))) +# undef LZ4_FORCE_INLINE +# define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline)) +#else +# define LZ4_FORCE_O2 +#endif + +#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__) +# define expect(expr,value) (__builtin_expect ((expr),(value)) ) +#else +# define expect(expr,value) (expr) +#endif + +#ifndef likely +#define likely(expr) expect((expr) != 0, 1) +#endif +#ifndef unlikely +#define unlikely(expr) expect((expr) != 0, 0) +#endif + +/* Should the alignment test prove unreliable, for some reason, + * it can be disabled by setting LZ4_ALIGN_TEST to 0 */ +#ifndef LZ4_ALIGN_TEST /* can be externally provided */ +# define LZ4_ALIGN_TEST 1 +#endif + + +/*-************************************ +* Memory routines +**************************************/ +#ifdef LZ4_USER_MEMORY_FUNCTIONS +/* memory management functions can be customized by user project. 
+ * Below functions must exist somewhere in the Project + * and be available at link time */ +void* LZ4_malloc(size_t s); +void* LZ4_calloc(size_t n, size_t s); +void LZ4_free(void* p); +# define ALLOC(s) LZ4_malloc(s) +# define ALLOC_AND_ZERO(s) LZ4_calloc(1,s) +# define FREEMEM(p) LZ4_free(p) +#else +# include /* malloc, calloc, free */ +# define ALLOC(s) malloc(s) +# define ALLOC_AND_ZERO(s) calloc(1,s) +# define FREEMEM(p) free(p) +#endif + +#include /* memset, memcpy */ +#define MEM_INIT(p,v,s) memset((p),(v),(s)) + + +/*-************************************ +* Common Constants +**************************************/ +#define MINMATCH 4 + +#define WILDCOPYLENGTH 8 +#define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */ +#define FASTLOOP_SAFE_DISTANCE 64 +static const int LZ4_minLength = (MFLIMIT+1); + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define LZ4_DISTANCE_ABSOLUTE_MAX 65535 +#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX) /* max supported by LZ4 format */ +# error "LZ4_DISTANCE_MAX is too big : must be <= 65535" +#endif + +#define ML_BITS 4 +#define ML_MASK ((1U<=1) +# include +#else +# ifndef assert +# define assert(condition) ((void)0) +# endif +#endif + +#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use after variable declarations */ + +#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) +# include + static int g_debuglog_enable = 1; +# define DEBUGLOG(l, ...) { \ + if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \ + fprintf(stderr, __FILE__ ": "); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, " \n"); \ + } } +#else +# define DEBUGLOG(l, ...) 
{} /* disabled */ +#endif + +static int LZ4_isAligned(const void* ptr, size_t alignment) +{ + return ((size_t)ptr & (alignment -1)) == 0; +} + + +/*-************************************ +* Types +**************************************/ +#include +#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef uintptr_t uptrval; +#else +# if UINT_MAX != 4294967295UL +# error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4" +# endif + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; + typedef size_t uptrval; /* generally true, except OpenVMS-64 */ +#endif + +#if defined(__x86_64__) + typedef U64 reg_t; /* 64-bits in x32 mode */ +#else + typedef size_t reg_t; /* 32-bits in x32 mode */ +#endif + +typedef enum { + notLimited = 0, + limitedOutput = 1, + fillOutput = 2 +} limitedOutput_directive; + + +/*-************************************ +* Reading and writing into memory +**************************************/ + +/** + * LZ4 relies on memcpy with a constant size being inlined. In freestanding + * environments, the compiler can't assume the implementation of memcpy() is + * standard compliant, so it can't apply its specialized memcpy() inlining + * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze + * memcpy() as if it were standard compliant, so it can inline it in freestanding + * environments. This is needed when decompressing the Linux Kernel, for example. 
+ */ +#if defined(__GNUC__) && (__GNUC__ >= 4) +#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) +#else +#define LZ4_memcpy(dst, src, size) memcpy(dst, src, size) +#endif + +static unsigned LZ4_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + + +#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2) +/* lie to the compiler about data alignment; use with caution */ + +static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; } +static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; } +static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; } + +static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } + +#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign; + +static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } +static U32 LZ4_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; } + +static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } + +#else /* safe and portable access using memcpy() */ + +static U16 LZ4_read16(const void* memPtr) +{ + U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; +} + +static U32 LZ4_read32(const void* memPtr) +{ + U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; +} + +static reg_t LZ4_read_ARCH(const void* memPtr) +{ + reg_t val; LZ4_memcpy(&val, memPtr, 
sizeof(val)); return val; +} + +static void LZ4_write16(void* memPtr, U16 value) +{ + LZ4_memcpy(memPtr, &value, sizeof(value)); +} + +static void LZ4_write32(void* memPtr, U32 value) +{ + LZ4_memcpy(memPtr, &value, sizeof(value)); +} + +#endif /* LZ4_FORCE_MEMORY_ACCESS */ + + +static U16 LZ4_readLE16(const void* memPtr) +{ + if (LZ4_isLittleEndian()) { + return LZ4_read16(memPtr); + } else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)((U16)p[0] + (p[1]<<8)); + } +} + +static void LZ4_writeLE16(void* memPtr, U16 value) +{ + if (LZ4_isLittleEndian()) { + LZ4_write16(memPtr, value); + } else { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE) value; + p[1] = (BYTE)(value>>8); + } +} + +/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */ +LZ4_FORCE_INLINE +void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd) +{ + BYTE* d = (BYTE*)dstPtr; + const BYTE* s = (const BYTE*)srcPtr; + BYTE* const e = (BYTE*)dstEnd; + + do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d= 16. 
*/ +LZ4_FORCE_INLINE void +LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd) +{ + BYTE* d = (BYTE*)dstPtr; + const BYTE* s = (const BYTE*)srcPtr; + BYTE* const e = (BYTE*)dstEnd; + + do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d= dstPtr + MINMATCH + * - there is at least 8 bytes available to write after dstEnd */ +LZ4_FORCE_INLINE void +LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset) +{ + BYTE v[8]; + + assert(dstEnd >= dstPtr + MINMATCH); + + switch(offset) { + case 1: + MEM_INIT(v, *srcPtr, 8); + break; + case 2: + LZ4_memcpy(v, srcPtr, 2); + LZ4_memcpy(&v[2], srcPtr, 2); + LZ4_memcpy(&v[4], v, 4); + break; + case 4: + LZ4_memcpy(v, srcPtr, 4); + LZ4_memcpy(&v[4], srcPtr, 4); + break; + default: + LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset); + return; + } + + LZ4_memcpy(dstPtr, v, 8); + dstPtr += 8; + while (dstPtr < dstEnd) { + LZ4_memcpy(dstPtr, v, 8); + dstPtr += 8; + } +} +#endif + + +/*-************************************ +* Common functions +**************************************/ +static unsigned LZ4_NbCommonBytes (reg_t val) +{ + assert(val != 0); + if (LZ4_isLittleEndian()) { + if (sizeof(val) == 8) { +# if defined(_MSC_VER) && (_MSC_VER >= 1800) && defined(_M_AMD64) && !defined(LZ4_FORCE_SW_BITCOUNT) + /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */ + return (unsigned)_tzcnt_u64(val) >> 3; +# elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanForward64(&r, (U64)val); + return (unsigned)r >> 3; +# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_ctzll((U64)val) >> 3; +# else + const U64 m = 0x0101010101010101ULL; + val ^= val - 1; + return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56); +# endif + } else /* 32 bits */ { +# if 
defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r; + _BitScanForward(&r, (U32)val); + return (unsigned)r >> 3; +# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_ctz((U32)val) >> 3; +# else + const U32 m = 0x01010101; + return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24; +# endif + } + } else /* Big Endian CPU */ { + if (sizeof(val)==8) { +# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_clzll((U64)val) >> 3; +# else +#if 1 + /* this method is probably faster, + * but adds a 128 bytes lookup table */ + static const unsigned char ctz7_tab[128] = { + 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + }; + U64 const mask = 0x0101010101010101ULL; + U64 const t = (((val >> 8) - mask) | val) & mask; + return ctz7_tab[(t * 0x0080402010080402ULL) >> 57]; +#else + /* this method doesn't consume memory space like the previous one, + * but it contains several branches, + * that may end up slowing execution */ + static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 16 on 32 bits. + Just to avoid some static analyzer complaining about shift by 32 on 32-bits target. + Note that this code path is never triggered in 32-bits mode. 
*/ + unsigned r; + if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +#endif +# endif + } else /* 32 bits */ { +# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_clz((U32)val) >> 3; +# else + val >>= 8; + val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) | + (val + 0x00FF0000)) >> 24; + return (unsigned)val ^ 3; +# endif + } + } +} + + +#define STEPSIZE sizeof(reg_t) +LZ4_FORCE_INLINE +unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) +{ + const BYTE* const pStart = pIn; + + if (likely(pIn < pInLimit-(STEPSIZE-1))) { + reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); + if (!diff) { + pIn+=STEPSIZE; pMatch+=STEPSIZE; + } else { + return LZ4_NbCommonBytes(diff); + } } + + while (likely(pIn < pInLimit-(STEPSIZE-1))) { + reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); + if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; } + pIn += LZ4_NbCommonBytes(diff); + return (unsigned)(pIn - pStart); + } + + if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; } + if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; } + if ((pIn compression run slower on incompressible data */ + + +/*-************************************ +* Local Structures and types +**************************************/ +typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t; + +/** + * This enum distinguishes several different modes of accessing previous + * content in the stream. + * + * - noDict : There is no preceding content. 
+ * - withPrefix64k : Table entries up to ctx->dictSize before the current blob + * blob being compressed are valid and refer to the preceding + * content (of length ctx->dictSize), which is available + * contiguously preceding in memory the content currently + * being compressed. + * - usingExtDict : Like withPrefix64k, but the preceding content is somewhere + * else in memory, starting at ctx->dictionary with length + * ctx->dictSize. + * - usingDictCtx : Like usingExtDict, but everything concerning the preceding + * content is in a separate context, pointed to by + * ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table + * entries in the current context that refer to positions + * preceding the beginning of the current compression are + * ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx + * ->dictSize describe the location and size of the preceding + * content, and matches are found by looking in the ctx + * ->dictCtx->hashTable. + */ +typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive; +typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; + + +/*-************************************ +* Local Utils +**************************************/ +int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; } +const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; } +int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } +int LZ4_sizeofState(void) { return LZ4_STREAMSIZE; } + + +/*-************************************ +* Internal Definitions used in Tests +**************************************/ +#if defined (__cplusplus) +extern "C" { +#endif + +int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize); + +int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, + int compressedSize, int maxOutputSize, + const void* dictStart, size_t dictSize); + +#if defined (__cplusplus) +} +#endif + +/*-****************************** +* Compression 
functions +********************************/ +LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType) +{ + if (tableType == byU16) + return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); + else + return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); +} + +LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType) +{ + const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG; + if (LZ4_isLittleEndian()) { + const U64 prime5bytes = 889523592379ULL; + return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog)); + } else { + const U64 prime8bytes = 11400714785074694791ULL; + return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog)); + } +} + +LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType) +{ + if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType); + return LZ4_hash4(LZ4_read32(p), tableType); +} + +LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType) +{ + switch (tableType) + { + default: /* fallthrough */ + case clearedTable: { /* illegal! */ assert(0); return; } + case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; } + case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; } + } +} + +LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType) +{ + switch (tableType) + { + default: /* fallthrough */ + case clearedTable: /* fallthrough */ + case byPtr: { /* illegal! 
*/ assert(0); return; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; } + case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; } + } +} + +LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h, + void* tableBase, tableType_t const tableType, + const BYTE* srcBase) +{ + switch (tableType) + { + case clearedTable: { /* illegal! */ assert(0); return; } + case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; } + case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; } + } +} + +LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + U32 const h = LZ4_hashPosition(p, tableType); + LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); +} + +/* LZ4_getIndexOnHash() : + * Index of match position registered in hash table. + * hash position must be calculated by using base+index, or dictBase+index. + * Assumption 1 : only valid if tableType == byU32 or byU16. 
+ * Assumption 2 : h is presumed valid (within limits of hash table) + */ +LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType) +{ + LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2); + if (tableType == byU32) { + const U32* const hashTable = (const U32*) tableBase; + assert(h < (1U << (LZ4_MEMORY_USAGE-2))); + return hashTable[h]; + } + if (tableType == byU16) { + const U16* const hashTable = (const U16*) tableBase; + assert(h < (1U << (LZ4_MEMORY_USAGE-1))); + return hashTable[h]; + } + assert(0); return 0; /* forbidden case */ +} + +static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + if (tableType == byPtr) { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; } + if (tableType == byU32) { const U32* const hashTable = (const U32*) tableBase; return hashTable[h] + srcBase; } + { const U16* const hashTable = (const U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */ +} + +LZ4_FORCE_INLINE const BYTE* +LZ4_getPosition(const BYTE* p, + const void* tableBase, tableType_t tableType, + const BYTE* srcBase) +{ + U32 const h = LZ4_hashPosition(p, tableType); + return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); +} + +LZ4_FORCE_INLINE void +LZ4_prepareTable(LZ4_stream_t_internal* const cctx, + const int inputSize, + const tableType_t tableType) { + /* If the table hasn't been used, it's guaranteed to be zeroed out, and is + * therefore safe to use no matter what mode we're in. Otherwise, we figure + * out if it's safe to leave as is or whether it needs to be reset. 
+ */ + if ((tableType_t)cctx->tableType != clearedTable) { + assert(inputSize >= 0); + if ((tableType_t)cctx->tableType != tableType + || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU) + || ((tableType == byU32) && cctx->currentOffset > 1 GB) + || tableType == byPtr + || inputSize >= 4 KB) + { + DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx); + MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE); + cctx->currentOffset = 0; + cctx->tableType = (U32)clearedTable; + } else { + DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)"); + } + } + + /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back, is faster + * than compressing without a gap. However, compressing with + * currentOffset == 0 is faster still, so we preserve that case. + */ + if (cctx->currentOffset != 0 && tableType == byU32) { + DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset"); + cctx->currentOffset += 64 KB; + } + + /* Finally, clear history */ + cctx->dictCtx = NULL; + cctx->dictionary = NULL; + cctx->dictSize = 0; +} + +/** LZ4_compress_generic() : + * inlined, to ensure branches are decided at compilation time. 
+ * Presumed already validated at this stage: + * - source != NULL + * - inputSize > 0 + */ +LZ4_FORCE_INLINE int LZ4_compress_generic_validated( + LZ4_stream_t_internal* const cctx, + const char* const source, + char* const dest, + const int inputSize, + int *inputConsumed, /* only written when outputDirective == fillOutput */ + const int maxOutputSize, + const limitedOutput_directive outputDirective, + const tableType_t tableType, + const dict_directive dictDirective, + const dictIssue_directive dictIssue, + const int acceleration) +{ + int result; + const BYTE* ip = (const BYTE*) source; + + U32 const startIndex = cctx->currentOffset; + const BYTE* base = (const BYTE*) source - startIndex; + const BYTE* lowLimit; + + const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx; + const BYTE* const dictionary = + dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary; + const U32 dictSize = + dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize; + const U32 dictDelta = (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0; /* make indexes in dictCtx comparable with index in current context */ + + int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx); + U32 const prefixIdxLimit = startIndex - dictSize; /* used when dictDirective == dictSmall */ + const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary; + const BYTE* anchor = (const BYTE*) source; + const BYTE* const iend = ip + inputSize; + const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1; + const BYTE* const matchlimit = iend - LASTLITERALS; + + /* the dictCtx currentOffset is indexed on the start of the dictionary, + * while a dictionary in the current context precedes the currentOffset */ + const BYTE* dictBase = !dictionary ? NULL : (dictDirective == usingDictCtx) ? 
+ dictionary + dictSize - dictCtx->currentOffset : + dictionary + dictSize - startIndex; + + BYTE* op = (BYTE*) dest; + BYTE* const olimit = op + maxOutputSize; + + U32 offset = 0; + U32 forwardH; + + DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType); + assert(ip != NULL); + /* If init conditions are not met, we don't have to mark stream + * as having dirty context, since no action was taken yet */ + if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */ + if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; } /* Size too large (not within 64K limit) */ + if (tableType==byPtr) assert(dictDirective==noDict); /* only supported use case with byPtr */ + assert(acceleration >= 1); + + lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0); + + /* Update context state */ + if (dictDirective == usingDictCtx) { + /* Subsequent linked blocks can't use the dictionary. */ + /* Instead, they use the block we just compressed. 
*/ + cctx->dictCtx = NULL; + cctx->dictSize = (U32)inputSize; + } else { + cctx->dictSize += (U32)inputSize; + } + cctx->currentOffset += (U32)inputSize; + cctx->tableType = (U32)tableType; + + if (inputSizehashTable, tableType, base); + ip++; forwardH = LZ4_hashPosition(ip, tableType); + + /* Main Loop */ + for ( ; ; ) { + const BYTE* match; + BYTE* token; + const BYTE* filledIp; + + /* Find a match */ + if (tableType == byPtr) { + const BYTE* forwardIp = ip; + int step = 1; + int searchMatchNb = acceleration << LZ4_skipTrigger; + do { + U32 const h = forwardH; + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_skipTrigger); + + if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals; + assert(ip < mflimitPlusOne); + + match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base); + forwardH = LZ4_hashPosition(forwardIp, tableType); + LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base); + + } while ( (match+LZ4_DISTANCE_MAX < ip) + || (LZ4_read32(match) != LZ4_read32(ip)) ); + + } else { /* byU32, byU16 */ + + const BYTE* forwardIp = ip; + int step = 1; + int searchMatchNb = acceleration << LZ4_skipTrigger; + do { + U32 const h = forwardH; + U32 const current = (U32)(forwardIp - base); + U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType); + assert(matchIndex <= current); + assert(forwardIp - base < (ptrdiff_t)(2 GB - 1)); + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_skipTrigger); + + if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals; + assert(ip < mflimitPlusOne); + + if (dictDirective == usingDictCtx) { + if (matchIndex < startIndex) { + /* there was no match, try the dictionary */ + assert(tableType == byU32); + matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32); + match = dictBase + matchIndex; + matchIndex += dictDelta; /* make dictCtx index comparable with current context */ + lowLimit = dictionary; + } else { + match = base + matchIndex; + lowLimit = 
(const BYTE*)source; + } + } else if (dictDirective==usingExtDict) { + if (matchIndex < startIndex) { + DEBUGLOG(7, "extDict candidate: matchIndex=%5u < startIndex=%5u", matchIndex, startIndex); + assert(startIndex - matchIndex >= MINMATCH); + match = dictBase + matchIndex; + lowLimit = dictionary; + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; + } + } else { /* single continuous memory segment */ + match = base + matchIndex; + } + forwardH = LZ4_hashPosition(forwardIp, tableType); + LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType); + + DEBUGLOG(7, "candidate at pos=%u (offset=%u \n", matchIndex, current - matchIndex); + if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; } /* match outside of valid area */ + assert(matchIndex < current); + if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX)) + && (matchIndex+LZ4_DISTANCE_MAX < current)) { + continue; + } /* too far */ + assert((current - matchIndex) <= LZ4_DISTANCE_MAX); /* match now expected within distance */ + + if (LZ4_read32(match) == LZ4_read32(ip)) { + if (maybe_extMem) offset = current - matchIndex; + break; /* match found */ + } + + } while(1); + } + + /* Catch up */ + filledIp = ip; + while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; } + + /* Encode Literals */ + { unsigned const litLength = (unsigned)(ip - anchor); + token = op++; + if ((outputDirective == limitedOutput) && /* Check output buffer overflow */ + (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) { + return 0; /* cannot compress within `dst` budget. 
Stored indexes in hash table are nonetheless fine */ + } + if ((outputDirective == fillOutput) && + (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) { + op--; + goto _last_literals; + } + if (litLength >= RUN_MASK) { + int len = (int)(litLength - RUN_MASK); + *token = (RUN_MASK<= 255 ; len-=255) *op++ = 255; + *op++ = (BYTE)len; + } + else *token = (BYTE)(litLength< olimit)) { + /* the match was too close to the end, rewind and go to last literals */ + op = token; + goto _last_literals; + } + + /* Encode Offset */ + if (maybe_extMem) { /* static test */ + DEBUGLOG(6, " with offset=%u (ext if > %i)", offset, (int)(ip - (const BYTE*)source)); + assert(offset <= LZ4_DISTANCE_MAX && offset > 0); + LZ4_writeLE16(op, (U16)offset); op+=2; + } else { + DEBUGLOG(6, " with offset=%u (same segment)", (U32)(ip - match)); + assert(ip-match <= LZ4_DISTANCE_MAX); + LZ4_writeLE16(op, (U16)(ip - match)); op+=2; + } + + /* Encode MatchLength */ + { unsigned matchCode; + + if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx) + && (lowLimit==dictionary) /* match within extDict */ ) { + const BYTE* limit = ip + (dictEnd-match); + assert(dictEnd > match); + if (limit > matchlimit) limit = matchlimit; + matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit); + ip += (size_t)matchCode + MINMATCH; + if (ip==limit) { + unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit); + matchCode += more; + ip += more; + } + DEBUGLOG(6, " with matchLength=%u starting in extDict", matchCode+MINMATCH); + } else { + matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit); + ip += (size_t)matchCode + MINMATCH; + DEBUGLOG(6, " with matchLength=%u", matchCode+MINMATCH); + } + + if ((outputDirective) && /* Check output buffer overflow */ + (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) { + if 
(outputDirective == fillOutput) { + /* Match description too long : reduce it */ + U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255; + ip -= matchCode - newMatchCode; + assert(newMatchCode < matchCode); + matchCode = newMatchCode; + if (unlikely(ip <= filledIp)) { + /* We have already filled up to filledIp so if ip ends up less than filledIp + * we have positions in the hash table beyond the current position. This is + * a problem if we reuse the hash table. So we have to remove these positions + * from the hash table. + */ + const BYTE* ptr; + DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip)); + for (ptr = ip; ptr <= filledIp; ++ptr) { + U32 const h = LZ4_hashPosition(ptr, tableType); + LZ4_clearHash(h, cctx->hashTable, tableType); + } + } + } else { + assert(outputDirective == limitedOutput); + return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ + } + } + if (matchCode >= ML_MASK) { + *token += ML_MASK; + matchCode -= ML_MASK; + LZ4_write32(op, 0xFFFFFFFF); + while (matchCode >= 4*255) { + op+=4; + LZ4_write32(op, 0xFFFFFFFF); + matchCode -= 4*255; + } + op += matchCode / 255; + *op++ = (BYTE)(matchCode % 255); + } else + *token += (BYTE)(matchCode); + } + /* Ensure we have enough space for the last literals. 
*/ + assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit)); + + anchor = ip; + + /* Test end of chunk */ + if (ip >= mflimitPlusOne) break; + + /* Fill table */ + LZ4_putPosition(ip-2, cctx->hashTable, tableType, base); + + /* Test next position */ + if (tableType == byPtr) { + + match = LZ4_getPosition(ip, cctx->hashTable, tableType, base); + LZ4_putPosition(ip, cctx->hashTable, tableType, base); + if ( (match+LZ4_DISTANCE_MAX >= ip) + && (LZ4_read32(match) == LZ4_read32(ip)) ) + { token=op++; *token=0; goto _next_match; } + + } else { /* byU32, byU16 */ + + U32 const h = LZ4_hashPosition(ip, tableType); + U32 const current = (U32)(ip-base); + U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType); + assert(matchIndex < current); + if (dictDirective == usingDictCtx) { + if (matchIndex < startIndex) { + /* there was no match, try the dictionary */ + matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32); + match = dictBase + matchIndex; + lowLimit = dictionary; /* required for match length counter */ + matchIndex += dictDelta; + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; /* required for match length counter */ + } + } else if (dictDirective==usingExtDict) { + if (matchIndex < startIndex) { + match = dictBase + matchIndex; + lowLimit = dictionary; /* required for match length counter */ + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; /* required for match length counter */ + } + } else { /* single memory segment */ + match = base + matchIndex; + } + LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType); + assert(matchIndex < current); + if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1) + && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 
1 : (matchIndex+LZ4_DISTANCE_MAX >= current)) + && (LZ4_read32(match) == LZ4_read32(ip)) ) { + token=op++; + *token=0; + if (maybe_extMem) offset = current - matchIndex; + DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i", + (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source)); + goto _next_match; + } + } + + /* Prepare next loop */ + forwardH = LZ4_hashPosition(++ip, tableType); + + } + +_last_literals: + /* Encode Last Literals */ + { size_t lastRun = (size_t)(iend - anchor); + if ( (outputDirective) && /* Check output buffer overflow */ + (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) { + if (outputDirective == fillOutput) { + /* adapt lastRun to fill 'dst' */ + assert(olimit >= op); + lastRun = (size_t)(olimit-op) - 1/*token*/; + lastRun -= (lastRun + 256 - RUN_MASK) / 256; /*additional length tokens*/ + } else { + assert(outputDirective == limitedOutput); + return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ + } + } + DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun); + if (lastRun >= RUN_MASK) { + size_t accumulator = lastRun - RUN_MASK; + *op++ = RUN_MASK << ML_BITS; + for(; accumulator >= 255 ; accumulator-=255) *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRun< 0); + DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result); + return result; +} + +/** LZ4_compress_generic() : + * inlined, to ensure branches are decided at compilation time; + * takes care of src == (NULL, 0) + * and forward the rest to LZ4_compress_generic_validated */ +LZ4_FORCE_INLINE int LZ4_compress_generic( + LZ4_stream_t_internal* const cctx, + const char* const src, + char* const dst, + const int srcSize, + int *inputConsumed, /* only written when outputDirective == fillOutput */ + const int dstCapacity, + const limitedOutput_directive outputDirective, + const tableType_t tableType, + const dict_directive dictDirective, + const 
dictIssue_directive dictIssue, + const int acceleration) +{ + DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i", + srcSize, dstCapacity); + + if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; } /* Unsupported srcSize, too large (or negative) */ + if (srcSize == 0) { /* src == NULL supported if srcSize == 0 */ + if (outputDirective != notLimited && dstCapacity <= 0) return 0; /* no output, can't write anything */ + DEBUGLOG(5, "Generating an empty block"); + assert(outputDirective == notLimited || dstCapacity >= 1); + assert(dst != NULL); + dst[0] = 0; + if (outputDirective == fillOutput) { + assert (inputConsumed != NULL); + *inputConsumed = 0; + } + return 1; + } + assert(src != NULL); + + return LZ4_compress_generic_validated(cctx, src, dst, srcSize, + inputConsumed, /* only written into if outputDirective == fillOutput */ + dstCapacity, outputDirective, + tableType, dictDirective, dictIssue, acceleration); +} + + +int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) +{ + LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse; + assert(ctx != NULL); + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; + if (maxOutputSize >= LZ4_compressBound(inputSize)) { + if (inputSize < LZ4_64Klimit) { + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration); + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? 
byPtr : byU32; + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); + } + } else { + if (inputSize < LZ4_64Klimit) { + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration); + } + } +} + +/** + * LZ4_compress_fast_extState_fastReset() : + * A variant of LZ4_compress_fast_extState(). + * + * Using this variant avoids an expensive initialization step. It is only safe + * to call if the state buffer is known to be correctly initialized already + * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of + * "correctly initialized"). + */ +int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration) +{ + LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse; + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; + + if (dstCapacity >= LZ4_compressBound(srcSize)) { + if (srcSize < LZ4_64Klimit) { + const tableType_t tableType = byU16; + LZ4_prepareTable(ctx, srcSize, tableType); + if (ctx->currentOffset) { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration); + } else { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); + } + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? 
byPtr : byU32; + LZ4_prepareTable(ctx, srcSize, tableType); + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); + } + } else { + if (srcSize < LZ4_64Klimit) { + const tableType_t tableType = byU16; + LZ4_prepareTable(ctx, srcSize, tableType); + if (ctx->currentOffset) { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration); + } else { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); + } + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + LZ4_prepareTable(ctx, srcSize, tableType); + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); + } + } +} + + +int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) +{ + int result; +#if (LZ4_HEAPMODE) + LZ4_stream_t* ctxPtr = ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ + if (ctxPtr == NULL) return 0; +#else + LZ4_stream_t ctx; + LZ4_stream_t* const ctxPtr = &ctx; +#endif + result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration); + +#if (LZ4_HEAPMODE) + FREEMEM(ctxPtr); +#endif + return result; +} + + +int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputSize) +{ + return LZ4_compress_fast(src, dst, srcSize, maxOutputSize, 1); +} + + +/* Note!: This function leaves the stream in an unclean/broken state! + * It is not safe to subsequently use the same state with a _fastReset() or + * _continue() call without resetting it. 
*/ +static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize) +{ + void* const s = LZ4_initStream(state, sizeof (*state)); + assert(s != NULL); (void)s; + + if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) { /* compression success is guaranteed */ + return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1); + } else { + if (*srcSizePtr < LZ4_64Klimit) { + return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1); + } else { + tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, 1); + } } +} + + +int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize) +{ +#if (LZ4_HEAPMODE) + LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ + if (ctx == NULL) return 0; +#else + LZ4_stream_t ctxBody; + LZ4_stream_t* ctx = &ctxBody; +#endif + + int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize); + +#if (LZ4_HEAPMODE) + FREEMEM(ctx); +#endif + return result; +} + + + +/*-****************************** +* Streaming functions +********************************/ + +LZ4_stream_t* LZ4_createStream(void) +{ + LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); + LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal)); /* A compilation error here means LZ4_STREAMSIZE is not large enough */ + DEBUGLOG(4, "LZ4_createStream %p", lz4s); + if (lz4s == NULL) return NULL; + LZ4_initStream(lz4s, sizeof(*lz4s)); + return lz4s; +} + +static size_t LZ4_stream_t_alignment(void) +{ +#if LZ4_ALIGN_TEST + typedef struct { char c; LZ4_stream_t t; } t_a; + return 
sizeof(t_a) - sizeof(LZ4_stream_t); +#else + return 1; /* effectively disabled */ +#endif +} + +LZ4_stream_t* LZ4_initStream (void* buffer, size_t size) +{ + DEBUGLOG(5, "LZ4_initStream"); + if (buffer == NULL) { return NULL; } + if (size < sizeof(LZ4_stream_t)) { return NULL; } + if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) return NULL; + MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal)); + return (LZ4_stream_t*)buffer; +} + +/* resetStream is now deprecated, + * prefer initStream() which is more general */ +void LZ4_resetStream (LZ4_stream_t* LZ4_stream) +{ + DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream); + MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal)); +} + +void LZ4_resetStream_fast(LZ4_stream_t* ctx) { + LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32); +} + +int LZ4_freeStream (LZ4_stream_t* LZ4_stream) +{ + if (!LZ4_stream) return 0; /* support free on NULL */ + DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream); + FREEMEM(LZ4_stream); + return (0); +} + + +#define HASH_UNIT sizeof(reg_t) +int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) +{ + LZ4_stream_t_internal* dict = &LZ4_dict->internal_donotuse; + const tableType_t tableType = byU32; + const BYTE* p = (const BYTE*)dictionary; + const BYTE* const dictEnd = p + dictSize; + const BYTE* base; + + DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict); + + /* It's necessary to reset the context, + * and not just continue it with prepareTable() + * to avoid any risk of generating overflowing matchIndex + * when compressing using this dictionary */ + LZ4_resetStream(LZ4_dict); + + /* We always increment the offset by 64 KB, since, if the dict is longer, + * we truncate it to the last 64k, and if it's shorter, we still want to + * advance by a whole window length so we can provide the guarantee that + * there are only valid offsets in the window, which allows an optimization + * in LZ4_compress_fast_continue() where it uses 
noDictIssue even when the + * dictionary isn't a full 64k. */ + dict->currentOffset += 64 KB; + + if (dictSize < (int)HASH_UNIT) { + return 0; + } + + if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB; + base = dictEnd - dict->currentOffset; + dict->dictionary = p; + dict->dictSize = (U32)(dictEnd - p); + dict->tableType = (U32)tableType; + + while (p <= dictEnd-HASH_UNIT) { + LZ4_putPosition(p, dict->hashTable, tableType, base); + p+=3; + } + + return (int)dict->dictSize; +} + +void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream) { + const LZ4_stream_t_internal* dictCtx = dictionaryStream == NULL ? NULL : + &(dictionaryStream->internal_donotuse); + + DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)", + workingStream, dictionaryStream, + dictCtx != NULL ? dictCtx->dictSize : 0); + + if (dictCtx != NULL) { + /* If the current offset is zero, we will never look in the + * external dictionary context, since there is no value a table + * entry can take that indicate a miss. In that case, we need + * to bump the offset to something non-zero. + */ + if (workingStream->internal_donotuse.currentOffset == 0) { + workingStream->internal_donotuse.currentOffset = 64 KB; + } + + /* Don't actually attach an empty dictionary. 
+ */ + if (dictCtx->dictSize == 0) { + dictCtx = NULL; + } + } + workingStream->internal_donotuse.dictCtx = dictCtx; +} + + +static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize) +{ + assert(nextSize >= 0); + if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) { /* potential ptrdiff_t overflow (32-bits mode) */ + /* rescale hash table */ + U32 const delta = LZ4_dict->currentOffset - 64 KB; + const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize; + int i; + DEBUGLOG(4, "LZ4_renormDictT"); + for (i=0; ihashTable[i] < delta) LZ4_dict->hashTable[i]=0; + else LZ4_dict->hashTable[i] -= delta; + } + LZ4_dict->currentOffset = 64 KB; + if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB; + LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize; + } +} + + +int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, + const char* source, char* dest, + int inputSize, int maxOutputSize, + int acceleration) +{ + const tableType_t tableType = byU32; + LZ4_stream_t_internal* streamPtr = &LZ4_stream->internal_donotuse; + const BYTE* dictEnd = streamPtr->dictionary + streamPtr->dictSize; + + DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize); + + LZ4_renormDictT(streamPtr, inputSize); /* avoid index overflow */ + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; + + /* invalidate tiny dictionaries */ + if ( (streamPtr->dictSize-1 < 4-1) /* intentional underflow */ + && (dictEnd != (const BYTE*)source) ) { + DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary); + streamPtr->dictSize = 0; + streamPtr->dictionary = (const BYTE*)source; + dictEnd = (const BYTE*)source; + } + + /* Check overlapping input/dictionary space */ + { const BYTE* sourceEnd = (const BYTE*) source + inputSize; + if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) { + 
streamPtr->dictSize = (U32)(dictEnd - sourceEnd); + if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB; + if (streamPtr->dictSize < 4) streamPtr->dictSize = 0; + streamPtr->dictionary = dictEnd - streamPtr->dictSize; + } + } + + /* prefix mode : source data follows dictionary */ + if (dictEnd == (const BYTE*)source) { + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) + return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration); + else + return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration); + } + + /* external dictionary mode */ + { int result; + if (streamPtr->dictCtx) { + /* We depend here on the fact that dictCtx'es (produced by + * LZ4_loadDict) guarantee that their tables contain no references + * to offsets between dictCtx->currentOffset - 64 KB and + * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe + * to use noDictIssue even when the dict isn't a full 64 KB. + */ + if (inputSize > 4 KB) { + /* For compressing large blobs, it is faster to pay the setup + * cost to copy the dictionary's tables into the active context, + * so that the compression loop is only looking into one table. 
+ */ + LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr)); + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration); + } + } else { + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); + } + } + streamPtr->dictionary = (const BYTE*)source; + streamPtr->dictSize = (U32)inputSize; + return result; + } +} + + +/* Hidden debug function, to force-test external dictionary mode */ +int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize) +{ + LZ4_stream_t_internal* streamPtr = &LZ4_dict->internal_donotuse; + int result; + + LZ4_renormDictT(streamPtr, srcSize); + + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { + result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1); + } + + streamPtr->dictionary = (const BYTE*)source; + streamPtr->dictSize = (U32)srcSize; + + return result; +} + + +/*! LZ4_saveDict() : + * If previously compressed data block is not guaranteed to remain available at its memory location, + * save it into a safer place (char* safeBuffer). 
+ * Note : you don't need to call LZ4_loadDict() afterwards, + * dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue(). + * Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error. + */ +int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) +{ + LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse; + const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize; + + if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */ + if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; } + + if (safeBuffer == NULL) assert(dictSize == 0); + if (dictSize > 0) + memmove(safeBuffer, previousDictEnd - dictSize, dictSize); + + dict->dictionary = (const BYTE*)safeBuffer; + dict->dictSize = (U32)dictSize; + + return dictSize; +} + + + +/*-******************************* + * Decompression functions + ********************************/ + +typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; +typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; + +#undef MIN +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +/* Read the variable-length literal or match length. + * + * ip - pointer to use as input. + * lencheck - end ip. Return an error if ip advances >= lencheck. + * loop_check - check ip >= lencheck in body of loop. Returns loop_error if so. + * initial_check - check ip >= lencheck before start of loop. Returns initial_error if so. + * error (output) - error code. Should be set to 0 before call. 
+ */ +typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error; +LZ4_FORCE_INLINE unsigned +read_variable_length(const BYTE**ip, const BYTE* lencheck, + int loop_check, int initial_check, + variable_length_error* error) +{ + U32 length = 0; + U32 s; + if (initial_check && unlikely((*ip) >= lencheck)) { /* overflow detection */ + *error = initial_error; + return length; + } + do { + s = **ip; + (*ip)++; + length += s; + if (loop_check && unlikely((*ip) >= lencheck)) { /* overflow detection */ + *error = loop_error; + return length; + } + } while (s==255); + + return length; +} + +/*! LZ4_decompress_generic() : + * This generic decompression function covers all use cases. + * It shall be instantiated several times, using different sets of directives. + * Note that it is important for performance that this function really get inlined, + * in order to remove useless branches during compilation optimization. + */ +LZ4_FORCE_INLINE int +LZ4_decompress_generic( + const char* const src, + char* const dst, + int srcSize, + int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */ + + endCondition_directive endOnInput, /* endOnOutputSize, endOnInputSize */ + earlyEnd_directive partialDecoding, /* full, partial */ + dict_directive dict, /* noDict, withPrefix64k, usingExtDict */ + const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */ + const BYTE* const dictStart, /* only if dict==usingExtDict */ + const size_t dictSize /* note : = 0 if noDict */ + ) +{ + if (src == NULL) { return -1; } + + { const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + + BYTE* op = (BYTE*) dst; + BYTE* const oend = op + outputSize; + BYTE* cpy; + + const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize; + + const int safeDecode = (endOnInput==endOnInputSize); + const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB))); + + + /* Set up the "end" pointers for the shortcut. 
*/ + const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/; + const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/; + + const BYTE* match; + size_t offset; + unsigned token; + size_t length; + + + DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize); + + /* Special cases */ + assert(lowPrefix <= op); + if ((endOnInput) && (unlikely(outputSize==0))) { + /* Empty output buffer */ + if (partialDecoding) return 0; + return ((srcSize==1) && (*ip==0)) ? 0 : -1; + } + if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); } + if ((endOnInput) && unlikely(srcSize==0)) { return -1; } + + /* Currently the fast loop shows a regression on qualcomm arm chips. */ +#if LZ4_FAST_DEC_LOOP + if ((oend - op) < FASTLOOP_SAFE_DISTANCE) { + DEBUGLOG(6, "skip fast decode loop"); + goto safe_decode; + } + + /* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */ + while (1) { + /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */ + assert(oend - op >= FASTLOOP_SAFE_DISTANCE); + if (endOnInput) { assert(ip < iend); } + token = *ip++; + length = token >> ML_BITS; /* literal length */ + + assert(!endOnInput || ip <= iend); /* ip < iend before the increment */ + + /* decode literal length */ + if (length == RUN_MASK) { + variable_length_error error = ok; + length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error); + if (error == initial_error) { goto _output_error; } + if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ + if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ + + /* copy literals */ + cpy = op+length; + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + if (endOnInput) { /* LZ4_decompress_safe() */ + if ((cpy>oend-32) || (ip+length>iend-32)) { goto 
safe_literal_copy; } + LZ4_wildCopy32(op, ip, cpy); + } else { /* LZ4_decompress_fast() */ + if (cpy>oend-8) { goto safe_literal_copy; } + LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time : + * it doesn't know input length, and only relies on end-of-block properties */ + } + ip += length; op = cpy; + } else { + cpy = op+length; + if (endOnInput) { /* LZ4_decompress_safe() */ + DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length); + /* We don't need to check oend, since we check it once for each loop below */ + if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; } + /* Literals can only be 14, but hope compilers optimize if we copy by a register size */ + LZ4_memcpy(op, ip, 16); + } else { /* LZ4_decompress_fast() */ + /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time : + * it doesn't know input length, and relies on end-of-block properties */ + LZ4_memcpy(op, ip, 8); + if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); } + } + ip += length; op = cpy; + } + + /* get offset */ + offset = LZ4_readLE16(ip); ip+=2; + match = op - offset; + assert(match <= op); + + /* get matchlength */ + length = token & ML_MASK; + + if (length == ML_MASK) { + variable_length_error error = ok; + if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ + length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error); + if (error != ok) { goto _output_error; } + if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */ + length += MINMATCH; + if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { + goto safe_match_copy; + } + } else { + length += MINMATCH; + if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { + goto safe_match_copy; + } + + /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */ + if ((dict == withPrefix64k) || (match >= 
lowPrefix)) { + if (offset >= 8) { + assert(match >= lowPrefix); + assert(match <= op); + assert(op + 18 <= oend); + + LZ4_memcpy(op, match, 8); + LZ4_memcpy(op+8, match+8, 8); + LZ4_memcpy(op+16, match+16, 2); + op += length; + continue; + } } } + + if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ + /* match starting within external dictionary */ + if ((dict==usingExtDict) && (match < lowPrefix)) { + if (unlikely(op+length > oend-LASTLITERALS)) { + if (partialDecoding) { + DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd"); + length = MIN(length, (size_t)(oend-op)); + } else { + goto _output_error; /* end-of-block condition violated */ + } } + + if (length <= (size_t)(lowPrefix-match)) { + /* match fits entirely within external dictionary : just copy */ + memmove(op, dictEnd - (lowPrefix-match), length); + op += length; + } else { + /* match stretches into both external dictionary and current block */ + size_t const copySize = (size_t)(lowPrefix - match); + size_t const restSize = length - copySize; + LZ4_memcpy(op, dictEnd - copySize, copySize); + op += copySize; + if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ + BYTE* const endOfMatch = op + restSize; + const BYTE* copyFrom = lowPrefix; + while (op < endOfMatch) { *op++ = *copyFrom++; } + } else { + LZ4_memcpy(op, lowPrefix, restSize); + op += restSize; + } } + continue; + } + + /* copy match within block */ + cpy = op + length; + + assert((op <= oend) && (oend-op >= 32)); + if (unlikely(offset<16)) { + LZ4_memcpy_using_offset(op, match, cpy, offset); + } else { + LZ4_wildCopy32(op, match, cpy); + } + + op = cpy; /* wildcopy correction */ + } + safe_decode: +#endif + + /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */ + while (1) { + token = *ip++; + length = token >> ML_BITS; /* literal length */ + + assert(!endOnInput || ip <= iend); /* ip < iend before the increment */ + + /* 
A two-stage shortcut for the most common case: + * 1) If the literal length is 0..14, and there is enough space, + * enter the shortcut and copy 16 bytes on behalf of the literals + * (in the fast mode, only 8 bytes can be safely copied this way). + * 2) Further if the match length is 4..18, copy 18 bytes in a similar + * manner; but we ensure that there's enough space in the output for + * those 18 bytes earlier, upon entering the shortcut (in other words, + * there is a combined check for both stages). + */ + if ( (endOnInput ? length != RUN_MASK : length <= 8) + /* strictly "less than" on input, to re-enter the loop with at least one byte */ + && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) { + /* Copy the literals */ + LZ4_memcpy(op, ip, endOnInput ? 16 : 8); + op += length; ip += length; + + /* The second stage: prepare for match copying, decode full info. + * If it doesn't work out, the info won't be wasted. */ + length = token & ML_MASK; /* match length */ + offset = LZ4_readLE16(ip); ip += 2; + match = op - offset; + assert(match <= op); /* check overflow */ + + /* Do not deal with overlapping matches. */ + if ( (length != ML_MASK) + && (offset >= 8) + && (dict==withPrefix64k || match >= lowPrefix) ) { + /* Copy the match. */ + LZ4_memcpy(op + 0, match + 0, 8); + LZ4_memcpy(op + 8, match + 8, 8); + LZ4_memcpy(op +16, match +16, 2); + op += length + MINMATCH; + /* Both stages worked, load the next token. */ + continue; + } + + /* The second stage didn't work out, but the info is ready. + * Propel it right to the point of match copying. 
*/ + goto _copy_match; + } + + /* decode literal length */ + if (length == RUN_MASK) { + variable_length_error error = ok; + length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error); + if (error == initial_error) { goto _output_error; } + if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ + if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ + } + + /* copy literals */ + cpy = op+length; +#if LZ4_FAST_DEC_LOOP + safe_literal_copy: +#endif + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) ) + || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) ) + { + /* We've either hit the input parsing restriction or the output parsing restriction. + * In the normal scenario, decoding a full block, it must be the last sequence, + * otherwise it's an error (invalid input or dimensions). + * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow. + */ + if (partialDecoding) { + /* Since we are partial decoding we may be in this block because of the output parsing + * restriction, which is not valid since the output buffer is allowed to be undersized. + */ + assert(endOnInput); + DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end") + DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length); + DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op)); + DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip)); + /* Finishing in the middle of a literals segment, + * due to lack of input. + */ + if (ip+length > iend) { + length = (size_t)(iend-ip); + cpy = op + length; + } + /* Finishing in the middle of a literals segment, + * due to lack of output space. 
+ */ + if (cpy > oend) { + cpy = oend; + assert(op<=oend); + length = (size_t)(oend-op); + } + } else { + /* We must be on the last sequence because of the parsing limitations so check + * that we exactly regenerate the original size (must be exact when !endOnInput). + */ + if ((!endOnInput) && (cpy != oend)) { goto _output_error; } + /* We must be on the last sequence (or invalid) because of the parsing limitations + * so check that we exactly consume the input and don't overrun the output buffer. + */ + if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { + DEBUGLOG(6, "should have been last run of literals") + DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend); + DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend); + goto _output_error; + } + } + memmove(op, ip, length); /* supports overlapping memory regions; only matters for in-place decompression scenarios */ + ip += length; + op += length; + /* Necessarily EOF when !partialDecoding. + * When partialDecoding, it is EOF if we've either + * filled the output buffer or + * can't proceed with reading an offset for following match. 
+ */ + if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) { + break; + } + } else { + LZ4_wildCopy8(op, ip, cpy); /* may overwrite up to WILDCOPYLENGTH beyond cpy */ + ip += length; op = cpy; + } + + /* get offset */ + offset = LZ4_readLE16(ip); ip+=2; + match = op - offset; + + /* get matchlength */ + length = token & ML_MASK; + + _copy_match: + if (length == ML_MASK) { + variable_length_error error = ok; + length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error); + if (error != ok) goto _output_error; + if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */ + } + length += MINMATCH; + +#if LZ4_FAST_DEC_LOOP + safe_match_copy: +#endif + if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */ + /* match starting within external dictionary */ + if ((dict==usingExtDict) && (match < lowPrefix)) { + if (unlikely(op+length > oend-LASTLITERALS)) { + if (partialDecoding) length = MIN(length, (size_t)(oend-op)); + else goto _output_error; /* doesn't respect parsing restriction */ + } + + if (length <= (size_t)(lowPrefix-match)) { + /* match fits entirely within external dictionary : just copy */ + memmove(op, dictEnd - (lowPrefix-match), length); + op += length; + } else { + /* match stretches into both external dictionary and current block */ + size_t const copySize = (size_t)(lowPrefix - match); + size_t const restSize = length - copySize; + LZ4_memcpy(op, dictEnd - copySize, copySize); + op += copySize; + if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ + BYTE* const endOfMatch = op + restSize; + const BYTE* copyFrom = lowPrefix; + while (op < endOfMatch) *op++ = *copyFrom++; + } else { + LZ4_memcpy(op, lowPrefix, restSize); + op += restSize; + } } + continue; + } + assert(match >= lowPrefix); + + /* copy match within block */ + cpy = op + length; + + /* partialDecoding : may end anywhere within the 
block */ + assert(op<=oend); + if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { + size_t const mlen = MIN(length, (size_t)(oend-op)); + const BYTE* const matchEnd = match + mlen; + BYTE* const copyEnd = op + mlen; + if (matchEnd > op) { /* overlap copy */ + while (op < copyEnd) { *op++ = *match++; } + } else { + LZ4_memcpy(op, match, mlen); + } + op = copyEnd; + if (op == oend) { break; } + continue; + } + + if (unlikely(offset<8)) { + LZ4_write32(op, 0); /* silence msan warning when offset==0 */ + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += inc32table[offset]; + LZ4_memcpy(op+4, match, 4); + match -= dec64table[offset]; + } else { + LZ4_memcpy(op, match, 8); + match += 8; + } + op += 8; + + if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { + BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1); + if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */ + if (op < oCopyLimit) { + LZ4_wildCopy8(op, match, oCopyLimit); + match += oCopyLimit - op; + op = oCopyLimit; + } + while (op < cpy) { *op++ = *match++; } + } else { + LZ4_memcpy(op, match, 8); + if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); } + } + op = cpy; /* wildcopy correction */ + } + + /* end of decoding */ + if (endOnInput) { + DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst)); + return (int) (((char*)op)-dst); /* Nb of output bytes decoded */ + } else { + return (int) (((const char*)ip)-src); /* Nb of input bytes read */ + } + + /* Overflow error detected */ + _output_error: + return (int) (-(((const char*)ip)-src))-1; + } +} + + +/*===== Instantiate the API decoding functions. 
=====*/ + +LZ4_FORCE_O2 +int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, + endOnInputSize, decode_full_block, noDict, + (BYTE*)dest, NULL, 0); +} + +LZ4_FORCE_O2 +int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity) +{ + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity, + endOnInputSize, partial_decode, + noDict, (BYTE*)dst, NULL, 0); +} + +LZ4_FORCE_O2 +int LZ4_decompress_fast(const char* source, char* dest, int originalSize) +{ + return LZ4_decompress_generic(source, dest, 0, originalSize, + endOnOutputSize, decode_full_block, withPrefix64k, + (BYTE*)dest - 64 KB, NULL, 0); +} + +/*===== Instantiate a few more decoding cases, used more than once. =====*/ + +LZ4_FORCE_O2 /* Exported, an obsolete API function. */ +int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, withPrefix64k, + (BYTE*)dest - 64 KB, NULL, 0); +} + +/* Another obsolete API function, paired with the previous one. */ +int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize) +{ + /* LZ4_decompress_fast doesn't validate match offsets, + * and thus serves well with any prefixed dictionary. 
*/ + return LZ4_decompress_fast(source, dest, originalSize); +} + +LZ4_FORCE_O2 +static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize, + size_t prefixSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, noDict, + (BYTE*)dest-prefixSize, NULL, 0); +} + +LZ4_FORCE_O2 +int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, + int compressedSize, int maxOutputSize, + const void* dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, usingExtDict, + (BYTE*)dest, (const BYTE*)dictStart, dictSize); +} + +LZ4_FORCE_O2 +static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize, + const void* dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, 0, originalSize, + endOnOutputSize, decode_full_block, usingExtDict, + (BYTE*)dest, (const BYTE*)dictStart, dictSize); +} + +/* The "double dictionary" mode, for use with e.g. ring buffers: the first part + * of the dictionary is passed as prefix, and the second via dictStart + dictSize. + * These routines are used only once, in LZ4_decompress_*_continue(). 
+ */ +LZ4_FORCE_INLINE +int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize, + size_t prefixSize, const void* dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, usingExtDict, + (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); +} + +LZ4_FORCE_INLINE +int LZ4_decompress_fast_doubleDict(const char* source, char* dest, int originalSize, + size_t prefixSize, const void* dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, 0, originalSize, + endOnOutputSize, decode_full_block, usingExtDict, + (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); +} + +/*===== streaming decompression functions =====*/ + +LZ4_streamDecode_t* LZ4_createStreamDecode(void) +{ + LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t)); + LZ4_STATIC_ASSERT(LZ4_STREAMDECODESIZE >= sizeof(LZ4_streamDecode_t_internal)); /* A compilation error here means LZ4_STREAMDECODESIZE is not large enough */ + return lz4s; +} + +int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream) +{ + if (LZ4_stream == NULL) { return 0; } /* support free on NULL */ + FREEMEM(LZ4_stream); + return 0; +} + +/*! LZ4_setStreamDecode() : + * Use this function to instruct where to find the dictionary. + * This function is not necessary if previous data is still available where it was decoded. + * Loading a size of 0 is allowed (same effect as no dictionary). + * @return : 1 if OK, 0 if error + */ +int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize) +{ + LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; + lz4sd->prefixSize = (size_t) dictSize; + lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize; + lz4sd->externalDict = NULL; + lz4sd->extDictSize = 0; + return 1; +} + +/*! 
LZ4_decoderRingBufferSize() : + * when setting a ring buffer for streaming decompression (optional scenario), + * provides the minimum size of this ring buffer + * to be compatible with any source respecting maxBlockSize condition. + * Note : in a ring buffer scenario, + * blocks are presumed decompressed next to each other. + * When not enough space remains for next block (remainingSize < maxBlockSize), + * decoding resumes from beginning of ring buffer. + * @return : minimum ring buffer size, + * or 0 if there is an error (invalid maxBlockSize). + */ +int LZ4_decoderRingBufferSize(int maxBlockSize) +{ + if (maxBlockSize < 0) return 0; + if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0; + if (maxBlockSize < 16) maxBlockSize = 16; + return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize); +} + +/* +*_continue() : + These decoding functions allow decompression of multiple blocks in "streaming" mode. + Previously decoded blocks must still be available at the memory position where they were decoded. + If it's not possible, save the relevant part of decoded data into a safe buffer, + and indicate where it stands using LZ4_setStreamDecode() +*/ +LZ4_FORCE_O2 +int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) +{ + LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; + int result; + + if (lz4sd->prefixSize == 0) { + /* The first call, no dictionary yet. */ + assert(lz4sd->extDictSize == 0); + result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)result; + lz4sd->prefixEnd = (BYTE*)dest + result; + } else if (lz4sd->prefixEnd == (BYTE*)dest) { + /* They're rolling the current segment. 
*/ + if (lz4sd->prefixSize >= 64 KB - 1) + result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); + else if (lz4sd->extDictSize == 0) + result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, + lz4sd->prefixSize); + else + result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize, + lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize += (size_t)result; + lz4sd->prefixEnd += result; + } else { + /* The buffer wraps around, or they're switching to another buffer. */ + lz4sd->extDictSize = lz4sd->prefixSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; + result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, + lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)result; + lz4sd->prefixEnd = (BYTE*)dest + result; + } + + return result; +} + +LZ4_FORCE_O2 +int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize) +{ + LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; + int result; + assert(originalSize >= 0); + + if (lz4sd->prefixSize == 0) { + assert(lz4sd->extDictSize == 0); + result = LZ4_decompress_fast(source, dest, originalSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)originalSize; + lz4sd->prefixEnd = (BYTE*)dest + originalSize; + } else if (lz4sd->prefixEnd == (BYTE*)dest) { + if (lz4sd->prefixSize >= 64 KB - 1 || lz4sd->extDictSize == 0) + result = LZ4_decompress_fast(source, dest, originalSize); + else + result = LZ4_decompress_fast_doubleDict(source, dest, originalSize, + lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize += (size_t)originalSize; + lz4sd->prefixEnd += originalSize; + } else { + lz4sd->extDictSize = 
lz4sd->prefixSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; + result = LZ4_decompress_fast_extDict(source, dest, originalSize, + lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)originalSize; + lz4sd->prefixEnd = (BYTE*)dest + originalSize; + } + + return result; +} + + +/* +Advanced decoding functions : +*_usingDict() : + These decoding functions work the same as "_continue" ones, + the dictionary must be explicitly provided within parameters +*/ + +int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) +{ + if (dictSize==0) + return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); + if (dictStart+dictSize == dest) { + if (dictSize >= 64 KB - 1) { + return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize); +} + +int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) +{ + if (dictSize==0 || dictStart+dictSize == dest) + return LZ4_decompress_fast(source, dest, originalSize); + assert(dictSize >= 0); + return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize); +} + + +/*=************************************************* +* Obsolete Functions +***************************************************/ +/* obsolete compression functions */ +int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) +{ + return LZ4_compress_default(source, dest, inputSize, maxOutputSize); +} +int LZ4_compress(const char* src, char* dest, int srcSize) +{ + return 
LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize)); +} +int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) +{ + return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); +} +int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) +{ + return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1); +} +int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity) +{ + return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1); +} +int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) +{ + return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1); +} + +/* +These decompression functions are deprecated and should no longer be used. +They are only provided here for compatibility with older user programs. 
+- LZ4_uncompress is totally equivalent to LZ4_decompress_fast +- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe +*/ +int LZ4_uncompress (const char* source, char* dest, int outputSize) +{ + return LZ4_decompress_fast(source, dest, outputSize); +} +int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) +{ + return LZ4_decompress_safe(source, dest, isize, maxOutputSize); +} + +/* Obsolete Streaming functions */ + +int LZ4_sizeofStreamState(void) { return LZ4_STREAMSIZE; } + +int LZ4_resetStreamState(void* state, char* inputBuffer) +{ + (void)inputBuffer; + LZ4_resetStream((LZ4_stream_t*)state); + return 0; +} + +void* LZ4_create (char* inputBuffer) +{ + (void)inputBuffer; + return LZ4_createStream(); +} + +char* LZ4_slideInputBuffer (void* state) +{ + /* avoid const char * -> char * conversion warning */ + return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary; +} + +#endif /* LZ4_COMMONDEFS_ONLY */ diff --git a/lz4/lz4.h b/lz4/lz4.h new file mode 100644 index 00000000..7ab1e483 --- /dev/null +++ b/lz4/lz4.h @@ -0,0 +1,774 @@ +/* + * LZ4 - Fast LZ compression algorithm + * Header File + * Copyright (C) 2011-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 homepage : http://www.lz4.org + - LZ4 source repository : https://github.com/lz4/lz4 +*/ +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef LZ4_H_2983827168210 +#define LZ4_H_2983827168210 + +/* --- Dependency --- */ +#include /* size_t */ + + +/** + Introduction + + LZ4 is lossless compression algorithm, providing compression speed >500 MB/s per core, + scalable with multi-cores CPU. It features an extremely fast decoder, with speed in + multiple GB/s per core, typically reaching RAM speed limits on multi-core systems. + + The LZ4 compression library provides in-memory compression and decompression functions. + It gives full buffer control to user. + Compression can be done in: + - a single step (described as Simple Functions) + - a single step, reusing a context (described in Advanced Functions) + - unbounded multiple steps (described as Streaming compression) + + lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md). + Decompressing such a compressed block requires additional metadata. + Exact metadata depends on exact decompression function. 
+ For the typical case of LZ4_decompress_safe(), + metadata includes block's compressed size, and maximum bound of decompressed size. + Each application is free to encode and pass such metadata in whichever way it wants. + + lz4.h only handle blocks, it can not generate Frames. + + Blocks are different from Frames (doc/lz4_Frame_format.md). + Frames bundle both blocks and metadata in a specified manner. + Embedding metadata is required for compressed data to be self-contained and portable. + Frame format is delivered through a companion API, declared in lz4frame.h. + The `lz4` CLI can only manage frames. +*/ + +/*^*************************************************************** +* Export parameters +*****************************************************************/ +/* +* LZ4_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +* LZ4LIB_VISIBILITY : +* Control library symbols visibility. +*/ +#ifndef LZ4LIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define LZ4LIB_VISIBILITY +# endif +#endif +#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1) +# define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY +#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1) +# define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define LZ4LIB_API LZ4LIB_VISIBILITY +#endif + +/*------ Version ------*/ +#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */ +#define LZ4_VERSION_MINOR 9 /* for new (non-breaking) interface capabilities */ +#define LZ4_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */ + +#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE) + +#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE +#define LZ4_QUOTE(str) #str 
+#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str) +#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION) + +LZ4LIB_API int LZ4_versionNumber (void); /**< library version number; useful to check dll version */ +LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; useful to check dll version */ + + +/*-************************************ +* Tuning parameter +**************************************/ +/*! + * LZ4_MEMORY_USAGE : + * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) + * Increasing memory usage improves compression ratio. + * Reduced memory usage may improve speed, thanks to better cache locality. + * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache + */ +#ifndef LZ4_MEMORY_USAGE +# define LZ4_MEMORY_USAGE 14 +#endif + + +/*-************************************ +* Simple Functions +**************************************/ +/*! LZ4_compress_default() : + * Compresses 'srcSize' bytes from buffer 'src' + * into already allocated 'dst' buffer of size 'dstCapacity'. + * Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize). + * It also runs faster, so it's a recommended setting. + * If the function cannot compress 'src' into a more limited 'dst' budget, + * compression stops *immediately*, and the function result is zero. + * In which case, 'dst' content is undefined (invalid). + * srcSize : max supported value is LZ4_MAX_INPUT_SIZE. + * dstCapacity : size of buffer 'dst' (which must be already allocated) + * @return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity) + * or 0 if compression fails + * Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer). + */ +LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity); + +/*! 
LZ4_decompress_safe() : + * compressedSize : is the exact complete size of the compressed block. + * dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size. + * @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity) + * If destination buffer is not large enough, decoding will stop and output an error code (negative value). + * If the source stream is detected malformed, the function will stop decoding and return a negative result. + * Note 1 : This function is protected against malicious data packets : + * it will never writes outside 'dst' buffer, nor read outside 'source' buffer, + * even if the compressed block is maliciously modified to order the decoder to do these actions. + * In such case, the decoder stops immediately, and considers the compressed block malformed. + * Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them. + * The implementation is free to send / store / derive this information in whichever way is most beneficial. + * If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead. + */ +LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity); + + +/*-************************************ +* Advanced Functions +**************************************/ +#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */ +#define LZ4_COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16) + +/*! LZ4_compressBound() : + Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible) + This function is primarily useful for memory allocation purposes (destination buffer size). 
+ Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example). + Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize) + inputSize : max supported value is LZ4_MAX_INPUT_SIZE + return : maximum output size in a "worst case" scenario + or 0, if input size is incorrect (too large or negative) +*/ +LZ4LIB_API int LZ4_compressBound(int inputSize); + +/*! LZ4_compress_fast() : + Same as LZ4_compress_default(), but allows selection of "acceleration" factor. + The larger the acceleration value, the faster the algorithm, but also the lesser the compression. + It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed. + An acceleration value of "1" is the same as regular LZ4_compress_default() + Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c). + Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c). +*/ +LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + + +/*! LZ4_compress_fast_extState() : + * Same as LZ4_compress_fast(), using an externally allocated memory space for its state. + * Use LZ4_sizeofState() to know how much memory must be allocated, + * and allocate it on 8-bytes boundaries (using `malloc()` typically). + * Then, provide this buffer as `void* state` to compression function. + */ +LZ4LIB_API int LZ4_sizeofState(void); +LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + + +/*! LZ4_compress_destSize() : + * Reverse the logic : compresses as much data as possible from 'src' buffer + * into already allocated buffer 'dst', of size >= 'targetDestSize'. 
+ * This function either compresses the entire 'src' content into 'dst' if it's large enough, + * or fill 'dst' buffer completely with as much data as possible from 'src'. + * note: acceleration parameter is fixed to "default". + * + * *srcSizePtr : will be modified to indicate how many bytes where read from 'src' to fill 'dst'. + * New value is necessarily <= input value. + * @return : Nb bytes written into 'dst' (necessarily <= targetDestSize) + * or 0 if compression fails. + * + * Note : from v1.8.2 to v1.9.1, this function had a bug (fixed un v1.9.2+): + * the produced compressed content could, in specific circumstances, + * require to be decompressed into a destination buffer larger + * by at least 1 byte than the content to decompress. + * If an application uses `LZ4_compress_destSize()`, + * it's highly recommended to update liblz4 to v1.9.2 or better. + * If this can't be done or ensured, + * the receiving decompression function should provide + * a dstCapacity which is > decompressedSize, by at least 1 byte. + * See https://github.com/lz4/lz4/issues/859 for details + */ +LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize); + + +/*! LZ4_decompress_safe_partial() : + * Decompress an LZ4 compressed block, of size 'srcSize' at position 'src', + * into destination buffer 'dst' of size 'dstCapacity'. + * Up to 'targetOutputSize' bytes will be decoded. + * The function stops decoding on reaching this objective. + * This can be useful to boost performance + * whenever only the beginning of a block is required. + * + * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize) + * If source stream is detected malformed, function returns a negative result. + * + * Note 1 : @return can be < targetOutputSize, if compressed block contains less data. 
+ * + * Note 2 : targetOutputSize must be <= dstCapacity + * + * Note 3 : this function effectively stops decoding on reaching targetOutputSize, + * so dstCapacity is kind of redundant. + * This is because in older versions of this function, + * decoding operation would still write complete sequences. + * Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize, + * it could write more bytes, though only up to dstCapacity. + * Some "margin" used to be required for this operation to work properly. + * Thankfully, this is no longer necessary. + * The function nonetheless keeps the same signature, in an effort to preserve API compatibility. + * + * Note 4 : If srcSize is the exact size of the block, + * then targetOutputSize can be any value, + * including larger than the block's decompressed size. + * The function will, at most, generate block's decompressed size. + * + * Note 5 : If srcSize is _larger_ than block's compressed size, + * then targetOutputSize **MUST** be <= block's decompressed size. + * Otherwise, *silent corruption will occur*. + */ +LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity); + + +/*-********************************************* +* Streaming Compression Functions +***********************************************/ +typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */ + +LZ4LIB_API LZ4_stream_t* LZ4_createStream(void); +LZ4LIB_API int LZ4_freeStream (LZ4_stream_t* streamPtr); + +/*! LZ4_resetStream_fast() : v1.9.0+ + * Use this to prepare an LZ4_stream_t for a new chain of dependent blocks + * (e.g., LZ4_compress_fast_continue()). + * + * An LZ4_stream_t must be initialized once before usage. + * This is automatically done when created by LZ4_createStream(). + * However, should the LZ4_stream_t be simply declared on stack (for example), + * it's necessary to initialize it first, using LZ4_initStream(). 
+ * + * After init, start any new stream with LZ4_resetStream_fast(). + * A same LZ4_stream_t can be re-used multiple times consecutively + * and compress multiple streams, + * provided that it starts each new stream with LZ4_resetStream_fast(). + * + * LZ4_resetStream_fast() is much faster than LZ4_initStream(), + * but is not compatible with memory regions containing garbage data. + * + * Note: it's only useful to call LZ4_resetStream_fast() + * in the context of streaming compression. + * The *extState* functions perform their own resets. + * Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive. + */ +LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr); + +/*! LZ4_loadDict() : + * Use this function to reference a static dictionary into LZ4_stream_t. + * The dictionary must remain available during compression. + * LZ4_loadDict() triggers a reset, so any previous data will be forgotten. + * The same dictionary will have to be loaded on decompression side for successful decoding. + * Dictionary are useful for better compression of small data (KB range). + * While LZ4 accept any input as dictionary, + * results are generally better when using Zstandard's Dictionary Builder. + * Loading a size of 0 is allowed, and is the same as reset. + * @return : loaded dictionary size, in bytes (necessarily <= 64 KB) + */ +LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize); + +/*! LZ4_compress_fast_continue() : + * Compress 'src' content using data from previously compressed blocks, for better compression ratio. + * 'dst' buffer must be already allocated. + * If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster. + * + * @return : size of compressed block + * or 0 if there is an error (typically, cannot fit into 'dst'). + * + * Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block. + * Each block has precise boundaries. 
+ * Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata. + * It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together. + * + * Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory ! + * + * Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB. + * Make sure that buffers are separated, by at least one byte. + * This construction ensures that each block only depends on previous block. + * + * Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB. + * + * Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed. + */ +LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + +/*! LZ4_saveDict() : + * If last 64KB data cannot be guaranteed to remain available at its current memory location, + * save it into a safer place (char* safeBuffer). + * This is schematically equivalent to a memcpy() followed by LZ4_loadDict(), + * but is much faster, because LZ4_saveDict() doesn't need to rebuild tables. + * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error. + */ +LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize); + + +/*-********************************************** +* Streaming Decompression Functions +* Bufferless synchronous API +************************************************/ +typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* tracking context */ + +/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() : + * creation / destruction of streaming decompression tracking context. + * A tracking context can be re-used multiple times. 
+ */ +LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void); +LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream); + +/*! LZ4_setStreamDecode() : + * An LZ4_streamDecode_t context can be allocated once and re-used multiple times. + * Use this function to start decompression of a new stream of blocks. + * A dictionary can optionally be set. Use NULL or size 0 for a reset order. + * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression. + * @return : 1 if OK, 0 if error + */ +LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize); + +/*! LZ4_decoderRingBufferSize() : v1.8.2+ + * Note : in a ring buffer scenario (optional), + * blocks are presumed decompressed next to each other + * up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize), + * at which stage it resumes from beginning of ring buffer. + * When setting such a ring buffer for streaming decompression, + * provides the minimum size of this ring buffer + * to be compatible with any source respecting maxBlockSize condition. + * @return : minimum ring buffer size, + * or 0 if there is an error (invalid maxBlockSize). + */ +LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize); +#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize)) /* for static allocation; maxBlockSize presumed valid */ + +/*! LZ4_decompress_*_continue() : + * These decoding functions allow decompression of consecutive blocks in "streaming" mode. + * A block is an unsplittable entity, it must be presented entirely to a decompression function. + * Decompression functions only accepts one block at a time. + * The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded. + * If less than 64KB of data has been decoded, all the data must be present. 
+ * + * Special : if decompression side sets a ring buffer, it must respect one of the following conditions : + * - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize). + * maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes. + * In which case, encoding and decoding buffers do not need to be synchronized. + * Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize. + * - Synchronized mode : + * Decompression buffer size is _exactly_ the same as compression buffer size, + * and follows exactly same update rule (block boundaries at same positions), + * and decoding function is provided with exact decompressed size of each block (exception for last block of the stream), + * _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB). + * - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes. + * In which case, encoding and decoding buffers do not need to be synchronized, + * and encoding ring buffer can have any size, including small ones ( < 64 KB). + * + * Whenever these conditions are not possible, + * save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression, + * then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block. +*/ +LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity); + + +/*! LZ4_decompress_*_usingDict() : + * These decoding functions work the same as + * a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue() + * They are stand-alone, and don't need an LZ4_streamDecode_t structure. + * Dictionary is presumed stable : it must remain accessible and unmodified during decompression. 
+ * Performance tip : Decompression speed can be substantially increased + * when dst == dictStart + dictSize. + */ +LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize); + +#endif /* LZ4_H_2983827168210 */ + + +/*^************************************* + * !!!!!! STATIC LINKING ONLY !!!!!! + ***************************************/ + +/*-**************************************************************************** + * Experimental section + * + * Symbols declared in this section must be considered unstable. Their + * signatures or semantics may change, or they may be removed altogether in the + * future. They are therefore only safe to depend on when the caller is + * statically linked against the library. + * + * To protect against unsafe usage, not only are the declarations guarded, + * the definitions are hidden by default + * when building LZ4 as a shared/dynamic library. + * + * In order to access these declarations, + * define LZ4_STATIC_LINKING_ONLY in your application + * before including LZ4's headers. + * + * In order to make their implementations accessible dynamically, you must + * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library. + ******************************************************************************/ + +#ifdef LZ4_STATIC_LINKING_ONLY + +#ifndef LZ4_STATIC_3504398509 +#define LZ4_STATIC_3504398509 + +#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS +#define LZ4LIB_STATIC_API LZ4LIB_API +#else +#define LZ4LIB_STATIC_API +#endif + + +/*! LZ4_compress_fast_extState_fastReset() : + * A variant of LZ4_compress_fast_extState(). + * + * Using this variant avoids an expensive initialization step. + * It is only safe to call if the state buffer is known to be correctly initialized already + * (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized"). 
+ * From a high level, the difference is that + * this function initializes the provided state with a call to something like LZ4_resetStream_fast() + * while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream(). + */ +LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + +/*! LZ4_attach_dictionary() : + * This is an experimental API that allows + * efficient use of a static dictionary many times. + * + * Rather than re-loading the dictionary buffer into a working context before + * each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a + * working LZ4_stream_t, this function introduces a no-copy setup mechanism, + * in which the working stream references the dictionary stream in-place. + * + * Several assumptions are made about the state of the dictionary stream. + * Currently, only streams which have been prepared by LZ4_loadDict() should + * be expected to work. + * + * Alternatively, the provided dictionaryStream may be NULL, + * in which case any existing dictionary stream is unset. + * + * If a dictionary is provided, it replaces any pre-existing stream history. + * The dictionary contents are the only history that can be referenced and + * logically immediately precede the data compressed in the first subsequent + * compression call. + * + * The dictionary will only remain attached to the working stream through the + * first compression call, at the end of which it is cleared. The dictionary + * stream (and source buffer) must remain in-place / accessible / unchanged + * through the completion of the first compression call on the stream. + */ +LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream); + + +/*! In-place compression and decompression + * + * It's possible to have input and output sharing the same buffer, + * for highly contrained memory environments. 
+ * In both cases, it requires input to lay at the end of the buffer, + * and decompression to start at beginning of the buffer. + * Buffer size must feature some margin, hence be larger than final size. + * + * |<------------------------buffer--------------------------------->| + * |<-----------compressed data--------->| + * |<-----------decompressed size------------------>| + * |<----margin---->| + * + * This technique is more useful for decompression, + * since decompressed size is typically larger, + * and margin is short. + * + * In-place decompression will work inside any buffer + * which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize). + * This presumes that decompressedSize > compressedSize. + * Otherwise, it means compression actually expanded data, + * and it would be more efficient to store such data with a flag indicating it's not compressed. + * This can happen when data is not compressible (already compressed, or encrypted). + * + * For in-place compression, margin is larger, as it must be able to cope with both + * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX, + * and data expansion, which can happen when input is not compressible. + * As a consequence, buffer size requirements are much higher, + * and memory savings offered by in-place compression are more limited. + * + * There are ways to limit this cost for compression : + * - Reduce history size, by modifying LZ4_DISTANCE_MAX. + * Note that it is a compile-time constant, so all compressions will apply this limit. + * Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX, + * so it's a reasonable trick when inputs are known to be small. + * - Require the compressor to deliver a "maximum compressed size". + * This is the `dstCapacity` parameter in `LZ4_compress*()`. + * When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail, + * in which case, the return code will be 0 (zero). 
+ * The caller must be ready for these cases to happen, + * and typically design a backup scheme to send data uncompressed. + * The combination of both techniques can significantly reduce + * the amount of margin required for in-place compression. + * + * In-place compression can work in any buffer + * which size is >= (maxCompressedSize) + * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success. + * LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX, + * so it's possible to reduce memory requirements by playing with them. + */ + +#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32) +#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */ + +#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */ +# define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */ +#endif + +#define LZ4_COMPRESS_INPLACE_MARGIN (LZ4_DISTANCE_MAX + 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */ +#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */ + +#endif /* LZ4_STATIC_3504398509 */ +#endif /* LZ4_STATIC_LINKING_ONLY */ + + + +#ifndef LZ4_H_98237428734687 +#define LZ4_H_98237428734687 + +/*-************************************************************ + * Private Definitions + ************************************************************** + * Do not use these definitions directly. + * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`. 
+ * Accessing members will expose user code to API and/or ABI break in future versions of the library.
+ **************************************************************/
+#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */
+
+#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+ typedef int8_t LZ4_i8;
+ typedef uint8_t LZ4_byte;
+ typedef uint16_t LZ4_u16;
+ typedef uint32_t LZ4_u32;
+#else
+ typedef signed char LZ4_i8;
+ typedef unsigned char LZ4_byte;
+ typedef unsigned short LZ4_u16;
+ typedef unsigned int LZ4_u32;
+#endif
+
+typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
+struct LZ4_stream_t_internal {
+ LZ4_u32 hashTable[LZ4_HASH_SIZE_U32];
+ LZ4_u32 currentOffset;
+ LZ4_u32 tableType;
+ const LZ4_byte* dictionary;
+ const LZ4_stream_t_internal* dictCtx;
+ LZ4_u32 dictSize;
+};
+
+typedef struct {
+ const LZ4_byte* externalDict;
+ size_t extDictSize;
+ const LZ4_byte* prefixEnd;
+ size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+
+
+/*! LZ4_stream_t :
+ * Do not use below internal definitions directly !
+ * Declare or allocate an LZ4_stream_t instead.
+ * LZ4_stream_t can also be created using LZ4_createStream(), which is recommended.
+ * The structure definition can be convenient for static allocation
+ * (on stack, or as part of larger structure).
+ * Init this structure with LZ4_initStream() before first use.
+ * note : only use this definition in association with static linking !
+ * this definition is not API/ABI safe, and may change in future versions. 
+ */ +#define LZ4_STREAMSIZE 16416 /* static size, for inter-version compatibility */ +#define LZ4_STREAMSIZE_VOIDP (LZ4_STREAMSIZE / sizeof(void*)) +union LZ4_stream_u { + void* table[LZ4_STREAMSIZE_VOIDP]; + LZ4_stream_t_internal internal_donotuse; +}; /* previously typedef'd to LZ4_stream_t */ + + +/*! LZ4_initStream() : v1.9.0+ + * An LZ4_stream_t structure must be initialized at least once. + * This is automatically done when invoking LZ4_createStream(), + * but it's not when the structure is simply declared on stack (for example). + * + * Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t. + * It can also initialize any arbitrary buffer of sufficient size, + * and will @return a pointer of proper type upon initialization. + * + * Note : initialization fails if size and alignment conditions are not respected. + * In which case, the function will @return NULL. + * Note2: An LZ4_stream_t structure guarantees correct alignment and size. + * Note3: Before v1.9.0, use LZ4_resetStream() instead + */ +LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* buffer, size_t size); + + +/*! LZ4_streamDecode_t : + * information structure to track an LZ4 stream during decompression. + * init this structure using LZ4_setStreamDecode() before first use. + * note : only use in association with static linking ! + * this definition is not API/ABI safe, + * and may change in a future version ! + */ +#define LZ4_STREAMDECODESIZE_U64 (4 + ((sizeof(void*)==16) ? 2 : 0) /*AS-400*/ ) +#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long)) +union LZ4_streamDecode_u { + unsigned long long table[LZ4_STREAMDECODESIZE_U64]; + LZ4_streamDecode_t_internal internal_donotuse; +} ; /* previously typedef'd to LZ4_streamDecode_t */ + + + +/*-************************************ +* Obsolete Functions +**************************************/ + +/*! Deprecation warnings + * + * Deprecated functions make the compiler generate a warning when invoked. 
+ * This is meant to invite users to update their source code. + * Should deprecation warnings be a problem, it is generally possible to disable them, + * typically with -Wno-deprecated-declarations for gcc + * or _CRT_SECURE_NO_WARNINGS in Visual. + * + * Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS + * before including the header file. + */ +#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS +# define LZ4_DEPRECATED(message) /* disable deprecation warnings */ +#else +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define LZ4_DEPRECATED(message) [[deprecated(message)]] +# elif defined(_MSC_VER) +# define LZ4_DEPRECATED(message) __declspec(deprecated(message)) +# elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45)) +# define LZ4_DEPRECATED(message) __attribute__((deprecated(message))) +# elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31) +# define LZ4_DEPRECATED(message) __attribute__((deprecated)) +# else +# pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler") +# define LZ4_DEPRECATED(message) /* disabled */ +# endif +#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */ + +/*! 
Obsolete compression functions (since v1.7.3) */ +LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize); +LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize); + +/*! Obsolete decompression functions (since v1.8.0) */ +LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize); +LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); + +/* Obsolete streaming functions (since v1.7.0) + * degraded functionality; do not use! + * + * In order to perform streaming compression, these functions depended on data + * that is no longer tracked in the state. They have been preserved as well as + * possible: using them will still produce a correct output. However, they don't + * actually retain any history between compression calls. The compression ratio + * achieved will therefore be no better than compressing each chunk + * independently. 
+ */ +LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer); +LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int LZ4_sizeofStreamState(void); +LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API int LZ4_resetStreamState(void* state, char* inputBuffer); +LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API char* LZ4_slideInputBuffer (void* state); + +/*! Obsolete streaming decoding functions (since v1.7.0) */ +LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize); +LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize); + +/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) : + * These functions used to be faster than LZ4_decompress_safe(), + * but this is no longer the case. They are now slower. + * This is because LZ4_decompress_fast() doesn't know the input size, + * and therefore must progress more cautiously into the input buffer to not read beyond the end of block. + * On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability. + * As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated. + * + * The last remaining LZ4_decompress_fast() specificity is that + * it can decompress a block without knowing its compressed size. + * Such functionality can be achieved in a more secure manner + * by employing LZ4_decompress_safe_partial(). + * + * Parameters: + * originalSize : is the uncompressed size to regenerate. + * `dst` must be already allocated, its size must be >= 'originalSize' bytes. + * @return : number of bytes read from source buffer (== compressed size). + * The function expects to finish at block's end exactly. 
+ * If the source stream is detected malformed, the function stops decoding and returns a negative result. + * note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer. + * However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds. + * Also, since match offsets are not validated, match reads from 'src' may underflow too. + * These issues never happen if input (compressed) data is correct. + * But they may happen if input data is invalid (error or intentional tampering). + * As a consequence, use these functions in trusted environments with trusted data **only**. + */ +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead") +LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize); +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead") +LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize); +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead") +LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize); + +/*! LZ4_resetStream() : + * An LZ4_stream_t structure must be initialized at least once. + * This is done with LZ4_initStream(), or LZ4_resetStream(). + * Consider switching to LZ4_initStream(), + * invoking LZ4_resetStream() will trigger deprecation warnings in the future. 
+ */ +LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr); + + +#endif /* LZ4_H_98237428734687 */ + + +#if defined (__cplusplus) +} +#endif diff --git a/lzf/LICENSE.txt b/lzf/LICENSE.txt new file mode 100644 index 00000000..3787a007 --- /dev/null +++ b/lzf/LICENSE.txt @@ -0,0 +1,34 @@ +Copyright Notice and Statement for LZF filter + +Copyright (c) 2008-2009 Andrew Collette +http://h5py.alfven.org +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +a. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the + distribution. + +c. Neither the name of the author nor the names of contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ diff --git a/lzf/README.txt b/lzf/README.txt new file mode 100644 index 00000000..c6ad62c3 --- /dev/null +++ b/lzf/README.txt @@ -0,0 +1,84 @@ +=============================== +LZF filter for HDF5, revision 3 +=============================== + +The LZF filter provides high-speed compression with acceptable compression +performance, resulting in much faster performance than DEFLATE, at the +cost of a slightly lower compression ratio. It's appropriate for large +datasets of low to moderate complexity, for which some compression is +much better than none, but for which the speed of DEFLATE is unacceptable. + +This filter has been tested against HDF5 versions 1.6.5 through 1.8.3. It +is released under the BSD license (see LICENSE.txt for details). + + +Using the filter from HDF5 +-------------------------- + +There is exactly one new public function declared in lzf_filter.h, with +the following signature: + + int register_lzf(void) + +Calling this will register the filter with the HDF5 library. A non-negative +return value indicates success. If the registration fails, an error is pushed +onto the current error stack and a negative value is returned. + +It's strongly recommended to use the SHUFFLE filter with LZF, as it's +cheap, supported by all current versions of HDF5, and can significantly +improve the compression ratio. An example C program ("example.c") is included +which demonstrates the proper use of the filter. + + +Compiling +--------- + +The filter consists of a single .c file and header, along with an embedded +version of the LZF compression library. 
Since the filter is stateless, it's +recommended to statically link the entire thing into your program; for +example: + + $ gcc -O2 -lhdf5 lzf/*.c lzf_filter.c myprog.c -o myprog + +It can also be built as a shared library, although you will have to install +the resulting library somewhere the runtime linker can find it: + + $ gcc -O2 -lhdf5 -fPIC -shared lzf/*.c lzf_filter.c -o liblzf_filter.so + +A similar procedure should be used for building C++ code. As in these +examples, using option -O1 or higher is strongly recommended for increased +performance. + + +Contact +------- + +This filter is maintained as part of the HDF5 for Python (h5py) project. The +goal of h5py is to provide access to the majority of the HDF5 C API and feature +set from Python. The most recent version of h5py (1.1) includes the LZF +filter by default. + +* Downloads and bug tracker: http://h5py.googlecode.com + +* Main web site and documentation: http://h5py.alfven.org + +* Contact email: h5py at alfven dot org + + +History of changes +------------------ + +Revision 3 (6/25/09) + Fix issue with changed filter struct definition under HDF5 1.8.3. + +Revision 2 + Minor speed enhancement. + +Revision 1 + Initial release. + + + + + + diff --git a/lzf/README_bitshuffle.txt b/lzf/README_bitshuffle.txt new file mode 100644 index 00000000..d620a925 --- /dev/null +++ b/lzf/README_bitshuffle.txt @@ -0,0 +1,3 @@ +The LZF filter for HDF5 is part of the h5py project (http://h5py.alfven.org). +The version included with bitshuffle is from version 2.3 of h5py with no +modifications other than the addition of this README. diff --git a/lzf/example.c b/lzf/example.c new file mode 100644 index 00000000..23dd776c --- /dev/null +++ b/lzf/example.c @@ -0,0 +1,106 @@ +/* + Copyright (C) 2009 Andrew Collette + http://h5py.alfven.org + License: BSD (see LICENSE.txt) + + Example program demonstrating use of the LZF filter from C code. 
+ + To compile this program: + + h5cc -DH5_USE_16_API lzf/*.c lzf_filter.c example.c -o example + + To run: + + $ ./example + Success! + $ h5ls -v test_lzf.hdf5 + Opened "test_lzf.hdf5" with sec2 driver. + dset Dataset {100/100, 100/100, 100/100} + Location: 0:1:0:976 + Links: 1 + Modified: 2009-02-15 16:35:11 PST + Chunks: {1, 100, 100} 40000 bytes + Storage: 4000000 logical bytes, 174288 allocated bytes, 2295.05% utilization + Filter-0: shuffle-2 OPT {4} + Filter-1: lzf-32000 OPT {1, 261, 40000} + Type: native float +*/ + +#include +#include "hdf5.h" +#include "lzf_filter.h" + +#define SIZE 100*100*100 +#define SHAPE {100,100,100} +#define CHUNKSHAPE {1,100,100} + +int main(){ + + static float data[SIZE]; + static float data_out[SIZE]; + const hsize_t shape[] = SHAPE; + const hsize_t chunkshape[] = CHUNKSHAPE; + int r, i; + int return_code = 1; + + hid_t fid, sid, dset, plist = 0; + + for(i=0; i0) H5Dclose(dset); + if(sid>0) H5Sclose(sid); + if(plist>0) H5Pclose(plist); + if(fid>0) H5Fclose(fid); + + return return_code; +} + diff --git a/lzf/lzf/lzf.h b/lzf/lzf/lzf.h new file mode 100644 index 00000000..919b6e6b --- /dev/null +++ b/lzf/lzf/lzf.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2000-2008 Marc Alexander Lehmann + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. + */ + +#ifndef LZF_H +#define LZF_H + +/*********************************************************************** +** +** lzf -- an extremely fast/free compression/decompression-method +** http://liblzf.plan9.de/ +** +** This algorithm is believed to be patent-free. +** +***********************************************************************/ + +#define LZF_VERSION 0x0105 /* 1.5, API version */ + +/* + * Compress in_len bytes stored at the memory block starting at + * in_data and write the result to out_data, up to a maximum length + * of out_len bytes. 
+ * + * If the output buffer is not large enough or any error occurs return 0, + * otherwise return the number of bytes used, which might be considerably + * more than in_len (but less than 104% of the original size), so it + * makes sense to always use out_len == in_len - 1), to ensure _some_ + * compression, and store the data uncompressed otherwise (with a flag, of + * course. + * + * lzf_compress might use different algorithms on different systems and + * even different runs, thus might result in different compressed strings + * depending on the phase of the moon or similar factors. However, all + * these strings are architecture-independent and will result in the + * original data when decompressed using lzf_decompress. + * + * The buffers must not be overlapping. + * + * If the option LZF_STATE_ARG is enabled, an extra argument must be + * supplied which is not reflected in this header file. Refer to lzfP.h + * and lzf_c.c. + * + */ +unsigned int +lzf_compress (const void *const in_data, unsigned int in_len, + void *out_data, unsigned int out_len); + +/* + * Decompress data compressed with some version of the lzf_compress + * function and stored at location in_data and length in_len. The result + * will be stored at out_data up to a maximum of out_len characters. + * + * If the output buffer is not large enough to hold the decompressed + * data, a 0 is returned and errno is set to E2BIG. Otherwise the number + * of decompressed bytes (i.e. the original length of the data) is + * returned. + * + * If an error in the compressed data is detected, a zero is returned and + * errno is set to EINVAL. + * + * This function is very fast, about as fast as a copying loop. 
+ */ +unsigned int +lzf_decompress (const void *const in_data, unsigned int in_len, + void *out_data, unsigned int out_len); + +#endif + diff --git a/lzf/lzf/lzfP.h b/lzf/lzf/lzfP.h new file mode 100644 index 00000000..8414da4d --- /dev/null +++ b/lzf/lzf/lzfP.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2000-2007 Marc Alexander Lehmann + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. 
If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. + */ + +#ifndef LZFP_h +#define LZFP_h + +#define STANDALONE 1 /* at the moment, this is ok. */ + +#ifndef STANDALONE +# include "lzf.h" +#endif + +/* + * Size of hashtable is (1 << HLOG) * sizeof (char *) + * decompression is independent of the hash table size + * the difference between 15 and 14 is very small + * for small blocks (and 14 is usually a bit faster). + * For a low-memory/faster configuration, use HLOG == 13; + * For best compression, use 15 or 16 (or more, up to 23). + */ +#ifndef HLOG +# define HLOG 17 /* Avoid pathological case at HLOG=16 A.C. 2/15/09 */ +#endif + +/* + * Sacrifice very little compression quality in favour of compression speed. + * This gives almost the same compression as the default code, and is + * (very roughly) 15% faster. This is the preferred mode of operation. + */ +#ifndef VERY_FAST +# define VERY_FAST 1 +#endif + +/* + * Sacrifice some more compression quality in favour of compression speed. + * (roughly 1-2% worse compression for large blocks and + * 9-10% for small, redundant, blocks and >>20% better speed in both cases) + * In short: when in need for speed, enable this for binary data, + * possibly disable this for text data. 
+ */ +#ifndef ULTRA_FAST +# define ULTRA_FAST 1 +#endif + +/* + * Unconditionally aligning does not cost very much, so do it if unsure + */ +#ifndef STRICT_ALIGN +# define STRICT_ALIGN !(defined(__i386) || defined (__amd64)) +#endif + +/* + * You may choose to pre-set the hash table (might be faster on some + * modern cpus and large (>>64k) blocks, and also makes compression + * deterministic/repeatable when the configuration otherwise is the same). + */ +#ifndef INIT_HTAB +# define INIT_HTAB 0 +#endif + +/* ======================================================================= + Changing things below this line may break the HDF5 LZF filter. + A.C. 2/15/09 + ======================================================================= +*/ + +/* + * Avoid assigning values to errno variable? for some embedding purposes + * (linux kernel for example), this is neccessary. NOTE: this breaks + * the documentation in lzf.h. + */ +#ifndef AVOID_ERRNO +# define AVOID_ERRNO 0 +#endif + +/* + * Wether to pass the LZF_STATE variable as argument, or allocate it + * on the stack. For small-stack environments, define this to 1. + * NOTE: this breaks the prototype in lzf.h. + */ +#ifndef LZF_STATE_ARG +# define LZF_STATE_ARG 0 +#endif + +/* + * Wether to add extra checks for input validity in lzf_decompress + * and return EINVAL if the input stream has been corrupted. This + * only shields against overflowing the input buffer and will not + * detect most corrupted streams. + * This check is not normally noticable on modern hardware + * (<1% slowdown), but might slow down older cpus considerably. + */ + +#ifndef CHECK_INPUT +# define CHECK_INPUT 1 +#endif + +/*****************************************************************************/ +/* nothing should be changed below */ + +typedef unsigned char u8; + +typedef const u8 *LZF_STATE[1 << (HLOG)]; + +#if !STRICT_ALIGN +/* for unaligned accesses we need a 16 bit datatype. 
*/ +# include +# if USHRT_MAX == 65535 + typedef unsigned short u16; +# elif UINT_MAX == 65535 + typedef unsigned int u16; +# else +# undef STRICT_ALIGN +# define STRICT_ALIGN 1 +# endif +#endif + +#if ULTRA_FAST +# if defined(VERY_FAST) +# undef VERY_FAST +# endif +#endif + +#if INIT_HTAB +# ifdef __cplusplus +# include +# else +# include +# endif +#endif + +#endif + diff --git a/lzf/lzf/lzf_c.c b/lzf/lzf/lzf_c.c new file mode 100644 index 00000000..fbfd4cce --- /dev/null +++ b/lzf/lzf/lzf_c.c @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2000-2008 Marc Alexander Lehmann + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. + */ + +#include "lzfP.h" + +#define HSIZE (1 << (HLOG)) + +/* + * don't play with this unless you benchmark! + * decompression is not dependent on the hash function + * the hashing function might seem strange, just believe me + * it works ;) + */ +#ifndef FRST +# define FRST(p) (((p[0]) << 8) | p[1]) +# define NEXT(v,p) (((v) << 8) | p[2]) +# if ULTRA_FAST +# define IDX(h) ((( h >> (3*8 - HLOG)) - h ) & (HSIZE - 1)) +# elif VERY_FAST +# define IDX(h) ((( h >> (3*8 - HLOG)) - h*5) & (HSIZE - 1)) +# else +# define IDX(h) ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1)) +# endif +#endif +/* + * IDX works because it is very similar to a multiplicative hash, e.g. + * ((h * 57321 >> (3*8 - HLOG)) & (HSIZE - 1)) + * the latter is also quite fast on newer CPUs, and compresses similarly. 
+ * + * the next one is also quite good, albeit slow ;) + * (int)(cos(h & 0xffffff) * 1e6) + */ + +#if 0 +/* original lzv-like hash function, much worse and thus slower */ +# define FRST(p) (p[0] << 5) ^ p[1] +# define NEXT(v,p) ((v) << 5) ^ p[2] +# define IDX(h) ((h) & (HSIZE - 1)) +#endif + +#define MAX_LIT (1 << 5) +#define MAX_OFF (1 << 13) +#define MAX_REF ((1 << 8) + (1 << 3)) + +#if __GNUC__ >= 3 +# define expect(expr,value) __builtin_expect ((expr),(value)) +# define inline inline +#else +# define expect(expr,value) (expr) +# define inline static +#endif + +#define expect_false(expr) expect ((expr) != 0, 0) +#define expect_true(expr) expect ((expr) != 0, 1) + +/* + * compressed format + * + * 000LLLLL ; literal + * LLLooooo oooooooo ; backref L + * 111ooooo LLLLLLLL oooooooo ; backref L+7 + * + */ + +unsigned int +lzf_compress (const void *const in_data, unsigned int in_len, + void *out_data, unsigned int out_len +#if LZF_STATE_ARG + , LZF_STATE htab +#endif + ) +{ +#if !LZF_STATE_ARG + LZF_STATE htab; +#endif + const u8 **hslot; + const u8 *ip = (const u8 *)in_data; + u8 *op = (u8 *)out_data; + const u8 *in_end = ip + in_len; + u8 *out_end = op + out_len; + const u8 *ref; + + /* off requires a type wide enough to hold a general pointer difference. + * ISO C doesn't have that (size_t might not be enough and ptrdiff_t only + * works for differences within a single object). We also assume that no + * no bit pattern traps. Since the only platform that is both non-POSIX + * and fails to support both assumptions is windows 64 bit, we make a + * special workaround for it. 
+ */ +#if ( defined (WIN32) && defined (_M_X64) ) || defined (_WIN64) + unsigned _int64 off; /* workaround for missing POSIX compliance */ +#else + unsigned long off; +#endif + unsigned int hval; + int lit; + + if (!in_len || !out_len) + return 0; + +#if INIT_HTAB + memset (htab, 0, sizeof (htab)); +# if 0 + for (hslot = htab; hslot < htab + HSIZE; hslot++) + *hslot++ = ip; +# endif +#endif + + lit = 0; op++; /* start run */ + + hval = FRST (ip); + while (ip < in_end - 2) + { + hval = NEXT (hval, ip); + hslot = htab + IDX (hval); + ref = *hslot; *hslot = ip; + + if (1 +#if INIT_HTAB + && ref < ip /* the next test will actually take care of this, but this is faster */ +#endif + && (off = ip - ref - 1) < MAX_OFF + && ip + 4 < in_end + && ref > (u8 *)in_data +#if STRICT_ALIGN + && ref[0] == ip[0] + && ref[1] == ip[1] + && ref[2] == ip[2] +#else + && *(u16 *)ref == *(u16 *)ip + && ref[2] == ip[2] +#endif + ) + { + /* match found at *ref++ */ + unsigned int len = 2; + unsigned int maxlen = in_end - ip - len; + maxlen = maxlen > MAX_REF ? 
MAX_REF : maxlen; + + if (expect_false (op + 3 + 1 >= out_end)) /* first a faster conservative test */ + if (op - !lit + 3 + 1 >= out_end) /* second the exact but rare test */ + return 0; + + op [- lit - 1] = lit - 1; /* stop run */ + op -= !lit; /* undo run if length is zero */ + + for (;;) + { + if (expect_true (maxlen > 16)) + { + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + } + + do + len++; + while (len < maxlen && ref[len] == ip[len]); + + break; + } + + len -= 2; /* len is now #octets - 1 */ + ip++; + + if (len < 7) + { + *op++ = (off >> 8) + (len << 5); + } + else + { + *op++ = (off >> 8) + ( 7 << 5); + *op++ = len - 7; + } + + *op++ = off; + lit = 0; op++; /* start run */ + + ip += len + 1; + + if (expect_false (ip >= in_end - 2)) + break; + +#if ULTRA_FAST || VERY_FAST + --ip; +# if VERY_FAST && !ULTRA_FAST + --ip; +# endif + hval = FRST (ip); + + hval = NEXT (hval, ip); + htab[IDX (hval)] = ip; + ip++; + +# if VERY_FAST && !ULTRA_FAST + hval = NEXT (hval, ip); + htab[IDX (hval)] = ip; + ip++; +# endif +#else + ip -= len + 1; + + do + { + hval = NEXT (hval, ip); + htab[IDX (hval)] = ip; + ip++; + } + while (len--); +#endif + } + else + { + /* one more literal byte we must copy */ + if (expect_false (op >= out_end)) + return 0; + + lit++; *op++ = *ip++; + + if (expect_false (lit == MAX_LIT)) + { + op [- lit - 1] = lit - 
1; /* stop run */ + lit = 0; op++; /* start run */ + } + } + } + + if (op + 3 > out_end) /* at most 3 bytes can be missing here */ + return 0; + + while (ip < in_end) + { + lit++; *op++ = *ip++; + + if (expect_false (lit == MAX_LIT)) + { + op [- lit - 1] = lit - 1; /* stop run */ + lit = 0; op++; /* start run */ + } + } + + op [- lit - 1] = lit - 1; /* end run */ + op -= !lit; /* undo run if length is zero */ + + return op - (u8 *)out_data; +} + diff --git a/lzf/lzf/lzf_d.c b/lzf/lzf/lzf_d.c new file mode 100644 index 00000000..2e2eedaa --- /dev/null +++ b/lzf/lzf/lzf_d.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2000-2007 Marc Alexander Lehmann + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. + */ + +#include "lzfP.h" + +#if AVOID_ERRNO +# define SET_ERRNO(n) +#else +# include +# define SET_ERRNO(n) errno = (n) +#endif + +/* ASM is slower than C in HDF5 tests -- A.C. 2/5/09 +#ifndef __STRICT_ANSI__ +#ifndef H5PY_DISABLE_LZF_ASM +#if (__i386 || __amd64) && __GNUC__ >= 3 +# define lzf_movsb(dst, src, len) \ + asm ("rep movsb" \ + : "=D" (dst), "=S" (src), "=c" (len) \ + : "0" (dst), "1" (src), "2" (len)); +#endif +#endif +#endif +*/ + +unsigned int +lzf_decompress (const void *const in_data, unsigned int in_len, + void *out_data, unsigned int out_len) +{ + u8 const *ip = (const u8 *)in_data; + u8 *op = (u8 *)out_data; + u8 const *const in_end = ip + in_len; + u8 *const out_end = op + out_len; + + do + { + unsigned int ctrl = *ip++; + + if (ctrl < (1 << 5)) /* literal run */ + { + ctrl++; + + if (op + ctrl > out_end) + { + SET_ERRNO (E2BIG); + return 0; + } + +#if CHECK_INPUT + if (ip + ctrl > in_end) + { + SET_ERRNO (EINVAL); + return 0; + } +#endif + +#ifdef lzf_movsb + lzf_movsb (op, ip, ctrl); +#else + do + *op++ = *ip++; + while (--ctrl); +#endif + } + else /* back reference */ + { + unsigned int len = ctrl >> 5; + + u8 *ref = op - ((ctrl & 0x1f) << 8) - 1; + +#if CHECK_INPUT + if (ip >= in_end) + { + SET_ERRNO (EINVAL); + return 0; + } +#endif + if (len == 7) + { + len += 
*ip++; +#if CHECK_INPUT + if (ip >= in_end) + { + SET_ERRNO (EINVAL); + return 0; + } +#endif + } + + ref -= *ip++; + + if (op + len + 2 > out_end) + { + SET_ERRNO (E2BIG); + return 0; + } + + if (ref < (u8 *)out_data) + { + SET_ERRNO (EINVAL); + return 0; + } + +#ifdef lzf_movsb + len += 2; + lzf_movsb (op, ref, len); +#else + *op++ = *ref++; + *op++ = *ref++; + + do + *op++ = *ref++; + while (--len); +#endif + } + } + while (ip < in_end); + + return op - (u8 *)out_data; +} + diff --git a/lzf/lzf_filter.c b/lzf/lzf_filter.c new file mode 100644 index 00000000..c6dd4b0e --- /dev/null +++ b/lzf/lzf_filter.c @@ -0,0 +1,261 @@ +/***** Preamble block ********************************************************* +* +* This file is part of h5py, a low-level Python interface to the HDF5 library. +* +* Copyright (C) 2008 Andrew Collette +* http://h5py.alfven.org +* License: BSD (See LICENSE.txt for full license) +* +* $Date$ +* +****** End preamble block ****************************************************/ + +/* + Implements an LZF filter module for HDF5, using the BSD-licensed library + by Marc Alexander Lehmann (http://www.goof.com/pcg/marc/liblzf.html). + + No Python-specific code is used. The filter behaves like the DEFLATE + filter, in that it is called for every type and space, and returns 0 + if the data cannot be compressed. + + The only public function is (int) register_lzf(void), which passes on + the result from H5Zregister. 
+*/ + +#include +#include +#include +#include "hdf5.h" +#include "lzf/lzf.h" +#include "lzf_filter.h" + +/* Our own versions of H5Epush_sim, as it changed in 1.8 */ +#if H5_VERS_MAJOR == 1 && H5_VERS_MINOR < 7 + +#define PUSH_ERR(func, minor, str) H5Epush(__FILE__, func, __LINE__, H5E_PLINE, minor, str) +#define H5PY_GET_FILTER H5Pget_filter_by_id + +#else + +#define PUSH_ERR(func, minor, str) H5Epush1(__FILE__, func, __LINE__, H5E_PLINE, minor, str) +#define H5PY_GET_FILTER(a,b,c,d,e,f,g) H5Pget_filter_by_id2(a,b,c,d,e,f,g,NULL) + +#endif + +/* Deal with the mutiple definitions for H5Z_class_t. + Note: Only HDF5 1.6 and 1.8 are supported. + + (1) The old class should always be used for HDF5 1.6 + (2) The new class should always be used for HDF5 1.8 < 1.8.3 + (3) The old class should be used for HDF5 1.8 >= 1.8.3 only if the + macro H5_USE_16_API is set +*/ + +#if H5_VERS_MAJOR == 1 && H5_VERS_MINOR == 8 && (H5_VERS_RELEASE < 3 || !H5_USE_16_API) +#define H5PY_H5Z_NEWCLS 1 +#else +#define H5PY_H5Z_NEWCLS 0 +#endif + +size_t lzf_filter(unsigned flags, size_t cd_nelmts, + const unsigned cd_values[], size_t nbytes, + size_t *buf_size, void **buf); + +herr_t lzf_set_local(hid_t dcpl, hid_t type, hid_t space); + + +/* Try to register the filter, passing on the HDF5 return value */ +int register_lzf(void){ + + int retval; + +#if H5PY_H5Z_NEWCLS + H5Z_class_t filter_class = { + H5Z_CLASS_T_VERS, + (H5Z_filter_t)(H5PY_FILTER_LZF), + 1, 1, + "lzf", + NULL, + (H5Z_set_local_func_t)(lzf_set_local), + (H5Z_func_t)(lzf_filter) + }; +#else + H5Z_class_t filter_class = { + (H5Z_filter_t)(H5PY_FILTER_LZF), + "lzf", + NULL, + (H5Z_set_local_func_t)(lzf_set_local), + (H5Z_func_t)(lzf_filter) + }; +#endif + + retval = H5Zregister(&filter_class); + if(retval<0){ + PUSH_ERR("register_lzf", H5E_CANTREGISTER, "Can't register LZF filter"); + } + return retval; +} + +/* Filter setup. Records the following inside the DCPL: + + 1. 
If version information is not present, set slots 0 and 1 to the filter + revision and LZF API version, respectively. + + 2. Compute the chunk size in bytes and store it in slot 2. +*/ +herr_t lzf_set_local(hid_t dcpl, hid_t type, hid_t space){ + + int ndims; + int i; + herr_t r; + + unsigned int bufsize; + hsize_t chunkdims[32]; + + unsigned int flags; + size_t nelements = 8; + unsigned values[] = {0,0,0,0,0,0,0,0}; + + r = H5PY_GET_FILTER(dcpl, H5PY_FILTER_LZF, &flags, &nelements, values, 0, NULL); + if(r<0) return -1; + + if(nelements < 3) nelements = 3; /* First 3 slots reserved. If any higher + slots are used, preserve the contents. */ + + /* It seems the H5Z_FLAG_REVERSE flag doesn't work here, so we have to be + careful not to clobber any existing version info */ + if(values[0]==0) values[0] = H5PY_FILTER_LZF_VERSION; + if(values[1]==0) values[1] = LZF_VERSION; + + ndims = H5Pget_chunk(dcpl, 32, chunkdims); + if(ndims<0) return -1; + if(ndims>32){ + PUSH_ERR("lzf_set_local", H5E_CALLBACK, "Chunk rank exceeds limit"); + return -1; + } + + bufsize = H5Tget_size(type); + if(bufsize==0) return -1; + + for(i=0;i=3)&&(cd_values[2]!=0)){ + outbuf_size = cd_values[2]; /* Precomputed buffer guess */ + }else{ + outbuf_size = (*buf_size); + } + +#ifdef H5PY_LZF_DEBUG + fprintf(stderr, "Decompress %d chunk w/buffer %d\n", nbytes, outbuf_size); +#endif + + while(!status){ + + free(outbuf); + outbuf = malloc(outbuf_size); + + if(outbuf == NULL){ + PUSH_ERR("lzf_filter", H5E_CALLBACK, "Can't allocate decompression buffer"); + goto failed; + } + + status = lzf_decompress(*buf, nbytes, outbuf, outbuf_size); + + if(!status){ /* compression failed */ + + if(errno == E2BIG){ + outbuf_size += (*buf_size); +#ifdef H5PY_LZF_DEBUG + fprintf(stderr, " Too small: %d\n", outbuf_size); +#endif + } else if(errno == EINVAL) { + + PUSH_ERR("lzf_filter", H5E_CALLBACK, "Invalid data for LZF decompression"); + goto failed; + + } else { + PUSH_ERR("lzf_filter", H5E_CALLBACK, "Unknown LZF 
decompression error"); + goto failed; + } + + } /* if !status */ + + } /* while !status */ + + } /* compressing vs decompressing */ + + if(status != 0){ + + free(*buf); + *buf = outbuf; + *buf_size = outbuf_size; + + return status; /* Size of compressed/decompressed data */ + } + + failed: + + free(outbuf); + return 0; + +} /* End filter function */ + + + + + + + + + + + + + diff --git a/lzf/lzf_filter.h b/lzf/lzf_filter.h new file mode 100644 index 00000000..27dff83a --- /dev/null +++ b/lzf/lzf_filter.h @@ -0,0 +1,38 @@ +/***** Preamble block ********************************************************* +* +* This file is part of h5py, a low-level Python interface to the HDF5 library. +* +* Copyright (C) 2008 Andrew Collette +* http://h5py.alfven.org +* License: BSD (See LICENSE.txt for full license) +* +* $Date$ +* +****** End preamble block ****************************************************/ + + +#ifndef H5PY_LZF_H +#define H5PY_LZF_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* Filter revision number, starting at 1 */ +#define H5PY_FILTER_LZF_VERSION 4 + +/* Filter ID registered with the HDF Group as of 2/6/09. For maintenance + requests, contact the filter author directly. */ +#define H5PY_FILTER_LZF 32000 + +/* Register the filter with the library. Returns a negative value on failure, + and a non-negative value on success. 
+*/ +int register_lzf(void); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..8b2a6860 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +# Include dependencies when building wheels on cibuildwheel +[build-system] +requires = [ + "setuptools>=0.7", + "Cython>=0.19", + "oldest-supported-numpy", + "h5py>=2.4.0", +] + +build-backend = "setuptools.build_meta" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..34c51ec6 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +# Order matters +setuptools>=0.7 +Cython>=0.19 +numpy>=1.6.1 +h5py>=2.4.0 diff --git a/setup.cfg.example b/setup.cfg.example new file mode 100644 index 00000000..6bd2ccfb --- /dev/null +++ b/setup.cfg.example @@ -0,0 +1,10 @@ +[install] +# These control the installation of the hdf5 dynamically loaded filter plugin. +h5plugin = 0 +h5plugin-dir = /usr/local/hdf5/lib/plugin + +[build_ext] +# Whether to compile with OpenMP multi-threading. Default is system dependant: +# False on OSX (since the clang compiler does not yet support OpenMP) and True +# otherwise. +omp = 1 diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..ff99b8ef --- /dev/null +++ b/setup.py @@ -0,0 +1,419 @@ +from __future__ import absolute_import, division, print_function + +# I didn't import unicode_literals. They break setuptools or Cython in python +# 2.7, but python 3 seems to be happy with them. 
import glob
import os
from os import path
from setuptools import setup, Extension
from setuptools.command.build_ext import build_ext as build_ext_
from setuptools.command.develop import develop as develop_
from setuptools.command.install import install as install_
from Cython.Compiler.Main import default_options
import shutil
import subprocess
import sys
import platform


# Package version, also exported to the C sources through MACROS below.
VERSION_MAJOR = 0
VERSION_MINOR = 4
VERSION_POINT = 2
# Define ZSTD macro for cython compilation
# (disabled by default; flipped to True by the ENABLE_ZSTD env var or the
# --zstd install option elsewhere in this file).
default_options["compile_time_env"] = {"ZSTD_SUPPORT": False}

# Only unset in the 'release' branch and in tags.
VERSION_DEV = None

VERSION = "%d.%d.%d" % (VERSION_MAJOR, VERSION_MINOR, VERSION_POINT)
if VERSION_DEV:
    VERSION = VERSION + ".dev%d" % VERSION_DEV


# Base C compiler flags for GCC/Clang; MSVC gets its own list below.
COMPILE_FLAGS = ["-O3", "-ffast-math", "-std=c99"]
# Cython breaks strict aliasing rules.
COMPILE_FLAGS += ["-fno-strict-aliasing"]
COMPILE_FLAGS += ["-fPIC"]
COMPILE_FLAGS_MSVC = ["/Ox", "/fp:fast"]

# Preprocessor defines handed to every extension so the C code can report
# the library version it was built from.
MACROS = [
    ("BSHUF_VERSION_MAJOR", VERSION_MAJOR),
    ("BSHUF_VERSION_MINOR", VERSION_MINOR),
    ("BSHUF_VERSION_POINT", VERSION_POINT),
]


# Default install location for HDF5 dynamically loaded filter plugins.
H5PLUGINS_DEFAULT = "/usr/local/hdf5/lib/plugin"
# Default -march/-mcpu target for the C compiler.
MARCH_DEFAULT = "native"

# OSX's clang compiler does not support OpenMP.
+if sys.platform == "darwin": + OMP_DEFAULT = False +else: + OMP_DEFAULT = True + +FALLBACK_CONFIG = { + "include_dirs": [], + "library_dirs": [], + "libraries": [], + "extra_compile_args": [], + "extra_link_args": [], +} + +if "HDF5_DIR" in os.environ: + FALLBACK_CONFIG["include_dirs"] += [os.environ["HDF5_DIR"] + "/include"] # macports + FALLBACK_CONFIG["library_dirs"] += [os.environ["HDF5_DIR"] + "/lib"] # macports +elif sys.platform == "darwin": + # putting here both macports and homebrew paths will generate + # "ld: warning: dir not found" at the linking phase + FALLBACK_CONFIG["include_dirs"] += ["/opt/local/include"] # macports + FALLBACK_CONFIG["library_dirs"] += ["/opt/local/lib"] # macports + FALLBACK_CONFIG["include_dirs"] += ["/usr/local/include"] # homebrew + FALLBACK_CONFIG["library_dirs"] += ["/usr/local/lib"] # homebrew +elif sys.platform.startswith("freebsd"): + FALLBACK_CONFIG["include_dirs"] += ["/usr/local/include"] # homebrew + FALLBACK_CONFIG["library_dirs"] += ["/usr/local/lib"] # homebrew + +FALLBACK_CONFIG["include_dirs"] = [ + d for d in FALLBACK_CONFIG["include_dirs"] if path.isdir(d) +] +FALLBACK_CONFIG["library_dirs"] = [ + d for d in FALLBACK_CONFIG["library_dirs"] if path.isdir(d) +] + +FALLBACK_CONFIG["extra_compile_args"] = ["-DH5_BUILT_AS_DYNAMIC_LIB"] + + +def pkgconfig(*packages, **kw): + config = kw.setdefault("config", {}) + optional_args = kw.setdefault("optional", "") + flag_map = { + "include_dirs": ["--cflags-only-I", 2], + "library_dirs": ["--libs-only-L", 2], + "libraries": ["--libs-only-l", 2], + "extra_compile_args": ["--cflags-only-other", 0], + "extra_link_args": ["--libs-only-other", 0], + } + for package in packages: + try: + subprocess.check_output(["pkg-config", package]) + except (subprocess.CalledProcessError, OSError): + print( + "Can't find %s with pkg-config fallback to " "static config" % package + ) + for distutils_key in flag_map: + config.setdefault(distutils_key, []).extend( + 
FALLBACK_CONFIG[distutils_key] + ) + config["libraries"].append(package) + else: + for distutils_key, (pkg_option, n) in flag_map.items(): + items = ( + subprocess.check_output( + ["pkg-config", optional_args, pkg_option, package] + ) + .decode("utf8") + .split() + ) + opt = config.setdefault(distutils_key, []) + opt.extend([i[n:] for i in items]) + return config + + +zstd_headers = ["zstd/lib/zstd.h"] +zstd_lib = ["zstd/lib/"] +zstd_sources = glob.glob("zstd/lib/common/*.c") +zstd_sources += glob.glob("zstd/lib/compress/*.c") +zstd_sources += glob.glob("zstd/lib/decompress/*.c") + +ext_bshuf = Extension( + "bitshuffle.ext", + sources=[ + "bitshuffle/ext.pyx", + "src/bitshuffle.c", + "src/bitshuffle_core.c", + "src/iochain.c", + "lz4/lz4.c", + ], + include_dirs=["src/", "lz4/"], + depends=["src/bitshuffle.h", "src/bitshuffle_core.h", "src/iochain.h", "lz4/lz4.h"], + libraries=[], + define_macros=MACROS, +) + +h5filter = Extension( + "bitshuffle.h5", + sources=[ + "bitshuffle/h5.pyx", + "src/bshuf_h5filter.c", + "src/bitshuffle.c", + "src/bitshuffle_core.c", + "src/iochain.c", + "lz4/lz4.c", + ], + depends=[ + "src/bitshuffle.h", + "src/bitshuffle_core.h", + "src/iochain.h", + "src/bshuf_h5filter.h", + "lz4/lz4.h", + ], + define_macros=MACROS + [("H5_USE_18_API", None)], + **pkgconfig("hdf5", config=dict(include_dirs=["src/", "lz4/"])) +) + +if not sys.platform.startswith("win"): + h5filter.sources.append("src/hdf5_dl.c") + h5filter.libraries.remove("hdf5") + +filter_plugin = Extension( + "bitshuffle.plugin.libh5bshuf", + sources=[ + "src/bshuf_h5plugin.c", + "src/bshuf_h5filter.c", + "src/bitshuffle.c", + "src/bitshuffle_core.c", + "src/iochain.c", + "lz4/lz4.c", + ], + depends=[ + "src/bitshuffle.h", + "src/bitshuffle_core.h", + "src/iochain.h", + "src/bshuf_h5filter.h", + "lz4/lz4.h", + ], + define_macros=MACROS, + **pkgconfig("hdf5", config=dict(include_dirs=["src/", "lz4/"])) +) + +lzf_plugin = Extension( + "bitshuffle.plugin.libh5LZF", + sources=[ + 
"src/lzf_h5plugin.c", + "lzf/lzf_filter.c", + "lzf/lzf/lzf_c.c", + "lzf/lzf/lzf_d.c", + ], + depends=["lzf/lzf_filter.h", "lzf/lzf/lzf.h", "lzf/lzf/lzfP.h"], + **pkgconfig("hdf5", config=dict(include_dirs=["lzf/", "lzf/lzf/"])) +) + + +EXTENSIONS = [ext_bshuf, h5filter] + +# For enabling ZSTD support when building wheels +if "ENABLE_ZSTD" in os.environ: + default_options["compile_time_env"] = {"ZSTD_SUPPORT": True} + for ext in EXTENSIONS: + if ext.name in [ + "bitshuffle.ext", + "bitshuffle.h5", + "bitshuffle.plugin.libh5bshuf", + ]: + ext.sources += zstd_sources + ext.include_dirs += zstd_lib + ext.depends += zstd_headers + ext.define_macros += [("ZSTD_SUPPORT", 1)] + +# Check for plugin hdf5 plugin support (hdf5 >= 1.8.11) +HDF5_PLUGIN_SUPPORT = False +CPATHS = os.environ["CPATH"].split(":") if "CPATH" in os.environ else [] +for p in ["/usr/include"] + pkgconfig("hdf5")["include_dirs"] + CPATHS: + if os.path.exists(os.path.join(p, "H5PLextern.h")): + HDF5_PLUGIN_SUPPORT = True + +if HDF5_PLUGIN_SUPPORT: + EXTENSIONS.extend([filter_plugin, lzf_plugin]) + + +class develop(develop_): + def run(self): + # Dummy directory for copying build plugins. + if not path.isdir("bitshuffle/plugin"): + os.mkdir("bitshuffle/plugin") + develop_.run(self) + + +# Custom installation to include installing dynamic filters. +class install(install_): + user_options = install_.user_options + [ + ("h5plugin", None, "Install HDF5 filter plugins for use outside of python."), + ( + "h5plugin-dir=", + None, + "Where to install filter plugins. Default %s." % H5PLUGINS_DEFAULT, + ), + ("zstd", None, "Install ZSTD support."), + ] + + def initialize_options(self): + install_.initialize_options(self) + self.h5plugin = False + self.zstd = False + self.h5plugin_dir = H5PLUGINS_DEFAULT + + def finalize_options(self): + install_.finalize_options(self) + if self.h5plugin not in ("0", "1", True, False): + raise ValueError("Invalid h5plugin argument. 
Must be '0' or '1'.") + self.h5plugin = int(self.h5plugin) + self.h5plugin_dir = path.abspath(self.h5plugin_dir) + self.zstd = self.zstd + + # Add ZSTD files and macro to extensions if ZSTD enabled + if self.zstd: + default_options["compile_time_env"] = {"ZSTD_SUPPORT": True} + for ext in EXTENSIONS: + if ext.name in [ + "bitshuffle.ext", + "bitshuffle.h5", + "bitshuffle.plugin.libh5bshuf", + ]: + ext.sources += zstd_sources + ext.include_dirs += zstd_lib + ext.depends += zstd_headers + ext.define_macros += [("ZSTD_SUPPORT", 1)] + + def run(self): + install_.run(self) + if self.h5plugin: + if not HDF5_PLUGIN_SUPPORT: + print("HDF5 < 1.8.11, not installing filter plugins.") + return + plugin_build = path.join(self.build_lib, "bitshuffle", "plugin") + try: + os.makedirs(self.h5plugin_dir) + except OSError as e: + if e.args[0] == 17: + # Directory already exists, this is fine. + pass + else: + raise + plugin_libs = glob.glob(path.join(plugin_build, "*")) + for plugin_lib in plugin_libs: + plugin_name = path.split(plugin_lib)[1] + shutil.copy2(plugin_lib, path.join(self.h5plugin_dir, plugin_name)) + print("Installed HDF5 filter plugins to %s" % self.h5plugin_dir) + + +# Command line or site.cfg specification of OpenMP. +class build_ext(build_ext_): + user_options = build_ext_.user_options + [ + ( + "omp=", + None, + "Whether to compile with OpenMP threading. Default" + " on current system is %s." % str(OMP_DEFAULT), + ), + ( + "march=", + None, + "Generate instructions for a specific machine type. Default is %s." + % MARCH_DEFAULT, + ), + ] + boolean_options = build_ext_.boolean_options + ["omp"] + + def initialize_options(self): + build_ext_.initialize_options(self) + self.omp = OMP_DEFAULT + self.march = MARCH_DEFAULT + + def finalize_options(self): + # For some reason this gets run twice. Careful to print messages and + # add arguments only one time. 
+ build_ext_.finalize_options(self) + + if self.omp not in ("0", "1", True, False): + raise ValueError("Invalid omp argument. Mut be '0' or '1'.") + self.omp = int(self.omp) + + import numpy as np + + ext_bshuf.include_dirs.append(np.get_include()) + + # Required only by old version of setuptools < 18.0 + from Cython.Build import cythonize + + self.extensions = cythonize(self.extensions) + for ext in self.extensions: + ext._needs_stub = False + + def build_extensions(self): + c = self.compiler.compiler_type + + if self.omp not in ("0", "1", True, False): + raise ValueError("Invalid omp argument. Mut be '0' or '1'.") + self.omp = int(self.omp) + + if self.omp: + if not hasattr(self, "_printed_omp_message"): + self._printed_omp_message = True + print("\n#################################") + print("# Compiling with OpenMP support #") + print("#################################\n") + # More portable to pass -fopenmp to linker. + # self.libraries += ['gomp'] + if self.compiler.compiler_type == "msvc": + openmpflag = "/openmp" + compileflags = COMPILE_FLAGS_MSVC + else: + openmpflag = "-fopenmp" + archi = platform.machine() + if archi in ("i386", "x86_64"): + compileflags = COMPILE_FLAGS + ["-march=%s" % self.march] + else: + compileflags = COMPILE_FLAGS + ["-mcpu=%s" % self.march] + if archi == "ppc64le": + compileflags = COMPILE_FLAGS + ["-DNO_WARN_X86_INTRINSICS"] + for e in self.extensions: + e.extra_compile_args = list( + set(e.extra_compile_args).union(compileflags) + ) + if openmpflag not in e.extra_compile_args: + e.extra_compile_args += [openmpflag] + if openmpflag not in e.extra_link_args: + e.extra_link_args += [openmpflag] + + build_ext_.build_extensions(self) + + +# Don't install numpy/cython/hdf5 if not needed +for cmd in ["sdist", "clean", "--help", "--help-commands", "--version"]: + if cmd in sys.argv: + setup_requires = [] + break +else: + setup_requires = ["Cython>=0.19", "numpy>=1.6.1"] + +with open("requirements.txt") as f: + requires = 
f.read().splitlines() + requires = [r.split()[0] for r in requires] + +with open("README.rst") as r: + long_description = r.read() + +# TODO hdf5 support should be an "extra". Figure out how to set this up. +setup( + name="bitshuffle", + version=VERSION, + packages=["bitshuffle", "bitshuffle"], + scripts=[], + ext_modules=EXTENSIONS, + cmdclass={"build_ext": build_ext, "install": install, "develop": develop}, + setup_requires=setup_requires, + install_requires=requires, + # extras_require={'H5': ["h5py"]}, + package_data={"": ["data/*"]}, + # metadata for upload to PyPI + author="Kiyoshi Wesley Masui", + author_email="kiyo@physics.ubc.ca", + description="Bitshuffle filter for improving typed data compression.", + long_description=long_description, + license="MIT", + url="https://github.com/kiyo-masui/bitshuffle", + download_url=("https://github.com/kiyo-masui/bitshuffle/tarball/%s" % VERSION), + keywords=["compression", "hdf5", "numpy"], +) diff --git a/src/bitshuffle.c b/src/bitshuffle.c new file mode 100644 index 00000000..a8ef0b5c --- /dev/null +++ b/src/bitshuffle.c @@ -0,0 +1,279 @@ +/* + * Bitshuffle - Filter for improving compression of typed binary data. + * + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for details about copyright and rights to use. + * + */ + +#include "bitshuffle.h" +#include "bitshuffle_core.h" +#include "bitshuffle_internals.h" +#include "lz4.h" + +#ifdef ZSTD_SUPPORT +#include "zstd.h" +#endif + +#include +#include + + +// Macros. +#define CHECK_ERR_FREE_LZ(count, buf) if (count < 0) { \ + free(buf); return count - 1000; } + + +/* Bitshuffle and compress a single block. 
 */
/* Bitshuffle one block of *size* elements of *elem_size* bytes pulled from
 * the I/O chain *C_ptr*, LZ4-compress it, and write it to the chain's output
 * slot prefixed by a 4-byte header holding the compressed size.
 *
 * *option* is unused in the LZ4 path (kept for signature parity with the
 * zstd variant, which uses it as the compression level).
 *
 * Returns bytes written to the output (compressed size + 4-byte header),
 * -1 on allocation failure, or a negative error code propagated from
 * bshuf_trans_bit_elem / LZ4 (LZ4 errors offset by -1000 via
 * CHECK_ERR_FREE_LZ).
 */
int64_t bshuf_compress_lz4_block(ioc_chain *C_ptr, \
        const size_t size, const size_t elem_size, const int option) {

    int64_t nbytes, count;
    void *tmp_buf_bshuf;
    void *tmp_buf_lz4;
    size_t this_iter;
    const void *in;
    void *out;

    /* Scratch buffer for the bitshuffled (but not yet compressed) block. */
    tmp_buf_bshuf = malloc(size * elem_size);
    if (tmp_buf_bshuf == NULL) return -1;

    /* Worst-case LZ4 output size for this block. */
    int dst_capacity = LZ4_compressBound(size * elem_size);
    tmp_buf_lz4 = malloc(dst_capacity);
    if (tmp_buf_lz4 == NULL){
        free(tmp_buf_bshuf);
        return -1;
    }


    /* Claim this block's input slice and advance the chain's input cursor
     * past it so the next worker gets the following block. */
    in = ioc_get_in(C_ptr, &this_iter);
    ioc_set_next_in(C_ptr, &this_iter, (void*) ((char*) in + size * elem_size));

    count = bshuf_trans_bit_elem(in, tmp_buf_bshuf, size, elem_size);
    if (count < 0) {
        free(tmp_buf_lz4);
        free(tmp_buf_bshuf);
        return count;
    }
    nbytes = LZ4_compress_default((const char*) tmp_buf_bshuf, (char*) tmp_buf_lz4, size * elem_size, dst_capacity);
    free(tmp_buf_bshuf);
    CHECK_ERR_FREE_LZ(nbytes, tmp_buf_lz4);

    /* Output slot: compressed payload plus the 4-byte size header. */
    out = ioc_get_out(C_ptr, &this_iter);
    ioc_set_next_out(C_ptr, &this_iter, (void *) ((char *) out + nbytes + 4));

    /* Header stores the compressed size (big-endian, per the helper's name —
     * must match what bshuf_read_uint32_BE expects on decompression). */
    bshuf_write_uint32_BE(out, nbytes);
    memcpy((char *) out + 4, tmp_buf_lz4, nbytes);

    free(tmp_buf_lz4);

    return nbytes + 4;
}


/* Decompress and bitunshuffle a single block.
 */
/* Inverse of bshuf_compress_lz4_block: read one block's 4-byte compressed
 * size header from the chain's input, LZ4-decompress the payload, then
 * bit-unshuffle into the chain's output slot (*size* elements of
 * *elem_size* bytes).
 *
 * *option* is unused here (signature parity with the zstd variant).
 *
 * Returns bytes consumed from the input (compressed size + 4-byte header),
 * -1 on allocation failure, -91 if the decompressed size does not match
 * size * elem_size, or a negative error code from LZ4 / untranspose.
 */
int64_t bshuf_decompress_lz4_block(ioc_chain *C_ptr,
        const size_t size, const size_t elem_size, const int option) {

    int64_t nbytes, count;
    void *out, *tmp_buf;
    const void *in;
    size_t this_iter;
    int32_t nbytes_from_header;

    /* Read the compressed size written by the compressor and advance the
     * input cursor past header + payload for the next worker. */
    in = ioc_get_in(C_ptr, &this_iter);
    nbytes_from_header = bshuf_read_uint32_BE(in);
    ioc_set_next_in(C_ptr, &this_iter,
            (void*) ((char*) in + nbytes_from_header + 4));

    out = ioc_get_out(C_ptr, &this_iter);
    ioc_set_next_out(C_ptr, &this_iter,
            (void *) ((char *) out + size * elem_size));

    /* Scratch buffer for the decompressed-but-still-shuffled data. */
    tmp_buf = malloc(size * elem_size);
    if (tmp_buf == NULL) return -1;

    nbytes = LZ4_decompress_safe((const char*) in + 4, (char *) tmp_buf, nbytes_from_header,
                                 size * elem_size);
    CHECK_ERR_FREE_LZ(nbytes, tmp_buf);
    /* A valid block must decompress to exactly the expected size. */
    if (nbytes != size * elem_size) {
        free(tmp_buf);
        return -91;
    }
    /* From here on, nbytes tracks input bytes consumed, not output bytes. */
    nbytes = nbytes_from_header;

    count = bshuf_untrans_bit_elem(tmp_buf, out, size, elem_size);
    CHECK_ERR_FREE(count, tmp_buf);
    nbytes += 4;

    free(tmp_buf);
    return nbytes;
}

#ifdef ZSTD_SUPPORT
/* Bitshuffle and compress a single block.
 */
/* Zstd twin of bshuf_compress_lz4_block: bitshuffle one block from the I/O
 * chain *C_ptr*, compress it with ZSTD at level *comp_lvl*, and write it to
 * the chain's output prefixed by a 4-byte compressed-size header.
 *
 * Returns bytes written (compressed size + 4), -1 on allocation failure, or
 * a negative error code from bshuf_trans_bit_elem / ZSTD (ZSTD errors offset
 * by -1000 via CHECK_ERR_FREE_LZ).
 */
int64_t bshuf_compress_zstd_block(ioc_chain *C_ptr, \
        const size_t size, const size_t elem_size, const int comp_lvl) {

    int64_t nbytes, count;
    void *tmp_buf_bshuf;
    void *tmp_buf_zstd;
    size_t this_iter;
    const void *in;
    void *out;

    /* Scratch buffer for the bitshuffled (but not yet compressed) block. */
    tmp_buf_bshuf = malloc(size * elem_size);
    if (tmp_buf_bshuf == NULL) return -1;

    /* Worst-case ZSTD output size for this block. */
    size_t tmp_buf_zstd_size = ZSTD_compressBound(size * elem_size);
    tmp_buf_zstd = malloc(tmp_buf_zstd_size);
    if (tmp_buf_zstd == NULL){
        free(tmp_buf_bshuf);
        return -1;
    }

    /* Claim this block's input slice and advance the chain's input cursor. */
    in = ioc_get_in(C_ptr, &this_iter);
    ioc_set_next_in(C_ptr, &this_iter, (void*) ((char*) in + size * elem_size));

    count = bshuf_trans_bit_elem(in, tmp_buf_bshuf, size, elem_size);
    if (count < 0) {
        free(tmp_buf_zstd);
        free(tmp_buf_bshuf);
        return count;
    }
    nbytes = ZSTD_compress(tmp_buf_zstd, tmp_buf_zstd_size, (const void*)tmp_buf_bshuf, size * elem_size, comp_lvl);
    free(tmp_buf_bshuf);
    CHECK_ERR_FREE_LZ(nbytes, tmp_buf_zstd);

    /* Output slot: compressed payload plus the 4-byte size header. */
    out = ioc_get_out(C_ptr, &this_iter);
    ioc_set_next_out(C_ptr, &this_iter, (void *) ((char *) out + nbytes + 4));

    /* Same big-endian size header format as the LZ4 path. */
    bshuf_write_uint32_BE(out, nbytes);
    memcpy((char *) out + 4, tmp_buf_zstd, nbytes);

    free(tmp_buf_zstd);

    return nbytes + 4;
}


/* Decompress and bitunshuffle a single block.
*/ +int64_t bshuf_decompress_zstd_block(ioc_chain *C_ptr, + const size_t size, const size_t elem_size, const int option) { + + int64_t nbytes, count; + void *out, *tmp_buf; + const void *in; + size_t this_iter; + int32_t nbytes_from_header; + + in = ioc_get_in(C_ptr, &this_iter); + nbytes_from_header = bshuf_read_uint32_BE(in); + ioc_set_next_in(C_ptr, &this_iter, + (void*) ((char*) in + nbytes_from_header + 4)); + + out = ioc_get_out(C_ptr, &this_iter); + ioc_set_next_out(C_ptr, &this_iter, + (void *) ((char *) out + size * elem_size)); + + tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + nbytes = ZSTD_decompress(tmp_buf, size * elem_size, in + 4, nbytes_from_header); + CHECK_ERR_FREE_LZ(nbytes, tmp_buf); + if (nbytes != size * elem_size) { + free(tmp_buf); + return -91; + } + + nbytes = nbytes_from_header; + count = bshuf_untrans_bit_elem(tmp_buf, out, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + nbytes += 4; + + free(tmp_buf); + return nbytes; +} +#endif // ZSTD_SUPPORT + + +/* ---- Public functions ---- + * + * See header file for description and usage. + * + */ + +size_t bshuf_compress_lz4_bound(const size_t size, + const size_t elem_size, size_t block_size) { + + size_t bound, leftover; + + if (block_size == 0) { + block_size = bshuf_default_block_size(elem_size); + } + if (block_size % BSHUF_BLOCKED_MULT) return -81; + + // Note that each block gets a 4 byte header. + // Size of full blocks. + bound = (LZ4_compressBound(block_size * elem_size) + 4) * (size / block_size); + // Size of partial blocks, if any. + leftover = ((size % block_size) / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT; + if (leftover) bound += LZ4_compressBound(leftover * elem_size) + 4; + // Size of uncompressed data not fitting into any blocks. 
+ bound += (size % BSHUF_BLOCKED_MULT) * elem_size; + return bound; +} + + +int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size, + const size_t elem_size, size_t block_size) { + return bshuf_blocked_wrap_fun(&bshuf_compress_lz4_block, in, out, size, + elem_size, block_size, 0/*option*/); +} + + +int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size, + const size_t elem_size, size_t block_size) { + return bshuf_blocked_wrap_fun(&bshuf_decompress_lz4_block, in, out, size, + elem_size, block_size, 0/*option*/); +} + +#ifdef ZSTD_SUPPORT +size_t bshuf_compress_zstd_bound(const size_t size, + const size_t elem_size, size_t block_size) { + + size_t bound, leftover; + + if (block_size == 0) { + block_size = bshuf_default_block_size(elem_size); + } + if (block_size % BSHUF_BLOCKED_MULT) return -81; + + // Note that each block gets a 4 byte header. + // Size of full blocks. + bound = (ZSTD_compressBound(block_size * elem_size) + 4) * (size / block_size); + // Size of partial blocks, if any. + leftover = ((size % block_size) / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT; + if (leftover) bound += ZSTD_compressBound(leftover * elem_size) + 4; + // Size of uncompressed data not fitting into any blocks. 
+ bound += (size % BSHUF_BLOCKED_MULT) * elem_size; + return bound; +} + + +int64_t bshuf_compress_zstd(const void* in, void* out, const size_t size, + const size_t elem_size, size_t block_size, const int comp_lvl) { + return bshuf_blocked_wrap_fun(&bshuf_compress_zstd_block, in, out, size, + elem_size, block_size, comp_lvl); +} + + +int64_t bshuf_decompress_zstd(const void* in, void* out, const size_t size, + const size_t elem_size, size_t block_size) { + return bshuf_blocked_wrap_fun(&bshuf_decompress_zstd_block, in, out, size, + elem_size, block_size, 0/*option*/); +} +#endif // ZSTD_SUPPORT diff --git a/src/bitshuffle.h b/src/bitshuffle.h new file mode 100644 index 00000000..1a13dd17 --- /dev/null +++ b/src/bitshuffle.h @@ -0,0 +1,205 @@ +/* + * Bitshuffle - Filter for improving compression of typed binary data. + * + * This file is part of Bitshuffle + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for details about copyright and rights to use. + * + * + * Header File + * + * Worker routines return an int64_t which is the number of bytes processed + * if positive or an error code if negative. + * + * Error codes: + * -1 : Failed to allocate memory. + * -11 : Missing SSE. + * -12 : Missing AVX. + * -80 : Input size not a multiple of 8. + * -81 : block_size not multiple of 8. + * -91 : Decompression error, wrong number of bytes processed. + * -1YYY : Error internal to compression routine with error code -YYY. + */ + + +#ifndef BITSHUFFLE_H +#define BITSHUFFLE_H + +#include +#include "bitshuffle_core.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * ---- LZ4 Interface ---- + */ + +/* ---- bshuf_compress_lz4_bound ---- + * + * Bound on size of data compressed with *bshuf_compress_lz4*. + * + * Parameters + * ---------- + * size : number of elements in input + * elem_size : element size of typed data + * block_size : Process in blocks of this many elements. 
Pass 0 to + * select automatically (recommended). + * + * Returns + * ------- + * Bound on compressed data size. + * + */ +size_t bshuf_compress_lz4_bound(const size_t size, + const size_t elem_size, size_t block_size); + + +/* ---- bshuf_compress_lz4 ---- + * + * Bitshuffled and compress the data using LZ4. + * + * Transpose within elements, in blocks of data of *block_size* elements then + * compress the blocks using LZ4. In the output buffer, each block is prefixed + * by a 4 byte integer giving the compressed size of that block. + * + * Output buffer must be large enough to hold the compressed data. This could + * be in principle substantially larger than the input buffer. Use the routine + * *bshuf_compress_lz4_bound* to get an upper limit. + * + * Parameters + * ---------- + * in : input buffer, must be of size * elem_size bytes + * out : output buffer, must be large enough to hold data. + * size : number of elements in input + * elem_size : element size of typed data + * block_size : Process in blocks of this many elements. Pass 0 to + * select automatically (recommended). + * + * Returns + * ------- + * number of bytes used in output buffer, negative error-code if failed. + * + */ +int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size, const size_t + elem_size, size_t block_size); + + +/* ---- bshuf_decompress_lz4 ---- + * + * Undo compression and bitshuffling. + * + * Decompress data then un-bitshuffle it in blocks of *block_size* elements. + * + * To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size* + * must patch the parameters used to compress the data. + * + * Parameters + * ---------- + * in : input buffer + * out : output buffer, must be of size * elem_size bytes + * size : number of elements in input + * elem_size : element size of typed data + * block_size : Process in blocks of this many elements. Pass 0 to + * select automatically (recommended). 
+ * + * Returns + * ------- + * number of bytes consumed in *input* buffer, negative error-code if failed. + * + */ +int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size, + const size_t elem_size, size_t block_size); + +/* + * ---- ZSTD Interface ---- + */ + +#ifdef ZSTD_SUPPORT +/* ---- bshuf_compress_zstd_bound ---- + * + * Bound on size of data compressed with *bshuf_compress_zstd*. + * + * Parameters + * ---------- + * size : number of elements in input + * elem_size : element size of typed data + * block_size : Process in blocks of this many elements. Pass 0 to + * select automatically (recommended). + * + * Returns + * ------- + * Bound on compressed data size. + * + */ +size_t bshuf_compress_zstd_bound(const size_t size, + const size_t elem_size, size_t block_size); + +/* ---- bshuf_compress_zstd ---- + * + * Bitshuffled and compress the data using zstd. + * + * Transpose within elements, in blocks of data of *block_size* elements then + * compress the blocks using ZSTD. In the output buffer, each block is prefixed + * by a 4 byte integer giving the compressed size of that block. + * + * Output buffer must be large enough to hold the compressed data. This could + * be in principle substantially larger than the input buffer. Use the routine + * *bshuf_compress_zstd_bound* to get an upper limit. + * + * Parameters + * ---------- + * in : input buffer, must be of size * elem_size bytes + * out : output buffer, must be large enough to hold data. + * size : number of elements in input + * elem_size : element size of typed data + * block_size : Process in blocks of this many elements. Pass 0 to + * select automatically (recommended). + * comp_lvl : compression level applied + * + * Returns + * ------- + * number of bytes used in output buffer, negative error-code if failed. 
+ * + */ +int64_t bshuf_compress_zstd(const void* in, void* out, const size_t size, const size_t + elem_size, size_t block_size, const int comp_lvl); + + +/* ---- bshuf_decompress_zstd ---- + * + * Undo compression and bitshuffling. + * + * Decompress data then un-bitshuffle it in blocks of *block_size* elements. + * + * To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size* + * must patch the parameters used to compress the data. + * + * Parameters + * ---------- + * in : input buffer + * out : output buffer, must be of size * elem_size bytes + * size : number of elements in input + * elem_size : element size of typed data + * block_size : Process in blocks of this many elements. Pass 0 to + * select automatically (recommended). + * + * Returns + * ------- + * number of bytes consumed in *input* buffer, negative error-code if failed. + * + */ +int64_t bshuf_decompress_zstd(const void* in, void* out, const size_t size, + const size_t elem_size, size_t block_size); + +#endif // ZSTD_SUPPORT + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // BITSHUFFLE_H diff --git a/src/bitshuffle_core.c b/src/bitshuffle_core.c new file mode 100644 index 00000000..ef33bf55 --- /dev/null +++ b/src/bitshuffle_core.c @@ -0,0 +1,1864 @@ +/* + * Bitshuffle - Filter for improving compression of typed binary data. + * + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for details about copyright and rights to use. + * + */ + +#include "bitshuffle_core.h" +#include "bitshuffle_internals.h" + +#include +#include + + +#if defined(__AVX2__) && defined (__SSE2__) +#define USEAVX2 +#endif + +#if defined(__SSE2__) || defined(NO_WARN_X86_INTRINSICS) +#define USESSE2 +#endif + +#if defined(__ARM_NEON__) || (__ARM_NEON) +#ifdef __aarch64__ +#define USEARMNEON +#endif +#endif + +// Conditional includes for SSE2 and AVX2. 
+#ifdef USEAVX2 +#include +#elif defined USESSE2 +#include +#elif defined USEARMNEON +#include +#endif + +#if defined(_OPENMP) && defined(_MSC_VER) +typedef int64_t omp_size_t; +#else +typedef size_t omp_size_t; +#endif + +// Macros. +#define CHECK_MULT_EIGHT(n) if (n % 8) return -80; +#define MAX(X,Y) ((X) > (Y) ? (X) : (Y)) + + +/* ---- Functions indicating compile time instruction set. ---- */ + +int bshuf_using_NEON(void) { +#ifdef USEARMNEON + return 1; +#else + return 0; +#endif +} + + +int bshuf_using_SSE2(void) { +#ifdef USESSE2 + return 1; +#else + return 0; +#endif +} + + +int bshuf_using_AVX2(void) { +#ifdef USEAVX2 + return 1; +#else + return 0; +#endif +} + + +/* ---- Worker code not requiring special instruction sets. ---- + * + * The following code does not use any x86 specific vectorized instructions + * and should compile on any machine + * + */ + +/* Transpose 8x8 bit array packed into a single quadword *x*. + * *t* is workspace. */ +#define TRANS_BIT_8X8(x, t) { \ + t = (x ^ (x >> 7)) & 0x00AA00AA00AA00AALL; \ + x = x ^ t ^ (t << 7); \ + t = (x ^ (x >> 14)) & 0x0000CCCC0000CCCCLL; \ + x = x ^ t ^ (t << 14); \ + t = (x ^ (x >> 28)) & 0x00000000F0F0F0F0LL; \ + x = x ^ t ^ (t << 28); \ + } + +/* Transpose 8x8 bit array along the diagonal from upper right + to lower left */ +#define TRANS_BIT_8X8_BE(x, t) { \ + t = (x ^ (x >> 9)) & 0x0055005500550055LL; \ + x = x ^ t ^ (t << 9); \ + t = (x ^ (x >> 18)) & 0x0000333300003333LL; \ + x = x ^ t ^ (t << 18); \ + t = (x ^ (x >> 36)) & 0x000000000F0F0F0FLL; \ + x = x ^ t ^ (t << 36); \ + } + +/* Transpose of an array of arbitrarily typed elements. 
 */
/* Generic element-typed transpose: treats *in* as an lda x ldb matrix of
 * type_t and writes its transpose to *out*. The main loop handles rows in
 * groups of 8 so the compiler can unroll the innermost copy; the second
 * loop handles the remaining (lda % 8) rows one at a time. */
#define TRANS_ELEM_TYPE(in, out, lda, ldb, type_t) { \
        size_t ii, jj, kk; \
        const type_t* in_type = (const type_t*) in; \
        type_t* out_type = (type_t*) out; \
        for(ii = 0; ii + 7 < lda; ii += 8) { \
            for(jj = 0; jj < ldb; jj++) { \
                for(kk = 0; kk < 8; kk++) { \
                    out_type[jj*lda + ii + kk] = \
                        in_type[ii*ldb + kk * ldb + jj]; \
                } \
            } \
        } \
        for(ii = lda - lda % 8; ii < lda; ii ++) { \
            for(jj = 0; jj < ldb; jj++) { \
                out_type[jj*lda + ii] = in_type[ii*ldb + jj]; \
            } \
        } \
    }


/* Memory copy with bshuf call signature. For testing and profiling. */
int64_t bshuf_copy(const void* in, void* out, const size_t size,
        const size_t elem_size) {

    const char* in_b = (const char*) in;
    char* out_b = (char*) out;

    memcpy(out_b, in_b, size * elem_size);
    return size * elem_size;
}


/* Transpose bytes within elements, starting partway through input.
 *
 * Views *in* as *size* elements of *elem_size* bytes and writes element
 * bytes grouped by byte position: output row jj collects byte jj of every
 * element. Processing begins at element *start* (must be a multiple of 8;
 * returns -80 otherwise via CHECK_MULT_EIGHT). Returns bytes processed. */
int64_t bshuf_trans_byte_elem_remainder(const void* in, void* out, const size_t size,
        const size_t elem_size, const size_t start) {

    size_t ii, jj, kk;
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;

    CHECK_MULT_EIGHT(start);

    if (size > start) {
        // ii loop separated into 2 loops so the compiler can unroll
        // the inner one.
        for (ii = start; ii + 7 < size; ii += 8) {
            for (jj = 0; jj < elem_size; jj++) {
                for (kk = 0; kk < 8; kk++) {
                    out_b[jj * size + ii + kk]
                        = in_b[ii * elem_size + kk * elem_size + jj];
                }
            }
        }
        /* Tail: the last (size % 8) elements, copied one at a time. */
        for (ii = size - size % 8; ii < size; ii ++) {
            for (jj = 0; jj < elem_size; jj++) {
                out_b[jj * size + ii] = in_b[ii * elem_size + jj];
            }
        }
    }
    return size * elem_size;
}


/* Transpose bytes within elements. */
int64_t bshuf_trans_byte_elem_scal(const void* in, void* out, const size_t size,
        const size_t elem_size) {

    /* Full-range transpose: delegate with start = 0. */
    return bshuf_trans_byte_elem_remainder(in, out, size, elem_size, 0);
}


/* Transpose bits within bytes.
/* Transpose bits within bytes, starting partway through input.
 * *start_byte* and the total byte count must be multiples of 8. */
int64_t bshuf_trans_bit_byte_remainder(const void* in, void* out, const size_t size,
                                       const size_t elem_size, const size_t start_byte) {

    const uint64_t* in_b = (const uint64_t*) in;
    uint8_t* out_b = (uint8_t*) out;

    uint64_t x, t;

    size_t ii, kk;
    size_t nbyte = elem_size * size;
    size_t nbyte_bitrow = nbyte / 8;

    uint64_t e = 1;
    // Runtime endianness probe; selects transpose direction below.
    const int little_endian = *(uint8_t *) &e == 1;
    // On big-endian, walk bit rows backwards: unsigned wraparound of
    // bit_row_skip combined with bit_row_offset is intentional.
    const size_t bit_row_skip = little_endian ? nbyte_bitrow : -nbyte_bitrow;
    const int64_t bit_row_offset = little_endian ? 0 : 7 * nbyte_bitrow;

    CHECK_MULT_EIGHT(nbyte);
    CHECK_MULT_EIGHT(start_byte);

    for (ii = start_byte / 8; ii < nbyte_bitrow; ii ++) {
        x = in_b[ii];
        if (little_endian) {
            TRANS_BIT_8X8(x, t);
        } else {
            TRANS_BIT_8X8_BE(x, t);
        }
        for (kk = 0; kk < 8; kk ++) {
            out_b[bit_row_offset + kk * bit_row_skip + ii] = x;
            x = x >> 8;
        }
    }
    return size * elem_size;
}


/* Transpose bits within bytes (scalar fallback over the whole input). */
int64_t bshuf_trans_bit_byte_scal(const void* in, void* out, const size_t size,
                                  const size_t elem_size) {

    return bshuf_trans_bit_byte_remainder(in, out, size, elem_size, 0);
}


/* General transpose of an array, optimized for large element sizes. */
int64_t bshuf_trans_elem(const void* in, void* out, const size_t lda,
                         const size_t ldb, const size_t elem_size) {

    size_t ii, jj;
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;
    for(ii = 0; ii < lda; ii++) {
        for(jj = 0; jj < ldb; jj++) {
            memcpy(&out_b[(jj*lda + ii) * elem_size],
                   &in_b[(ii*ldb + jj) * elem_size], elem_size);
        }
    }
    return lda * ldb * elem_size;
}


/* Transpose rows of shuffled bits (size / 8 bytes) within groups of 8. */
int64_t bshuf_trans_bitrow_eight(const void* in, void* out, const size_t size,
                                 const size_t elem_size) {

    size_t nbyte_bitrow = size / 8;

    CHECK_MULT_EIGHT(size);

    return bshuf_trans_elem(in, out, 8, elem_size, nbyte_bitrow);
}
*/ +int64_t bshuf_trans_bit_elem_scal(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; + void *tmp_buf; + + CHECK_MULT_EIGHT(size); + + tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + count = bshuf_trans_byte_elem_scal(in, out, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_trans_bit_byte_scal(out, tmp_buf, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); + + free(tmp_buf); + + return count; +} + + +/* For data organized into a row for each bit (8 * elem_size rows), transpose + * the bytes. */ +int64_t bshuf_trans_byte_bitrow_scal(const void* in, void* out, const size_t size, + const size_t elem_size) { + size_t ii, jj, kk, nbyte_row; + const char *in_b; + char *out_b; + + + in_b = (const char*) in; + out_b = (char*) out; + + nbyte_row = size / 8; + + CHECK_MULT_EIGHT(size); + + for (jj = 0; jj < elem_size; jj++) { + for (ii = 0; ii < nbyte_row; ii++) { + for (kk = 0; kk < 8; kk++) { + out_b[ii * 8 * elem_size + jj * 8 + kk] = \ + in_b[(jj * 8 + kk) * nbyte_row + ii]; + } + } + } + return size * elem_size; +} + + +/* Shuffle bits within the bytes of eight element blocks. */ +int64_t bshuf_shuffle_bit_eightelem_scal(const void* in, void* out, \ + const size_t size, const size_t elem_size) { + + const char *in_b; + char *out_b; + uint64_t x, t; + size_t ii, jj, kk; + size_t nbyte, out_index; + + uint64_t e=1; + const int little_endian = *(uint8_t *) &e == 1; + const size_t elem_skip = little_endian ? elem_size : -elem_size; + const uint64_t elem_offset = little_endian ? 
0 : 7 * elem_size; + + CHECK_MULT_EIGHT(size); + + in_b = (const char*) in; + out_b = (char*) out; + + nbyte = elem_size * size; + + for (jj = 0; jj < 8 * elem_size; jj += 8) { + for (ii = 0; ii + 8 * elem_size - 1 < nbyte; ii += 8 * elem_size) { + x = *((uint64_t*) &in_b[ii + jj]); + if (little_endian) { + TRANS_BIT_8X8(x, t); + } else { + TRANS_BIT_8X8_BE(x, t); + } + for (kk = 0; kk < 8; kk++) { + out_index = ii + jj / 8 + elem_offset + kk * elem_skip; + *((uint8_t*) &out_b[out_index]) = x; + x = x >> 8; + } + } + } + return size * elem_size; +} + + +/* Untranspose bits within elements. */ +int64_t bshuf_untrans_bit_elem_scal(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; + void *tmp_buf; + + CHECK_MULT_EIGHT(size); + + tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + count = bshuf_trans_byte_bitrow_scal(in, tmp_buf, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_shuffle_bit_eightelem_scal(tmp_buf, out, size, elem_size); + + free(tmp_buf); + + return count; +} + + +/* ---- Worker code that uses Arm NEON ---- + * + * The following code makes use of the Arm NEON instruction set. + * NEON technology is the implementation of the ARM Advanced Single + * Instruction Multiple Data (SIMD) extension. + * The NEON unit is the component of the processor that executes SIMD instructions. + * It is also called the NEON Media Processing Engine (MPE). + * + */ + +#ifdef USEARMNEON + +/* Transpose bytes within elements for 16 bit elements. 
*/ +int64_t bshuf_trans_byte_elem_NEON_16(const void* in, void* out, const size_t size) { + + size_t ii; + const char *in_b = (const char*) in; + char *out_b = (char*) out; + int8x16_t a0, b0, a1, b1; + + for (ii=0; ii + 15 < size; ii += 16) { + a0 = vld1q_s8(in_b + 2*ii + 0*16); + b0 = vld1q_s8(in_b + 2*ii + 1*16); + + a1 = vzip1q_s8(a0, b0); + b1 = vzip2q_s8(a0, b0); + + a0 = vzip1q_s8(a1, b1); + b0 = vzip2q_s8(a1, b1); + + a1 = vzip1q_s8(a0, b0); + b1 = vzip2q_s8(a0, b0); + + a0 = vzip1q_s8(a1, b1); + b0 = vzip2q_s8(a1, b1); + + vst1q_s8(out_b + 0*size + ii, a0); + vst1q_s8(out_b + 1*size + ii, b0); + } + + return bshuf_trans_byte_elem_remainder(in, out, size, 2, + size - size % 16); +} + + +/* Transpose bytes within elements for 32 bit elements. */ +int64_t bshuf_trans_byte_elem_NEON_32(const void* in, void* out, const size_t size) { + + size_t ii; + const char *in_b; + char *out_b; + in_b = (const char*) in; + out_b = (char*) out; + int8x16_t a0, b0, c0, d0, a1, b1, c1, d1; + int64x2_t a2, b2, c2, d2; + + for (ii=0; ii + 15 < size; ii += 16) { + a0 = vld1q_s8(in_b + 4*ii + 0*16); + b0 = vld1q_s8(in_b + 4*ii + 1*16); + c0 = vld1q_s8(in_b + 4*ii + 2*16); + d0 = vld1q_s8(in_b + 4*ii + 3*16); + + a1 = vzip1q_s8(a0, b0); + b1 = vzip2q_s8(a0, b0); + c1 = vzip1q_s8(c0, d0); + d1 = vzip2q_s8(c0, d0); + + a0 = vzip1q_s8(a1, b1); + b0 = vzip2q_s8(a1, b1); + c0 = vzip1q_s8(c1, d1); + d0 = vzip2q_s8(c1, d1); + + a1 = vzip1q_s8(a0, b0); + b1 = vzip2q_s8(a0, b0); + c1 = vzip1q_s8(c0, d0); + d1 = vzip2q_s8(c0, d0); + + a2 = vzip1q_s64(vreinterpretq_s64_s8(a1), vreinterpretq_s64_s8(c1)); + b2 = vzip2q_s64(vreinterpretq_s64_s8(a1), vreinterpretq_s64_s8(c1)); + c2 = vzip1q_s64(vreinterpretq_s64_s8(b1), vreinterpretq_s64_s8(d1)); + d2 = vzip2q_s64(vreinterpretq_s64_s8(b1), vreinterpretq_s64_s8(d1)); + + vst1q_s64((int64_t *) (out_b + 0*size + ii), a2); + vst1q_s64((int64_t *) (out_b + 1*size + ii), b2); + vst1q_s64((int64_t *) (out_b + 2*size + ii), c2); + vst1q_s64((int64_t *) 
(out_b + 3*size + ii), d2); + } + + return bshuf_trans_byte_elem_remainder(in, out, size, 4, + size - size % 16); +} + + +/* Transpose bytes within elements for 64 bit elements. */ +int64_t bshuf_trans_byte_elem_NEON_64(const void* in, void* out, const size_t size) { + + size_t ii; + const char* in_b = (const char*) in; + char* out_b = (char*) out; + int8x16_t a0, b0, c0, d0, e0, f0, g0, h0; + int8x16_t a1, b1, c1, d1, e1, f1, g1, h1; + + for (ii=0; ii + 15 < size; ii += 16) { + a0 = vld1q_s8(in_b + 8*ii + 0*16); + b0 = vld1q_s8(in_b + 8*ii + 1*16); + c0 = vld1q_s8(in_b + 8*ii + 2*16); + d0 = vld1q_s8(in_b + 8*ii + 3*16); + e0 = vld1q_s8(in_b + 8*ii + 4*16); + f0 = vld1q_s8(in_b + 8*ii + 5*16); + g0 = vld1q_s8(in_b + 8*ii + 6*16); + h0 = vld1q_s8(in_b + 8*ii + 7*16); + + a1 = vzip1q_s8 (a0, b0); + b1 = vzip2q_s8 (a0, b0); + c1 = vzip1q_s8 (c0, d0); + d1 = vzip2q_s8 (c0, d0); + e1 = vzip1q_s8 (e0, f0); + f1 = vzip2q_s8 (e0, f0); + g1 = vzip1q_s8 (g0, h0); + h1 = vzip2q_s8 (g0, h0); + + a0 = vzip1q_s8 (a1, b1); + b0 = vzip2q_s8 (a1, b1); + c0 = vzip1q_s8 (c1, d1); + d0 = vzip2q_s8 (c1, d1); + e0 = vzip1q_s8 (e1, f1); + f0 = vzip2q_s8 (e1, f1); + g0 = vzip1q_s8 (g1, h1); + h0 = vzip2q_s8 (g1, h1); + + a1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (c0)); + b1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (c0)); + c1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (b0), vreinterpretq_s32_s8 (d0)); + d1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (b0), vreinterpretq_s32_s8 (d0)); + e1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (g0)); + f1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (g0)); + g1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (f0), vreinterpretq_s32_s8 (h0)); + h1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (f0), vreinterpretq_s32_s8 (h0)); + + a0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (a1), vreinterpretq_s64_s8 (e1)); + b0 = 
(int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (a1), vreinterpretq_s64_s8 (e1)); + c0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (b1), vreinterpretq_s64_s8 (f1)); + d0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (b1), vreinterpretq_s64_s8 (f1)); + e0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (c1), vreinterpretq_s64_s8 (g1)); + f0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (c1), vreinterpretq_s64_s8 (g1)); + g0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (d1), vreinterpretq_s64_s8 (h1)); + h0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (d1), vreinterpretq_s64_s8 (h1)); + + vst1q_s8(out_b + 0*size + ii, a0); + vst1q_s8(out_b + 1*size + ii, b0); + vst1q_s8(out_b + 2*size + ii, c0); + vst1q_s8(out_b + 3*size + ii, d0); + vst1q_s8(out_b + 4*size + ii, e0); + vst1q_s8(out_b + 5*size + ii, f0); + vst1q_s8(out_b + 6*size + ii, g0); + vst1q_s8(out_b + 7*size + ii, h0); + } + + return bshuf_trans_byte_elem_remainder(in, out, size, 8, + size - size % 16); +} + + +/* Transpose bytes within elements using best NEON algorithm available. */ +int64_t bshuf_trans_byte_elem_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; + + // Trivial cases: power of 2 bytes. + switch (elem_size) { + case 1: + count = bshuf_copy(in, out, size, elem_size); + return count; + case 2: + count = bshuf_trans_byte_elem_NEON_16(in, out, size); + return count; + case 4: + count = bshuf_trans_byte_elem_NEON_32(in, out, size); + return count; + case 8: + count = bshuf_trans_byte_elem_NEON_64(in, out, size); + return count; + } + + // Worst case: odd number of bytes. Turns out that this is faster for + // (odd * 2) byte elements as well (hence % 4). + if (elem_size % 4) { + count = bshuf_trans_byte_elem_scal(in, out, size, elem_size); + return count; + } + + // Multiple of power of 2: transpose hierarchically. 
+ { + size_t nchunk_elem; + void* tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + if ((elem_size % 8) == 0) { + nchunk_elem = elem_size / 8; + TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int64_t); + count = bshuf_trans_byte_elem_NEON_64(out, tmp_buf, + size * nchunk_elem); + bshuf_trans_elem(tmp_buf, out, 8, nchunk_elem, size); + } else if ((elem_size % 4) == 0) { + nchunk_elem = elem_size / 4; + TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int32_t); + count = bshuf_trans_byte_elem_NEON_32(out, tmp_buf, + size * nchunk_elem); + bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size); + } else { + // Not used since scalar algorithm is faster. + nchunk_elem = elem_size / 2; + TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t); + count = bshuf_trans_byte_elem_NEON_16(out, tmp_buf, + size * nchunk_elem); + bshuf_trans_elem(tmp_buf, out, 2, nchunk_elem, size); + } + + free(tmp_buf); + return count; + } +} + + +/* Creates a mask made up of the most significant + * bit of each byte of 'input' + */ +int32_t move_byte_mask_neon(uint8x16_t input) { + + return ( ((input[0] & 0x80) >> 7) | (((input[1] & 0x80) >> 7) << 1) | (((input[2] & 0x80) >> 7) << 2) | (((input[3] & 0x80) >> 7) << 3) + | (((input[4] & 0x80) >> 7) << 4) | (((input[5] & 0x80) >> 7) << 5) | (((input[6] & 0x80) >> 7) << 6) | (((input[7] & 0x80) >> 7) << 7) + | (((input[8] & 0x80) >> 7) << 8) | (((input[9] & 0x80) >> 7) << 9) | (((input[10] & 0x80) >> 7) << 10) | (((input[11] & 0x80) >> 7) << 11) + | (((input[12] & 0x80) >> 7) << 12) | (((input[13] & 0x80) >> 7) << 13) | (((input[14] & 0x80) >> 7) << 14) | (((input[15] & 0x80) >> 7) << 15) + ); +} + +/* Transpose bits within bytes. 
*/ +int64_t bshuf_trans_bit_byte_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + + size_t ii, kk; + const char* in_b = (const char*) in; + char* out_b = (char*) out; + uint16_t* out_ui16; + + int64_t count; + + size_t nbyte = elem_size * size; + + CHECK_MULT_EIGHT(nbyte); + + int16x8_t xmm; + int32_t bt; + + for (ii = 0; ii + 15 < nbyte; ii += 16) { + xmm = vld1q_s16((int16_t *) (in_b + ii)); + for (kk = 0; kk < 8; kk++) { + bt = move_byte_mask_neon((uint8x16_t) xmm); + xmm = vshlq_n_s16(xmm, 1); + out_ui16 = (uint16_t*) &out_b[((7 - kk) * nbyte + ii) / 8]; + *out_ui16 = bt; + } + } + count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size, + nbyte - nbyte % 16); + return count; +} + + +/* Transpose bits within elements. */ +int64_t bshuf_trans_bit_elem_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; + + CHECK_MULT_EIGHT(size); + + void* tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + count = bshuf_trans_byte_elem_NEON(in, out, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_trans_bit_byte_NEON(out, tmp_buf, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); + + free(tmp_buf); + + return count; +} + + +/* For data organized into a row for each bit (8 * elem_size rows), transpose + * the bytes. 
*/ +int64_t bshuf_trans_byte_bitrow_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + + size_t ii, jj; + const char* in_b = (const char*) in; + char* out_b = (char*) out; + + CHECK_MULT_EIGHT(size); + + size_t nrows = 8 * elem_size; + size_t nbyte_row = size / 8; + + int8x16_t a0, b0, c0, d0, e0, f0, g0, h0; + int8x16_t a1, b1, c1, d1, e1, f1, g1, h1; + int64x1_t *as, *bs, *cs, *ds, *es, *fs, *gs, *hs; + + for (ii = 0; ii + 7 < nrows; ii += 8) { + for (jj = 0; jj + 15 < nbyte_row; jj += 16) { + a0 = vld1q_s8(in_b + (ii + 0)*nbyte_row + jj); + b0 = vld1q_s8(in_b + (ii + 1)*nbyte_row + jj); + c0 = vld1q_s8(in_b + (ii + 2)*nbyte_row + jj); + d0 = vld1q_s8(in_b + (ii + 3)*nbyte_row + jj); + e0 = vld1q_s8(in_b + (ii + 4)*nbyte_row + jj); + f0 = vld1q_s8(in_b + (ii + 5)*nbyte_row + jj); + g0 = vld1q_s8(in_b + (ii + 6)*nbyte_row + jj); + h0 = vld1q_s8(in_b + (ii + 7)*nbyte_row + jj); + + a1 = vzip1q_s8(a0, b0); + b1 = vzip1q_s8(c0, d0); + c1 = vzip1q_s8(e0, f0); + d1 = vzip1q_s8(g0, h0); + e1 = vzip2q_s8(a0, b0); + f1 = vzip2q_s8(c0, d0); + g1 = vzip2q_s8(e0, f0); + h1 = vzip2q_s8(g0, h0); + + a0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (a1), vreinterpretq_s16_s8 (b1)); + b0= (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (c1), vreinterpretq_s16_s8 (d1)); + c0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (a1), vreinterpretq_s16_s8 (b1)); + d0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (c1), vreinterpretq_s16_s8 (d1)); + e0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (e1), vreinterpretq_s16_s8 (f1)); + f0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (g1), vreinterpretq_s16_s8 (h1)); + g0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (e1), vreinterpretq_s16_s8 (f1)); + h0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (g1), vreinterpretq_s16_s8 (h1)); + + a1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (b0)); + b1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (b0)); + c1 
= (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (c0), vreinterpretq_s32_s8 (d0)); + d1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (c0), vreinterpretq_s32_s8 (d0)); + e1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (f0)); + f1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (f0)); + g1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (g0), vreinterpretq_s32_s8 (h0)); + h1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (g0), vreinterpretq_s32_s8 (h0)); + + as = (int64x1_t *) &a1; + bs = (int64x1_t *) &b1; + cs = (int64x1_t *) &c1; + ds = (int64x1_t *) &d1; + es = (int64x1_t *) &e1; + fs = (int64x1_t *) &f1; + gs = (int64x1_t *) &g1; + hs = (int64x1_t *) &h1; + + vst1_s64((int64_t *)(out_b + (jj + 0) * nrows + ii), *as); + vst1_s64((int64_t *)(out_b + (jj + 1) * nrows + ii), *(as + 1)); + vst1_s64((int64_t *)(out_b + (jj + 2) * nrows + ii), *bs); + vst1_s64((int64_t *)(out_b + (jj + 3) * nrows + ii), *(bs + 1)); + vst1_s64((int64_t *)(out_b + (jj + 4) * nrows + ii), *cs); + vst1_s64((int64_t *)(out_b + (jj + 5) * nrows + ii), *(cs + 1)); + vst1_s64((int64_t *)(out_b + (jj + 6) * nrows + ii), *ds); + vst1_s64((int64_t *)(out_b + (jj + 7) * nrows + ii), *(ds + 1)); + vst1_s64((int64_t *)(out_b + (jj + 8) * nrows + ii), *es); + vst1_s64((int64_t *)(out_b + (jj + 9) * nrows + ii), *(es + 1)); + vst1_s64((int64_t *)(out_b + (jj + 10) * nrows + ii), *fs); + vst1_s64((int64_t *)(out_b + (jj + 11) * nrows + ii), *(fs + 1)); + vst1_s64((int64_t *)(out_b + (jj + 12) * nrows + ii), *gs); + vst1_s64((int64_t *)(out_b + (jj + 13) * nrows + ii), *(gs + 1)); + vst1_s64((int64_t *)(out_b + (jj + 14) * nrows + ii), *hs); + vst1_s64((int64_t *)(out_b + (jj + 15) * nrows + ii), *(hs + 1)); + } + for (jj = nbyte_row - nbyte_row % 16; jj < nbyte_row; jj ++) { + out_b[jj * nrows + ii + 0] = in_b[(ii + 0)*nbyte_row + jj]; + out_b[jj * nrows + ii + 1] = in_b[(ii + 1)*nbyte_row + jj]; + out_b[jj * nrows + ii + 2] = in_b[(ii + 
2)*nbyte_row + jj]; + out_b[jj * nrows + ii + 3] = in_b[(ii + 3)*nbyte_row + jj]; + out_b[jj * nrows + ii + 4] = in_b[(ii + 4)*nbyte_row + jj]; + out_b[jj * nrows + ii + 5] = in_b[(ii + 5)*nbyte_row + jj]; + out_b[jj * nrows + ii + 6] = in_b[(ii + 6)*nbyte_row + jj]; + out_b[jj * nrows + ii + 7] = in_b[(ii + 7)*nbyte_row + jj]; + } + } + return size * elem_size; +} + + +/* Shuffle bits within the bytes of eight element blocks. */ +int64_t bshuf_shuffle_bit_eightelem_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + + CHECK_MULT_EIGHT(size); + + // With a bit of care, this could be written such that such that it is + // in_buf = out_buf safe. + const char* in_b = (const char*) in; + uint16_t* out_ui16 = (uint16_t*) out; + + size_t ii, jj, kk; + size_t nbyte = elem_size * size; + + int16x8_t xmm; + int32_t bt; + + if (elem_size % 2) { + bshuf_shuffle_bit_eightelem_scal(in, out, size, elem_size); + } else { + for (ii = 0; ii + 8 * elem_size - 1 < nbyte; + ii += 8 * elem_size) { + for (jj = 0; jj + 15 < 8 * elem_size; jj += 16) { + xmm = vld1q_s16((int16_t *) &in_b[ii + jj]); + for (kk = 0; kk < 8; kk++) { + bt = move_byte_mask_neon((uint8x16_t) xmm); + xmm = vshlq_n_s16(xmm, 1); + size_t ind = (ii + jj / 8 + (7 - kk) * elem_size); + out_ui16[ind / 2] = bt; + } + } + } + } + return size * elem_size; +} + + +/* Untranspose bits within elements. 
*/ +int64_t bshuf_untrans_bit_elem_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; + + CHECK_MULT_EIGHT(size); + + void* tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + count = bshuf_trans_byte_bitrow_NEON(in, tmp_buf, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_shuffle_bit_eightelem_NEON(tmp_buf, out, size, elem_size); + + free(tmp_buf); + + return count; +} + +#else // #ifdef USEARMNEON + +int64_t bshuf_untrans_bit_elem_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -13; +} + + +int64_t bshuf_trans_bit_elem_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -13; +} + + +int64_t bshuf_trans_byte_bitrow_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -13; +} + + +int64_t bshuf_trans_bit_byte_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -13; +} + + +int64_t bshuf_trans_byte_elem_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -13; +} + + +int64_t bshuf_trans_byte_elem_NEON_64(const void* in, void* out, const size_t size) { + return -13; +} + + +int64_t bshuf_trans_byte_elem_NEON_32(const void* in, void* out, const size_t size) { + return -13; +} + + +int64_t bshuf_trans_byte_elem_NEON_16(const void* in, void* out, const size_t size) { + return -13; +} + + +int64_t bshuf_shuffle_bit_eightelem_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -13; +} + + +#endif + + + + + +/* ---- Worker code that uses SSE2 ---- + * + * The following code makes use of the SSE2 instruction set and specialized + * 16 byte registers. The SSE2 instructions are present on modern x86 + * processors. The first Intel processor microarchitecture supporting SSE2 was + * Pentium 4 (2000). 
+ * + */ + +#ifdef USESSE2 + +/* Transpose bytes within elements for 16 bit elements. */ +int64_t bshuf_trans_byte_elem_SSE_16(const void* in, void* out, const size_t size) { + + size_t ii; + const char *in_b = (const char*) in; + char *out_b = (char*) out; + __m128i a0, b0, a1, b1; + + for (ii=0; ii + 15 < size; ii += 16) { + a0 = _mm_loadu_si128((__m128i *) &in_b[2*ii + 0*16]); + b0 = _mm_loadu_si128((__m128i *) &in_b[2*ii + 1*16]); + + a1 = _mm_unpacklo_epi8(a0, b0); + b1 = _mm_unpackhi_epi8(a0, b0); + + a0 = _mm_unpacklo_epi8(a1, b1); + b0 = _mm_unpackhi_epi8(a1, b1); + + a1 = _mm_unpacklo_epi8(a0, b0); + b1 = _mm_unpackhi_epi8(a0, b0); + + a0 = _mm_unpacklo_epi8(a1, b1); + b0 = _mm_unpackhi_epi8(a1, b1); + + _mm_storeu_si128((__m128i *) &out_b[0*size + ii], a0); + _mm_storeu_si128((__m128i *) &out_b[1*size + ii], b0); + } + return bshuf_trans_byte_elem_remainder(in, out, size, 2, + size - size % 16); +} + + +/* Transpose bytes within elements for 32 bit elements. */ +int64_t bshuf_trans_byte_elem_SSE_32(const void* in, void* out, const size_t size) { + + size_t ii; + const char *in_b; + char *out_b; + in_b = (const char*) in; + out_b = (char*) out; + __m128i a0, b0, c0, d0, a1, b1, c1, d1; + + for (ii=0; ii + 15 < size; ii += 16) { + a0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 0*16]); + b0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 1*16]); + c0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 2*16]); + d0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 3*16]); + + a1 = _mm_unpacklo_epi8(a0, b0); + b1 = _mm_unpackhi_epi8(a0, b0); + c1 = _mm_unpacklo_epi8(c0, d0); + d1 = _mm_unpackhi_epi8(c0, d0); + + a0 = _mm_unpacklo_epi8(a1, b1); + b0 = _mm_unpackhi_epi8(a1, b1); + c0 = _mm_unpacklo_epi8(c1, d1); + d0 = _mm_unpackhi_epi8(c1, d1); + + a1 = _mm_unpacklo_epi8(a0, b0); + b1 = _mm_unpackhi_epi8(a0, b0); + c1 = _mm_unpacklo_epi8(c0, d0); + d1 = _mm_unpackhi_epi8(c0, d0); + + a0 = _mm_unpacklo_epi64(a1, c1); + b0 = _mm_unpackhi_epi64(a1, c1); + c0 = _mm_unpacklo_epi64(b1, 
d1); + d0 = _mm_unpackhi_epi64(b1, d1); + + _mm_storeu_si128((__m128i *) &out_b[0*size + ii], a0); + _mm_storeu_si128((__m128i *) &out_b[1*size + ii], b0); + _mm_storeu_si128((__m128i *) &out_b[2*size + ii], c0); + _mm_storeu_si128((__m128i *) &out_b[3*size + ii], d0); + } + return bshuf_trans_byte_elem_remainder(in, out, size, 4, + size - size % 16); +} + + +/* Transpose bytes within elements for 64 bit elements. */ +int64_t bshuf_trans_byte_elem_SSE_64(const void* in, void* out, const size_t size) { + + size_t ii; + const char* in_b = (const char*) in; + char* out_b = (char*) out; + __m128i a0, b0, c0, d0, e0, f0, g0, h0; + __m128i a1, b1, c1, d1, e1, f1, g1, h1; + + for (ii=0; ii + 15 < size; ii += 16) { + a0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 0*16]); + b0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 1*16]); + c0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 2*16]); + d0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 3*16]); + e0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 4*16]); + f0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 5*16]); + g0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 6*16]); + h0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 7*16]); + + a1 = _mm_unpacklo_epi8(a0, b0); + b1 = _mm_unpackhi_epi8(a0, b0); + c1 = _mm_unpacklo_epi8(c0, d0); + d1 = _mm_unpackhi_epi8(c0, d0); + e1 = _mm_unpacklo_epi8(e0, f0); + f1 = _mm_unpackhi_epi8(e0, f0); + g1 = _mm_unpacklo_epi8(g0, h0); + h1 = _mm_unpackhi_epi8(g0, h0); + + a0 = _mm_unpacklo_epi8(a1, b1); + b0 = _mm_unpackhi_epi8(a1, b1); + c0 = _mm_unpacklo_epi8(c1, d1); + d0 = _mm_unpackhi_epi8(c1, d1); + e0 = _mm_unpacklo_epi8(e1, f1); + f0 = _mm_unpackhi_epi8(e1, f1); + g0 = _mm_unpacklo_epi8(g1, h1); + h0 = _mm_unpackhi_epi8(g1, h1); + + a1 = _mm_unpacklo_epi32(a0, c0); + b1 = _mm_unpackhi_epi32(a0, c0); + c1 = _mm_unpacklo_epi32(b0, d0); + d1 = _mm_unpackhi_epi32(b0, d0); + e1 = _mm_unpacklo_epi32(e0, g0); + f1 = _mm_unpackhi_epi32(e0, g0); + g1 = _mm_unpacklo_epi32(f0, h0); + h1 = _mm_unpackhi_epi32(f0, h0); + + 
a0 = _mm_unpacklo_epi64(a1, e1); + b0 = _mm_unpackhi_epi64(a1, e1); + c0 = _mm_unpacklo_epi64(b1, f1); + d0 = _mm_unpackhi_epi64(b1, f1); + e0 = _mm_unpacklo_epi64(c1, g1); + f0 = _mm_unpackhi_epi64(c1, g1); + g0 = _mm_unpacklo_epi64(d1, h1); + h0 = _mm_unpackhi_epi64(d1, h1); + + _mm_storeu_si128((__m128i *) &out_b[0*size + ii], a0); + _mm_storeu_si128((__m128i *) &out_b[1*size + ii], b0); + _mm_storeu_si128((__m128i *) &out_b[2*size + ii], c0); + _mm_storeu_si128((__m128i *) &out_b[3*size + ii], d0); + _mm_storeu_si128((__m128i *) &out_b[4*size + ii], e0); + _mm_storeu_si128((__m128i *) &out_b[5*size + ii], f0); + _mm_storeu_si128((__m128i *) &out_b[6*size + ii], g0); + _mm_storeu_si128((__m128i *) &out_b[7*size + ii], h0); + } + return bshuf_trans_byte_elem_remainder(in, out, size, 8, + size - size % 16); +} + + +/* Transpose bytes within elements using best SSE algorithm available. */ +int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; + + // Trivial cases: power of 2 bytes. + switch (elem_size) { + case 1: + count = bshuf_copy(in, out, size, elem_size); + return count; + case 2: + count = bshuf_trans_byte_elem_SSE_16(in, out, size); + return count; + case 4: + count = bshuf_trans_byte_elem_SSE_32(in, out, size); + return count; + case 8: + count = bshuf_trans_byte_elem_SSE_64(in, out, size); + return count; + } + + // Worst case: odd number of bytes. Turns out that this is faster for + // (odd * 2) byte elements as well (hence % 4). + if (elem_size % 4) { + count = bshuf_trans_byte_elem_scal(in, out, size, elem_size); + return count; + } + + // Multiple of power of 2: transpose hierarchically. 
+ { + size_t nchunk_elem; + void* tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + if ((elem_size % 8) == 0) { + nchunk_elem = elem_size / 8; + TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int64_t); + count = bshuf_trans_byte_elem_SSE_64(out, tmp_buf, + size * nchunk_elem); + bshuf_trans_elem(tmp_buf, out, 8, nchunk_elem, size); + } else if ((elem_size % 4) == 0) { + nchunk_elem = elem_size / 4; + TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int32_t); + count = bshuf_trans_byte_elem_SSE_32(out, tmp_buf, + size * nchunk_elem); + bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size); + } else { + // Not used since scalar algorithm is faster. + nchunk_elem = elem_size / 2; + TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t); + count = bshuf_trans_byte_elem_SSE_16(out, tmp_buf, + size * nchunk_elem); + bshuf_trans_elem(tmp_buf, out, 2, nchunk_elem, size); + } + + free(tmp_buf); + return count; + } +} + + +/* Transpose bits within bytes. */ +int64_t bshuf_trans_bit_byte_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + + size_t ii, kk; + const char* in_b = (const char*) in; + char* out_b = (char*) out; + uint16_t* out_ui16; + + int64_t count; + + size_t nbyte = elem_size * size; + + CHECK_MULT_EIGHT(nbyte); + + __m128i xmm; + int32_t bt; + + for (ii = 0; ii + 15 < nbyte; ii += 16) { + xmm = _mm_loadu_si128((__m128i *) &in_b[ii]); + for (kk = 0; kk < 8; kk++) { + bt = _mm_movemask_epi8(xmm); + xmm = _mm_slli_epi16(xmm, 1); + out_ui16 = (uint16_t*) &out_b[((7 - kk) * nbyte + ii) / 8]; + *out_ui16 = bt; + } + } + count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size, + nbyte - nbyte % 16); + return count; +} + + +/* Transpose bits within elements. 
*/ +int64_t bshuf_trans_bit_elem_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; + + CHECK_MULT_EIGHT(size); + + void* tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + count = bshuf_trans_byte_elem_SSE(in, out, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_trans_bit_byte_SSE(out, tmp_buf, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); + + free(tmp_buf); + + return count; +} + + +/* For data organized into a row for each bit (8 * elem_size rows), transpose + * the bytes. */ +int64_t bshuf_trans_byte_bitrow_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + + size_t ii, jj; + const char* in_b = (const char*) in; + char* out_b = (char*) out; + + CHECK_MULT_EIGHT(size); + + size_t nrows = 8 * elem_size; + size_t nbyte_row = size / 8; + + __m128i a0, b0, c0, d0, e0, f0, g0, h0; + __m128i a1, b1, c1, d1, e1, f1, g1, h1; + __m128 *as, *bs, *cs, *ds, *es, *fs, *gs, *hs; + + for (ii = 0; ii + 7 < nrows; ii += 8) { + for (jj = 0; jj + 15 < nbyte_row; jj += 16) { + a0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 0)*nbyte_row + jj]); + b0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 1)*nbyte_row + jj]); + c0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 2)*nbyte_row + jj]); + d0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 3)*nbyte_row + jj]); + e0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 4)*nbyte_row + jj]); + f0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 5)*nbyte_row + jj]); + g0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 6)*nbyte_row + jj]); + h0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 7)*nbyte_row + jj]); + + + a1 = _mm_unpacklo_epi8(a0, b0); + b1 = _mm_unpacklo_epi8(c0, d0); + c1 = _mm_unpacklo_epi8(e0, f0); + d1 = _mm_unpacklo_epi8(g0, h0); + e1 = _mm_unpackhi_epi8(a0, b0); + f1 = _mm_unpackhi_epi8(c0, d0); + g1 = _mm_unpackhi_epi8(e0, f0); + h1 = _mm_unpackhi_epi8(g0, h0); + + + a0 = 
_mm_unpacklo_epi16(a1, b1); + b0 = _mm_unpacklo_epi16(c1, d1); + c0 = _mm_unpackhi_epi16(a1, b1); + d0 = _mm_unpackhi_epi16(c1, d1); + + e0 = _mm_unpacklo_epi16(e1, f1); + f0 = _mm_unpacklo_epi16(g1, h1); + g0 = _mm_unpackhi_epi16(e1, f1); + h0 = _mm_unpackhi_epi16(g1, h1); + + + a1 = _mm_unpacklo_epi32(a0, b0); + b1 = _mm_unpackhi_epi32(a0, b0); + + c1 = _mm_unpacklo_epi32(c0, d0); + d1 = _mm_unpackhi_epi32(c0, d0); + + e1 = _mm_unpacklo_epi32(e0, f0); + f1 = _mm_unpackhi_epi32(e0, f0); + + g1 = _mm_unpacklo_epi32(g0, h0); + h1 = _mm_unpackhi_epi32(g0, h0); + + // We don't have a storeh instruction for integers, so interpret + // as a float. Have a storel (_mm_storel_epi64). + as = (__m128 *) &a1; + bs = (__m128 *) &b1; + cs = (__m128 *) &c1; + ds = (__m128 *) &d1; + es = (__m128 *) &e1; + fs = (__m128 *) &f1; + gs = (__m128 *) &g1; + hs = (__m128 *) &h1; + + _mm_storel_pi((__m64 *) &out_b[(jj + 0) * nrows + ii], *as); + _mm_storel_pi((__m64 *) &out_b[(jj + 2) * nrows + ii], *bs); + _mm_storel_pi((__m64 *) &out_b[(jj + 4) * nrows + ii], *cs); + _mm_storel_pi((__m64 *) &out_b[(jj + 6) * nrows + ii], *ds); + _mm_storel_pi((__m64 *) &out_b[(jj + 8) * nrows + ii], *es); + _mm_storel_pi((__m64 *) &out_b[(jj + 10) * nrows + ii], *fs); + _mm_storel_pi((__m64 *) &out_b[(jj + 12) * nrows + ii], *gs); + _mm_storel_pi((__m64 *) &out_b[(jj + 14) * nrows + ii], *hs); + + _mm_storeh_pi((__m64 *) &out_b[(jj + 1) * nrows + ii], *as); + _mm_storeh_pi((__m64 *) &out_b[(jj + 3) * nrows + ii], *bs); + _mm_storeh_pi((__m64 *) &out_b[(jj + 5) * nrows + ii], *cs); + _mm_storeh_pi((__m64 *) &out_b[(jj + 7) * nrows + ii], *ds); + _mm_storeh_pi((__m64 *) &out_b[(jj + 9) * nrows + ii], *es); + _mm_storeh_pi((__m64 *) &out_b[(jj + 11) * nrows + ii], *fs); + _mm_storeh_pi((__m64 *) &out_b[(jj + 13) * nrows + ii], *gs); + _mm_storeh_pi((__m64 *) &out_b[(jj + 15) * nrows + ii], *hs); + } + for (jj = nbyte_row - nbyte_row % 16; jj < nbyte_row; jj ++) { + out_b[jj * nrows + ii + 0] = in_b[(ii + 
0)*nbyte_row + jj]; + out_b[jj * nrows + ii + 1] = in_b[(ii + 1)*nbyte_row + jj]; + out_b[jj * nrows + ii + 2] = in_b[(ii + 2)*nbyte_row + jj]; + out_b[jj * nrows + ii + 3] = in_b[(ii + 3)*nbyte_row + jj]; + out_b[jj * nrows + ii + 4] = in_b[(ii + 4)*nbyte_row + jj]; + out_b[jj * nrows + ii + 5] = in_b[(ii + 5)*nbyte_row + jj]; + out_b[jj * nrows + ii + 6] = in_b[(ii + 6)*nbyte_row + jj]; + out_b[jj * nrows + ii + 7] = in_b[(ii + 7)*nbyte_row + jj]; + } + } + return size * elem_size; +} + + +/* Shuffle bits within the bytes of eight element blocks. */ +int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + + CHECK_MULT_EIGHT(size); + + // With a bit of care, this could be written such that such that it is + // in_buf = out_buf safe. + const char* in_b = (const char*) in; + uint16_t* out_ui16 = (uint16_t*) out; + + size_t ii, jj, kk; + size_t nbyte = elem_size * size; + + __m128i xmm; + int32_t bt; + + if (elem_size % 2) { + bshuf_shuffle_bit_eightelem_scal(in, out, size, elem_size); + } else { + for (ii = 0; ii + 8 * elem_size - 1 < nbyte; + ii += 8 * elem_size) { + for (jj = 0; jj + 15 < 8 * elem_size; jj += 16) { + xmm = _mm_loadu_si128((__m128i *) &in_b[ii + jj]); + for (kk = 0; kk < 8; kk++) { + bt = _mm_movemask_epi8(xmm); + xmm = _mm_slli_epi16(xmm, 1); + size_t ind = (ii + jj / 8 + (7 - kk) * elem_size); + out_ui16[ind / 2] = bt; + } + } + } + } + return size * elem_size; +} + + +/* Untranspose bits within elements. 
*/ +int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; + + CHECK_MULT_EIGHT(size); + + void* tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + count = bshuf_trans_byte_bitrow_SSE(in, tmp_buf, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_shuffle_bit_eightelem_SSE(tmp_buf, out, size, elem_size); + + free(tmp_buf); + + return count; +} + +#else // #ifdef USESSE2 + + +int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -11; +} + + +int64_t bshuf_trans_bit_elem_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -11; +} + + +int64_t bshuf_trans_byte_bitrow_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -11; +} + + +int64_t bshuf_trans_bit_byte_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -11; +} + + +int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -11; +} + + +int64_t bshuf_trans_byte_elem_SSE_64(const void* in, void* out, const size_t size) { + return -11; +} + + +int64_t bshuf_trans_byte_elem_SSE_32(const void* in, void* out, const size_t size) { + return -11; +} + + +int64_t bshuf_trans_byte_elem_SSE_16(const void* in, void* out, const size_t size) { + return -11; +} + + +int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -11; +} + + +#endif // #ifdef USESSE2 + + +/* ---- Code that requires AVX2. Intel Haswell (2013) and later. ---- */ + +/* ---- Worker code that uses AVX2 ---- + * + * The following code makes use of the AVX2 instruction set and specialized + * 32 byte registers. The AVX2 instructions are present on newer x86 + * processors. The first Intel processor microarchitecture supporting AVX2 was + * Haswell (2013). 
+ * + */ + +#ifdef USEAVX2 + +/* Transpose bits within bytes. */ +int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + + size_t ii, kk; + const char* in_b = (const char*) in; + char* out_b = (char*) out; + int32_t* out_i32; + + size_t nbyte = elem_size * size; + + int64_t count; + + __m256i ymm; + int32_t bt; + + for (ii = 0; ii + 31 < nbyte; ii += 32) { + ymm = _mm256_loadu_si256((__m256i *) &in_b[ii]); + for (kk = 0; kk < 8; kk++) { + bt = _mm256_movemask_epi8(ymm); + ymm = _mm256_slli_epi16(ymm, 1); + out_i32 = (int32_t*) &out_b[((7 - kk) * nbyte + ii) / 8]; + *out_i32 = bt; + } + } + count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size, + nbyte - nbyte % 32); + return count; +} + + +/* Transpose bits within elements. */ +int64_t bshuf_trans_bit_elem_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; + + CHECK_MULT_EIGHT(size); + + void* tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + count = bshuf_trans_byte_elem_SSE(in, out, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_trans_bit_byte_AVX(out, tmp_buf, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); + + free(tmp_buf); + + return count; +} + + +/* For data organized into a row for each bit (8 * elem_size rows), transpose + * the bytes. 
*/ +int64_t bshuf_trans_byte_bitrow_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + + size_t hh, ii, jj, kk, mm; + const char* in_b = (const char*) in; + char* out_b = (char*) out; + + CHECK_MULT_EIGHT(size); + + size_t nrows = 8 * elem_size; + size_t nbyte_row = size / 8; + + if (elem_size % 4) return bshuf_trans_byte_bitrow_SSE(in, out, size, + elem_size); + + __m256i ymm_0[8]; + __m256i ymm_1[8]; + __m256i ymm_storeage[8][4]; + + for (jj = 0; jj + 31 < nbyte_row; jj += 32) { + for (ii = 0; ii + 3 < elem_size; ii += 4) { + for (hh = 0; hh < 4; hh ++) { + + for (kk = 0; kk < 8; kk ++){ + ymm_0[kk] = _mm256_loadu_si256((__m256i *) &in_b[ + (ii * 8 + hh * 8 + kk) * nbyte_row + jj]); + } + + for (kk = 0; kk < 4; kk ++){ + ymm_1[kk] = _mm256_unpacklo_epi8(ymm_0[kk * 2], + ymm_0[kk * 2 + 1]); + ymm_1[kk + 4] = _mm256_unpackhi_epi8(ymm_0[kk * 2], + ymm_0[kk * 2 + 1]); + } + + for (kk = 0; kk < 2; kk ++){ + for (mm = 0; mm < 2; mm ++){ + ymm_0[kk * 4 + mm] = _mm256_unpacklo_epi16( + ymm_1[kk * 4 + mm * 2], + ymm_1[kk * 4 + mm * 2 + 1]); + ymm_0[kk * 4 + mm + 2] = _mm256_unpackhi_epi16( + ymm_1[kk * 4 + mm * 2], + ymm_1[kk * 4 + mm * 2 + 1]); + } + } + + for (kk = 0; kk < 4; kk ++){ + ymm_1[kk * 2] = _mm256_unpacklo_epi32(ymm_0[kk * 2], + ymm_0[kk * 2 + 1]); + ymm_1[kk * 2 + 1] = _mm256_unpackhi_epi32(ymm_0[kk * 2], + ymm_0[kk * 2 + 1]); + } + + for (kk = 0; kk < 8; kk ++){ + ymm_storeage[kk][hh] = ymm_1[kk]; + } + } + + for (mm = 0; mm < 8; mm ++) { + + for (kk = 0; kk < 4; kk ++){ + ymm_0[kk] = ymm_storeage[mm][kk]; + } + + ymm_1[0] = _mm256_unpacklo_epi64(ymm_0[0], ymm_0[1]); + ymm_1[1] = _mm256_unpacklo_epi64(ymm_0[2], ymm_0[3]); + ymm_1[2] = _mm256_unpackhi_epi64(ymm_0[0], ymm_0[1]); + ymm_1[3] = _mm256_unpackhi_epi64(ymm_0[2], ymm_0[3]); + + ymm_0[0] = _mm256_permute2x128_si256(ymm_1[0], ymm_1[1], 32); + ymm_0[1] = _mm256_permute2x128_si256(ymm_1[2], ymm_1[3], 32); + ymm_0[2] = _mm256_permute2x128_si256(ymm_1[0], ymm_1[1], 49); + 
ymm_0[3] = _mm256_permute2x128_si256(ymm_1[2], ymm_1[3], 49); + + _mm256_storeu_si256((__m256i *) &out_b[ + (jj + mm * 2 + 0 * 16) * nrows + ii * 8], ymm_0[0]); + _mm256_storeu_si256((__m256i *) &out_b[ + (jj + mm * 2 + 0 * 16 + 1) * nrows + ii * 8], ymm_0[1]); + _mm256_storeu_si256((__m256i *) &out_b[ + (jj + mm * 2 + 1 * 16) * nrows + ii * 8], ymm_0[2]); + _mm256_storeu_si256((__m256i *) &out_b[ + (jj + mm * 2 + 1 * 16 + 1) * nrows + ii * 8], ymm_0[3]); + } + } + } + for (ii = 0; ii < nrows; ii ++ ) { + for (jj = nbyte_row - nbyte_row % 32; jj < nbyte_row; jj ++) { + out_b[jj * nrows + ii] = in_b[ii * nbyte_row + jj]; + } + } + return size * elem_size; +} + + +/* Shuffle bits within the bytes of eight element blocks. */ +int64_t bshuf_shuffle_bit_eightelem_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + + CHECK_MULT_EIGHT(size); + + // With a bit of care, this could be written such that such that it is + // in_buf = out_buf safe. + const char* in_b = (const char*) in; + char* out_b = (char*) out; + + size_t ii, jj, kk; + size_t nbyte = elem_size * size; + + __m256i ymm; + int32_t bt; + + if (elem_size % 4) { + return bshuf_shuffle_bit_eightelem_SSE(in, out, size, elem_size); + } else { + for (jj = 0; jj + 31 < 8 * elem_size; jj += 32) { + for (ii = 0; ii + 8 * elem_size - 1 < nbyte; + ii += 8 * elem_size) { + ymm = _mm256_loadu_si256((__m256i *) &in_b[ii + jj]); + for (kk = 0; kk < 8; kk++) { + bt = _mm256_movemask_epi8(ymm); + ymm = _mm256_slli_epi16(ymm, 1); + size_t ind = (ii + jj / 8 + (7 - kk) * elem_size); + * (int32_t *) &out_b[ind] = bt; + } + } + } + } + return size * elem_size; +} + + +/* Untranspose bits within elements. 
*/ +int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; + + CHECK_MULT_EIGHT(size); + + void* tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + count = bshuf_trans_byte_bitrow_AVX(in, tmp_buf, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_shuffle_bit_eightelem_AVX(tmp_buf, out, size, elem_size); + + free(tmp_buf); + return count; +} + + +#else // #ifdef USEAVX2 + +int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -12; +} + + +int64_t bshuf_trans_bit_elem_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -12; +} + + +int64_t bshuf_trans_byte_bitrow_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -12; +} + + +int64_t bshuf_shuffle_bit_eightelem_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -12; +} + + +int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -12; +} + +#endif // #ifdef USEAVX2 + + +/* ---- Drivers selecting best instruction set at compile time. 
---- */ + +int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; +#ifdef USEAVX2 + count = bshuf_trans_bit_elem_AVX(in, out, size, elem_size); +#elif defined(USESSE2) + count = bshuf_trans_bit_elem_SSE(in, out, size, elem_size); +#elif defined(USEARMNEON) + count = bshuf_trans_bit_elem_NEON(in, out, size, elem_size); +#else + count = bshuf_trans_bit_elem_scal(in, out, size, elem_size); +#endif + return count; +} + + +int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; +#ifdef USEAVX2 + count = bshuf_untrans_bit_elem_AVX(in, out, size, elem_size); +#elif defined(USESSE2) + count = bshuf_untrans_bit_elem_SSE(in, out, size, elem_size); +#elif defined(USEARMNEON) + count = bshuf_untrans_bit_elem_NEON(in, out, size, elem_size); +#else + count = bshuf_untrans_bit_elem_scal(in, out, size, elem_size); +#endif + return count; +} + + +/* ---- Wrappers for implementing blocking ---- */ + +/* Wrap a function for processing a single block to process an entire buffer in + * parallel. 
*/ +int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out, \ + const size_t size, const size_t elem_size, size_t block_size, const int option) { + + omp_size_t ii = 0; + int64_t err = 0; + int64_t count, cum_count=0; + size_t last_block_size; + size_t leftover_bytes; + size_t this_iter; + char *last_in; + char *last_out; + + + ioc_chain C; + ioc_init(&C, in, out); + + + if (block_size == 0) { + block_size = bshuf_default_block_size(elem_size); + } + if (block_size % BSHUF_BLOCKED_MULT) return -81; + +#if defined(_OPENMP) + #pragma omp parallel for schedule(dynamic, 1) \ + private(count) reduction(+ : cum_count) +#endif + for (ii = 0; ii < (omp_size_t)( size / block_size ); ii ++) { + count = fun(&C, block_size, elem_size, option); + if (count < 0) err = count; + cum_count += count; + } + + last_block_size = size % block_size; + last_block_size = last_block_size - last_block_size % BSHUF_BLOCKED_MULT; + if (last_block_size) { + count = fun(&C, last_block_size, elem_size, option); + if (count < 0) err = count; + cum_count += count; + } + + if (err < 0) return err; + + leftover_bytes = size % BSHUF_BLOCKED_MULT * elem_size; + //this_iter; + last_in = (char *) ioc_get_in(&C, &this_iter); + ioc_set_next_in(&C, &this_iter, (void *) (last_in + leftover_bytes)); + last_out = (char *) ioc_get_out(&C, &this_iter); + ioc_set_next_out(&C, &this_iter, (void *) (last_out + leftover_bytes)); + + memcpy(last_out, last_in, leftover_bytes); + + ioc_destroy(&C); + + return cum_count + leftover_bytes; +} + + +/* Bitshuffle a single block. 
*/ +int64_t bshuf_bitshuffle_block(ioc_chain *C_ptr, \ + const size_t size, const size_t elem_size, const int option) { + + size_t this_iter; + const void *in; + void *out; + int64_t count; + + + + in = ioc_get_in(C_ptr, &this_iter); + ioc_set_next_in(C_ptr, &this_iter, + (void*) ((char*) in + size * elem_size)); + out = ioc_get_out(C_ptr, &this_iter); + ioc_set_next_out(C_ptr, &this_iter, + (void *) ((char *) out + size * elem_size)); + + count = bshuf_trans_bit_elem(in, out, size, elem_size); + return count; +} + + +/* Bitunshuffle a single block. */ +int64_t bshuf_bitunshuffle_block(ioc_chain* C_ptr, \ + const size_t size, const size_t elem_size, const int option) { + + + size_t this_iter; + const void *in; + void *out; + int64_t count; + + + + + in = ioc_get_in(C_ptr, &this_iter); + ioc_set_next_in(C_ptr, &this_iter, + (void*) ((char*) in + size * elem_size)); + out = ioc_get_out(C_ptr, &this_iter); + ioc_set_next_out(C_ptr, &this_iter, + (void *) ((char *) out + size * elem_size)); + + count = bshuf_untrans_bit_elem(in, out, size, elem_size); + return count; +} + + +/* Write a 64 bit unsigned integer to a buffer in big endian order. */ +void bshuf_write_uint64_BE(void* buf, uint64_t num) { + int ii; + uint8_t* b = (uint8_t*) buf; + uint64_t pow28 = 1 << 8; + for (ii = 7; ii >= 0; ii--) { + b[ii] = num % pow28; + num = num / pow28; + } +} + + +/* Read a 64 bit unsigned integer from a buffer big endian order. */ +uint64_t bshuf_read_uint64_BE(void* buf) { + int ii; + uint8_t* b = (uint8_t*) buf; + uint64_t num = 0, pow28 = 1 << 8, cp = 1; + for (ii = 7; ii >= 0; ii--) { + num += b[ii] * cp; + cp *= pow28; + } + return num; +} + + +/* Write a 32 bit unsigned integer to a buffer in big endian order. 
*/ +void bshuf_write_uint32_BE(void* buf, uint32_t num) { + int ii; + uint8_t* b = (uint8_t*) buf; + uint32_t pow28 = 1 << 8; + for (ii = 3; ii >= 0; ii--) { + b[ii] = num % pow28; + num = num / pow28; + } +} + + +/* Read a 32 bit unsigned integer from a buffer big endian order. */ +uint32_t bshuf_read_uint32_BE(const void* buf) { + int ii; + uint8_t* b = (uint8_t*) buf; + uint32_t num = 0, pow28 = 1 << 8, cp = 1; + for (ii = 3; ii >= 0; ii--) { + num += b[ii] * cp; + cp *= pow28; + } + return num; +} + + +/* ---- Public functions ---- + * + * See header file for description and usage. + * + */ + +size_t bshuf_default_block_size(const size_t elem_size) { + // This function needs to be absolutely stable between versions. + // Otherwise encoded data will not be decodable. + + size_t block_size = BSHUF_TARGET_BLOCK_SIZE_B / elem_size; + // Ensure it is a required multiple. + block_size = (block_size / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT; + return MAX(block_size, BSHUF_MIN_RECOMMEND_BLOCK); +} + + +int64_t bshuf_bitshuffle(const void* in, void* out, const size_t size, + const size_t elem_size, size_t block_size) { + + return bshuf_blocked_wrap_fun(&bshuf_bitshuffle_block, in, out, size, + elem_size, block_size, 0/*option*/); +} + + +int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size, + const size_t elem_size, size_t block_size) { + + return bshuf_blocked_wrap_fun(&bshuf_bitunshuffle_block, in, out, size, + elem_size, block_size, 0/*option*/); +} + + +#undef TRANS_BIT_8X8 +#undef TRANS_ELEM_TYPE +#undef MAX +#undef CHECK_MULT_EIGHT +#undef CHECK_ERR_FREE + +#undef USESSE2 +#undef USEAVX2 diff --git a/src/bitshuffle_core.h b/src/bitshuffle_core.h new file mode 100644 index 00000000..fba7301c --- /dev/null +++ b/src/bitshuffle_core.h @@ -0,0 +1,169 @@ +/* + * Bitshuffle - Filter for improving compression of typed binary data. 
+ * + * This file is part of Bitshuffle + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for details about copyright and rights to use. + * + * + * Header File + * + * Worker routines return an int64_t which is the number of bytes processed + * if positive or an error code if negative. + * + * Error codes: + * -1 : Failed to allocate memory. + * -11 : Missing SSE. + * -12 : Missing AVX. + * -13 : Missing Arm Neon. + * -80 : Input size not a multiple of 8. + * -81 : block_size not multiple of 8. + * -91 : Decompression error, wrong number of bytes processed. + * -1YYY : Error internal to compression routine with error code -YYY. + */ + + +#ifndef BITSHUFFLE_CORE_H +#define BITSHUFFLE_CORE_H + +// We assume GNU g++ defining `__cplusplus` has stdint.h +#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus) +#include +#else + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + typedef signed int int32_t; + typedef unsigned long long uint64_t; + typedef long long int64_t; +#endif + +#include + + +// These are usually set in the setup.py. +#ifndef BSHUF_VERSION_MAJOR +#define BSHUF_VERSION_MAJOR 0 +#define BSHUF_VERSION_MINOR 4 +#define BSHUF_VERSION_POINT 0 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* --- bshuf_using_SSE2 ---- + * + * Whether routines where compiled with the SSE2 instruction set. + * + * Returns + * ------- + * 1 if using SSE2, 0 otherwise. + * + */ +int bshuf_using_SSE2(void); + + +/* ---- bshuf_using_NEON ---- + * + * Whether routines where compiled with the NEON instruction set. + * + * Returns + * ------- + * 1 if using NEON, 0 otherwise. + * + */ +int bshuf_using_NEON(void); + + +/* ---- bshuf_using_AVX2 ---- + * + * Whether routines where compiled with the AVX2 instruction set. + * + * Returns + * ------- + * 1 if using AVX2, 0 otherwise. 
+ * + */ +int bshuf_using_AVX2(void); + + +/* ---- bshuf_default_block_size ---- + * + * The default block size as function of element size. + * + * This is the block size used by the blocked routines (any routine + * taking a *block_size* argument) when the block_size is not provided + * (zero is passed). + * + * The results of this routine are guaranteed to be stable such that + * shuffled/compressed data can always be decompressed. + * + * Parameters + * ---------- + * elem_size : element size of data to be shuffled/compressed. + * + */ +size_t bshuf_default_block_size(const size_t elem_size); + + +/* ---- bshuf_bitshuffle ---- + * + * Bitshuffle the data. + * + * Transpose the bits within elements, in blocks of *block_size* + * elements. + * + * Parameters + * ---------- + * in : input buffer, must be of size * elem_size bytes + * out : output buffer, must be of size * elem_size bytes + * size : number of elements in input + * elem_size : element size of typed data + * block_size : Do transpose in blocks of this many elements. Pass 0 to + * select automatically (recommended). + * + * Returns + * ------- + * number of bytes processed, negative error-code if failed. + * + */ +int64_t bshuf_bitshuffle(const void* in, void* out, const size_t size, + const size_t elem_size, size_t block_size); + + +/* ---- bshuf_bitunshuffle ---- + * + * Unshuffle bitshuffled data. + * + * Untranspose the bits within elements, in blocks of *block_size* + * elements. + * + * To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size* + * must match the parameters used to shuffle the data. + * + * Parameters + * ---------- + * in : input buffer, must be of size * elem_size bytes + * out : output buffer, must be of size * elem_size bytes + * size : number of elements in input + * elem_size : element size of typed data + * block_size : Do transpose in blocks of this many elements. Pass 0 to + * select automatically (recommended). 
+ * + * Returns + * ------- + * number of bytes processed, negative error-code if failed. + * + */ +int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size, + const size_t elem_size, size_t block_size); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // BITSHUFFLE_CORE_H diff --git a/src/bitshuffle_internals.h b/src/bitshuffle_internals.h new file mode 100644 index 00000000..59356f10 --- /dev/null +++ b/src/bitshuffle_internals.h @@ -0,0 +1,75 @@ +/* + * Bitshuffle - Filter for improving compression of typed binary data. + * + * This file is part of Bitshuffle + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for details about copyright and rights to use. + */ + + +#ifndef BITSHUFFLE_INTERNALS_H +#define BITSHUFFLE_INTERNALS_H + +// We assume GNU g++ defining `__cplusplus` has stdint.h +#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus) +#include +#else + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + typedef signed int int32_t; + typedef unsigned long long uint64_t; + typedef long long int64_t; +#endif + +#include +#include "iochain.h" + + +// Constants. +#ifndef BSHUF_MIN_RECOMMEND_BLOCK +#define BSHUF_MIN_RECOMMEND_BLOCK 128 +#define BSHUF_BLOCKED_MULT 8 // Block sizes must be multiple of this. +#define BSHUF_TARGET_BLOCK_SIZE_B 8192 +#endif + + +// Macros. +#define CHECK_ERR_FREE(count, buf) if (count < 0) { free(buf); return count; } + + +#ifdef __cplusplus +extern "C" { +#endif + +/* ---- Utility functions for internal use only ---- */ + +int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size, + const size_t elem_size); + +/* Read a 32 bit unsigned integer from a buffer big endian order. */ +uint32_t bshuf_read_uint32_BE(const void* buf); + +/* Write a 32 bit unsigned integer to a buffer in big endian order. 
*/ +void bshuf_write_uint32_BE(void* buf, uint32_t num); + +int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size, + const size_t elem_size); + +/* Function definition for worker functions that process a single block. */ +typedef int64_t (*bshufBlockFunDef)(ioc_chain* C_ptr, + const size_t size, const size_t elem_size, const int option); + +/* Wrap a function for processing a single block to process an entire buffer in + * parallel. */ +int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out, + const size_t size, const size_t elem_size, size_t block_size, const int option); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // BITSHUFFLE_INTERNALS_H diff --git a/src/bshuf_h5filter.c b/src/bshuf_h5filter.c new file mode 100644 index 00000000..114b91ff --- /dev/null +++ b/src/bshuf_h5filter.c @@ -0,0 +1,260 @@ +/* + * Bitshuffle HDF5 filter + * + * This file is part of Bitshuffle + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for details about copyright and rights to use. + * + */ + +#include "bitshuffle.h" +#include "bshuf_h5filter.h" + + +#define PUSH_ERR(func, minor, str) \ + H5Epush1(__FILE__, func, __LINE__, H5E_PLINE, minor, str) + + +// Prototypes from bitshuffle.c +void bshuf_write_uint64_BE(void* buf, uint64_t num); +uint64_t bshuf_read_uint64_BE(void* buf); +void bshuf_write_uint32_BE(void* buf, uint32_t num); +uint32_t bshuf_read_uint32_BE(const void* buf); + + +// Only called on compression, not on reverse. 
+herr_t bshuf_h5_set_local(hid_t dcpl, hid_t type, hid_t space){ + + herr_t r; + size_t ii; + + unsigned int elem_size; + + unsigned int flags; + size_t nelements = 8; + size_t nelem_max = 11; + unsigned values[] = {0,0,0,0,0,0,0,0,0,0,0}; + unsigned tmp_values[] = {0,0,0,0,0,0,0,0}; + char msg[80]; + + r = H5Pget_filter_by_id2(dcpl, BSHUF_H5FILTER, &flags, &nelements, + tmp_values, 0, NULL, NULL); + if(r<0) return -1; + + // First 3 slots reserved. Move any passed options to higher addresses. + for (ii=0; ii < nelements && ii + 3 < nelem_max; ii++) { + values[ii + 3] = tmp_values[ii]; + } + + nelements = 3 + nelements; + + values[0] = BSHUF_VERSION_MAJOR; + values[1] = BSHUF_VERSION_MINOR; + + elem_size = H5Tget_size(type); + if(elem_size <= 0) { + PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, + "Invalid element size."); + return -1; + } + + values[2] = elem_size; + + // Validate user supplied arguments. + if (nelements > 3) { + if (values[3] % 8 || values[3] < 0) { + sprintf(msg, "Error in bitshuffle. 
Invalid block size: %d.", + values[3]); + PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, msg); + return -1; + } + } + if (nelements > 4) { + switch (values[4]) { + case 0: + break; + case BSHUF_H5_COMPRESS_LZ4: + break; + #ifdef ZSTD_SUPPORT + case BSHUF_H5_COMPRESS_ZSTD: + break; + #endif + default: + PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, + "Invalid bitshuffle compression."); + } + } + + r = H5Pmodify_filter(dcpl, BSHUF_H5FILTER, flags, nelements, values); + if(r<0) return -1; + + return 1; +} + + +size_t bshuf_h5_filter(unsigned int flags, size_t cd_nelmts, + const unsigned int cd_values[], size_t nbytes, + size_t *buf_size, void **buf) { + + size_t size, elem_size; + int err = -1; + char msg[80]; + size_t block_size = 0; + size_t buf_size_out, nbytes_uncomp, nbytes_out; + char* in_buf = *buf; + void *out_buf; + + if (cd_nelmts < 3) { + PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, + "Not enough parameters."); + return 0; + } + elem_size = cd_values[2]; +#ifdef ZSTD_SUPPORT + const int comp_lvl = cd_values[5]; +#endif + + // User specified block size. + if (cd_nelmts > 3) block_size = cd_values[3]; + + if (block_size == 0) block_size = bshuf_default_block_size(elem_size); + +#ifndef ZSTD_SUPPORT + if (cd_nelmts > 4 && (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD)) { + PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, + "ZSTD compression filter chosen but ZSTD support not installed."); + return 0; + } +#endif + + // Compression in addition to bitshuffle. + if (cd_nelmts > 4 && (cd_values[4] == BSHUF_H5_COMPRESS_LZ4 || cd_values[4] == BSHUF_H5_COMPRESS_ZSTD)) { + if (flags & H5Z_FLAG_REVERSE) { + // First eight bytes is the number of bytes in the output buffer, + // little endian. + nbytes_uncomp = bshuf_read_uint64_BE(in_buf); + // Override the block size with the one read from the header. + block_size = bshuf_read_uint32_BE((const char*) in_buf + 8) / elem_size; + // Skip over the header. 
+ in_buf += 12; + buf_size_out = nbytes_uncomp; + } else { + nbytes_uncomp = nbytes; + // Pick which compressions library to use + if(cd_values[4] == BSHUF_H5_COMPRESS_LZ4) { + buf_size_out = bshuf_compress_lz4_bound(nbytes_uncomp / elem_size, + elem_size, block_size) + 12; + } +#ifdef ZSTD_SUPPORT + else if (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD) { + buf_size_out = bshuf_compress_zstd_bound(nbytes_uncomp / elem_size, + elem_size, block_size) + 12; + } +#endif + } + } else { + nbytes_uncomp = nbytes; + buf_size_out = nbytes; + } + + // TODO, remove this restriction by memcopying the extra. + if (nbytes_uncomp % elem_size) { + PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, + "Non integer number of elements."); + return 0; + } + size = nbytes_uncomp / elem_size; + + out_buf = malloc(buf_size_out); + if (out_buf == NULL) { + PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, + "Could not allocate output buffer."); + return 0; + } + + if (cd_nelmts > 4 && (cd_values[4] == BSHUF_H5_COMPRESS_LZ4 || cd_values[4] == BSHUF_H5_COMPRESS_ZSTD)) { + if (flags & H5Z_FLAG_REVERSE) { + // Bit unshuffle/decompress. + // Pick which compressions library to use + if(cd_values[4] == BSHUF_H5_COMPRESS_LZ4) { + err = bshuf_decompress_lz4(in_buf, out_buf, size, elem_size, block_size); + } +#ifdef ZSTD_SUPPORT + else if (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD) { + err = bshuf_decompress_zstd(in_buf, out_buf, size, elem_size, block_size); + } +#endif + nbytes_out = nbytes_uncomp; + } else { + // Bit shuffle/compress. + // Write the header, described in + // http://www.hdfgroup.org/services/filters/HDF5_LZ4.pdf. + // Techincally we should be using signed integers instead of + // unsigned ones, however for valid inputs (positive numbers) these + // have the same representation. 
+ bshuf_write_uint64_BE(out_buf, nbytes_uncomp); + bshuf_write_uint32_BE((char*) out_buf + 8, block_size * elem_size); + if(cd_values[4] == BSHUF_H5_COMPRESS_LZ4) { + err = bshuf_compress_lz4(in_buf, (char*) out_buf + 12, size, + elem_size, block_size); + } +#ifdef ZSTD_SUPPORT + else if (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD) { + err = bshuf_compress_zstd(in_buf, (char*) out_buf + 12, size, + elem_size, block_size, comp_lvl); + } +#endif + nbytes_out = err + 12; + } + } else { + if (flags & H5Z_FLAG_REVERSE) { + // Bit unshuffle. + err = bshuf_bitunshuffle(in_buf, out_buf, size, elem_size, + block_size); } else { + // Bit shuffle. + err = bshuf_bitshuffle(in_buf, out_buf, size, elem_size, + block_size); } nbytes_out = nbytes; } + //printf("nb_in %d, nb_uncomp %d, nb_out %d, buf_out %d, block %d\n", + //nbytes, nbytes_uncomp, nbytes_out, buf_size_out, block_size); + + if (err < 0) { + sprintf(msg, "Error in bitshuffle with error code %d.", err); + PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, msg); + free(out_buf); + return 0; + } else { + free(*buf); + *buf = out_buf; + *buf_size = buf_size_out; + + return nbytes_out; + } +} + + + +H5Z_class_t bshuf_H5Filter[1] = {{ + H5Z_CLASS_T_VERS, + (H5Z_filter_t)(BSHUF_H5FILTER), + 1, 1, + "bitshuffle; see https://github.com/kiyo-masui/bitshuffle", + NULL, + (H5Z_set_local_func_t)(bshuf_h5_set_local), + (H5Z_func_t)(bshuf_h5_filter) +}}; + + +int bshuf_register_h5filter(void){ + + int retval; + + retval = H5Zregister(bshuf_H5Filter); + if(retval<0){ + PUSH_ERR("bshuf_register_h5filter", + H5E_CANTREGISTER, "Can't register bitshuffle filter"); + } + return retval; +} diff --git a/src/bshuf_h5filter.h b/src/bshuf_h5filter.h new file mode 100644 index 00000000..54ee6775 --- /dev/null +++ b/src/bshuf_h5filter.h @@ -0,0 +1,67 @@ +/* + * Bitshuffle HDF5 filter + * + * This file is part of Bitshuffle + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for 
details about copyright and rights to use. + * + * + * Header File + * + * Filter Options + * -------------- + * block_size (option slot 0) : integer (optional) + * What block size to use (in elements not bytes). Default is 0, + * for which bitshuffle will pick a block size with a target of 8kb. + * Compression (option slot 1) : 0 or BSHUF_H5_COMPRESS_LZ4 + * Whether to apply LZ4 compression to the data after bitshuffling. + * This is much faster than applying compression as a second filter + * because it is done when the small block of data is already in the + * L1 cache. + * + * For LZ4 compression, the compressed format of the data is the same as + * for the normal LZ4 filter described in + * http://www.hdfgroup.org/services/filters/HDF5_LZ4.pdf. + * + */ + + +#ifndef BSHUF_H5FILTER_H +#define BSHUF_H5FILTER_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define H5Z_class_t_vers 2 +#include "hdf5.h" + + +#define BSHUF_H5FILTER 32008 + + +#define BSHUF_H5_COMPRESS_LZ4 2 +#define BSHUF_H5_COMPRESS_ZSTD 3 + + +extern H5Z_class_t bshuf_H5Filter[1]; + + +/* ---- bshuf_register_h5filter ---- + * + * Register the bitshuffle HDF5 filter within the HDF5 library. + * + * Call this before using the bitshuffle HDF5 filter from C unless + * using dynamically loaded filters. + * + */ +int bshuf_register_h5filter(void); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // BSHUF_H5FILTER_H diff --git a/src/bshuf_h5plugin.c b/src/bshuf_h5plugin.c new file mode 100644 index 00000000..22e99929 --- /dev/null +++ b/src/bshuf_h5plugin.c @@ -0,0 +1,19 @@ +/* + * Dynamically loaded filter plugin for HDF5 Bitshuffle filter. + * + * This file is part of Bitshuffle + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for details about copyright and rights to use. 
+ * + */ + + +#include "bshuf_h5filter.h" +#include "H5PLextern.h" + +H5PL_type_t H5PLget_plugin_type(void) {return H5PL_TYPE_FILTER;} +const void* H5PLget_plugin_info(void) {return bshuf_H5Filter;} + diff --git a/src/hdf5_dl.c b/src/hdf5_dl.c new file mode 100644 index 00000000..8e47fb80 --- /dev/null +++ b/src/hdf5_dl.c @@ -0,0 +1,358 @@ +# /*########################################################################## +# +# Copyright (c) 2019 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ###########################################################################*/ +/* This provides replacement for HDF5 functions/variables used by filters. + * + * Those replacement provides no-op functions by default and if init_filter + * is called it provides access to HDF5 functions/variables through dynamic + * loading. + * This is useful on Linux/macOS to avoid linking the plugin with a dedicated + * HDF5 library. 
+ */ +#include +#include +#include +#include "hdf5.h" + + +/*Function types*/ +/*H5*/ +typedef herr_t (*DL_func_H5open)(void); +/*H5E*/ +typedef herr_t (* DL_func_H5Epush1)( + const char *file, const char *func, unsigned line, + H5E_major_t maj, H5E_minor_t min, const char *str); +typedef herr_t (* DL_func_H5Epush2)( + hid_t err_stack, const char *file, const char *func, unsigned line, + hid_t cls_id, hid_t maj_id, hid_t min_id, const char *msg, ...); +/*H5P*/ +typedef herr_t (* DL_func_H5Pget_filter_by_id2)(hid_t plist_id, H5Z_filter_t id, + unsigned int *flags/*out*/, size_t *cd_nelmts/*out*/, + unsigned cd_values[]/*out*/, size_t namelen, char name[]/*out*/, + unsigned *filter_config/*out*/); +typedef int (* DL_func_H5Pget_chunk)( + hid_t plist_id, int max_ndims, hsize_t dim[]/*out*/); +typedef herr_t (* DL_func_H5Pmodify_filter)( + hid_t plist_id, H5Z_filter_t filter, + unsigned int flags, size_t cd_nelmts, + const unsigned int cd_values[/*cd_nelmts*/]); +/*H5T*/ +typedef size_t (* DL_func_H5Tget_size)( + hid_t type_id); +typedef H5T_class_t (* DL_func_H5Tget_class)(hid_t type_id); +typedef hid_t (* DL_func_H5Tget_super)(hid_t type); +typedef herr_t (* DL_func_H5Tclose)(hid_t type_id); +/*H5Z*/ +typedef herr_t (* DL_func_H5Zregister)( + const void *cls); + + +static struct { + /*H5*/ + DL_func_H5open H5open; + /*H5E*/ + DL_func_H5Epush1 H5Epush1; + DL_func_H5Epush2 H5Epush2; + /*H5P*/ + DL_func_H5Pget_filter_by_id2 H5Pget_filter_by_id2; + DL_func_H5Pget_chunk H5Pget_chunk; + DL_func_H5Pmodify_filter H5Pmodify_filter; + /*H5T*/ + DL_func_H5Tget_size H5Tget_size; + DL_func_H5Tget_class H5Tget_class; + DL_func_H5Tget_super H5Tget_super; + DL_func_H5Tclose H5Tclose; + /*H5T*/ + DL_func_H5Zregister H5Zregister; +} DL_H5Functions = { + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}; + +static struct { + /*HDF5 variables*/ + void *h5e_cantregister_ptr; + void *h5e_callback_ptr; + void *h5e_pline_ptr; + void *h5e_err_cls_ptr; +} H5Variables_ptr = { + 
NULL, NULL, NULL, NULL}; + +/*HDF5 variables*/ +hid_t H5E_CANTREGISTER_g = -1; +hid_t H5E_CALLBACK_g = -1; +hid_t H5E_PLINE_g = -1; +hid_t H5E_ERR_CLS_g = -1; + + +static bool is_init = false; + +/* + * Try to find a symbol within a library + * + * handle: Handle to the library + * symbol: Symbol to look for + * Returns: a pointer to the symbol or NULL + * if the symbol can't be found + */ +void *find_sym(void *handle, const char *symbol) { + + void *ret = NULL, *err = NULL; + dlerror(); /* clear error code */ + ret = dlsym(handle, symbol); + + if(ret != NULL && (err = dlerror()) == NULL) + return ret; + else + return NULL; +} + +/* + * Check that all symbols have been loaded + * + * Returns: -1 if an error occured, 0 for success + */ +int check_symbols() { + + if(DL_H5Functions.H5open == NULL) + return -1; + + /*H5E*/ + if(DL_H5Functions.H5Epush1 == NULL) + return -1; + + if(DL_H5Functions.H5Epush2 == NULL) + return -1; + + /*H5P*/ + if(DL_H5Functions.H5Pget_filter_by_id2 == NULL) + return -1; + + if(DL_H5Functions.H5Pget_chunk == NULL) + return -1; + + if(DL_H5Functions.H5Pmodify_filter == NULL) + return -1; + + /*H5T*/ + if(DL_H5Functions.H5Tget_size == NULL) + return -1; + + if(DL_H5Functions.H5Tget_class == NULL) + return -1; + + if(DL_H5Functions.H5Tget_super == NULL) + return -1; + + if(DL_H5Functions.H5Tclose == NULL) + return -1; + + /*H5Z*/ + if(DL_H5Functions.H5Zregister == NULL) + return -1; + + /*Variables*/ + if(H5Variables_ptr.h5e_cantregister_ptr == NULL) + return -1; + + if(H5Variables_ptr.h5e_callback_ptr == NULL) + return -1; + + if(H5Variables_ptr.h5e_pline_ptr == NULL) + return -1; + + if(H5Variables_ptr.h5e_err_cls_ptr == NULL) + return -1; + + return 0; + +} + +/* Initialize the dynamic loading of symbols and register the plugin + * + * libname: Name of the DLL from which to load libHDF5 symbols + * Returns: -1 if an error occured, 0 for success + */ +int init_filter(const char *libname) +{ + int retval = -1; + void *handle = NULL; + + handle 
= dlopen(libname, RTLD_LAZY | RTLD_LOCAL); + + if (handle != NULL) { + /*H5*/ + if(DL_H5Functions.H5open == NULL) + // find_sym will return NULL if it fails so no need to check return ptr + DL_H5Functions.H5open = (DL_func_H5open)find_sym(handle, "H5open"); + + /*H5E*/ + if(DL_H5Functions.H5Epush1 == NULL) + DL_H5Functions.H5Epush1 = (DL_func_H5Epush1)find_sym(handle, "H5Epush1"); + + if(DL_H5Functions.H5Epush2 == NULL) + DL_H5Functions.H5Epush2 = (DL_func_H5Epush2)find_sym(handle, "H5Epush2"); + + /*H5P*/ + if(DL_H5Functions.H5Pget_filter_by_id2 == NULL) + DL_H5Functions.H5Pget_filter_by_id2 = + (DL_func_H5Pget_filter_by_id2)find_sym(handle, "H5Pget_filter_by_id2"); + + if(DL_H5Functions.H5Pget_chunk == NULL) + DL_H5Functions.H5Pget_chunk = (DL_func_H5Pget_chunk)find_sym(handle, "H5Pget_chunk"); + + if(DL_H5Functions.H5Pmodify_filter == NULL) + DL_H5Functions.H5Pmodify_filter = + (DL_func_H5Pmodify_filter)find_sym(handle, "H5Pmodify_filter"); + + /*H5T*/ + if(DL_H5Functions.H5Tget_size == NULL) + DL_H5Functions.H5Tget_size = (DL_func_H5Tget_size)find_sym(handle, "H5Tget_size"); + + if(DL_H5Functions.H5Tget_class == NULL) + DL_H5Functions.H5Tget_class = (DL_func_H5Tget_class)find_sym(handle, "H5Tget_class"); + + if(DL_H5Functions.H5Tget_super == NULL) + DL_H5Functions.H5Tget_super = (DL_func_H5Tget_super)find_sym(handle, "H5Tget_super"); + + if(DL_H5Functions.H5Tclose == NULL) + DL_H5Functions.H5Tclose = (DL_func_H5Tclose)find_sym(handle, "H5Tclose"); + + /*H5Z*/ + if(DL_H5Functions.H5Zregister == NULL) + DL_H5Functions.H5Zregister = (DL_func_H5Zregister)find_sym(handle, "H5Zregister"); + + /*Variables*/ + if(H5Variables_ptr.h5e_cantregister_ptr == NULL) + H5Variables_ptr.h5e_cantregister_ptr = find_sym(handle, "H5E_CANTREGISTER_g"); + + if(H5Variables_ptr.h5e_callback_ptr == NULL) + H5Variables_ptr.h5e_callback_ptr = find_sym(handle, "H5E_CALLBACK_g"); + + if(H5Variables_ptr.h5e_pline_ptr == NULL) + H5Variables_ptr.h5e_pline_ptr = find_sym(handle, "H5E_PLINE_g"); 
+ + if(H5Variables_ptr.h5e_err_cls_ptr == NULL) + H5Variables_ptr.h5e_err_cls_ptr = find_sym(handle, "H5E_ERR_CLS_g"); + + retval = check_symbols(); + if(!retval) { + H5E_CANTREGISTER_g = *((hid_t *)H5Variables_ptr.h5e_cantregister_ptr); + H5E_CALLBACK_g = *((hid_t *)H5Variables_ptr.h5e_callback_ptr); + H5E_PLINE_g = *((hid_t *)H5Variables_ptr.h5e_pline_ptr); + H5E_ERR_CLS_g = *((hid_t *)H5Variables_ptr.h5e_err_cls_ptr); + is_init = true; + } + } + + return retval; +}; + + +#define CALL(fallback, func, ...)\ + if(DL_H5Functions.func != NULL) {\ + return DL_H5Functions.func(__VA_ARGS__);\ + } else {\ + return fallback;\ + } + + +/*Function wrappers*/ +/*H5*/ +herr_t H5open(void) +{ +CALL(0, H5open) +}; + +/*H5E*/ +herr_t H5Epush1(const char *file, const char *func, unsigned line, + H5E_major_t maj, H5E_minor_t min, const char *str) +{ +CALL(0, H5Epush1, file, func, line, maj, min, str) +} + +herr_t H5Epush2(hid_t err_stack, const char *file, const char *func, unsigned line, + hid_t cls_id, hid_t maj_id, hid_t min_id, const char *fmt, ...) +{ + if(DL_H5Functions.H5Epush2 != NULL) { + /* Avoid using variadic: convert fmt+ ... 
to a message sting */ + va_list ap; + char msg_string[256]; /*Buffer hopefully wide enough*/ + + va_start(ap, fmt); + vsnprintf(msg_string, sizeof(msg_string), fmt, ap); + msg_string[sizeof(msg_string) - 1] = '\0'; + va_end(ap); + + return DL_H5Functions.H5Epush2(err_stack, file, func, line, cls_id, maj_id, min_id, msg_string); + } else { + return 0; + } +} + +/*H5P*/ +herr_t H5Pget_filter_by_id2(hid_t plist_id, H5Z_filter_t id, + unsigned int *flags/*out*/, size_t *cd_nelmts/*out*/, + unsigned cd_values[]/*out*/, size_t namelen, char name[]/*out*/, + unsigned *filter_config/*out*/) +{ +CALL(0, H5Pget_filter_by_id2, plist_id, id, flags, cd_nelmts, cd_values, namelen, name, filter_config) +} + +int H5Pget_chunk(hid_t plist_id, int max_ndims, hsize_t dim[]/*out*/) +{ +CALL(0, H5Pget_chunk, plist_id, max_ndims, dim) +} + +herr_t H5Pmodify_filter(hid_t plist_id, H5Z_filter_t filter, + unsigned int flags, size_t cd_nelmts, + const unsigned int cd_values[/*cd_nelmts*/]) +{ +CALL(0, H5Pmodify_filter, plist_id, filter, flags, cd_nelmts, cd_values) +} + +/*H5T*/ +size_t H5Tget_size(hid_t type_id) +{ +CALL(0, H5Tget_size, type_id) +} + +H5T_class_t H5Tget_class(hid_t type_id) +{ +CALL(H5T_NO_CLASS, H5Tget_class, type_id) +} + + +hid_t H5Tget_super(hid_t type) +{ +CALL(0, H5Tget_super, type) +} + +herr_t H5Tclose(hid_t type_id) +{ +CALL(0, H5Tclose, type_id) +} + +/*H5Z*/ +herr_t H5Zregister(const void *cls) +{ +CALL(-1, H5Zregister, cls) +} diff --git a/src/iochain.c b/src/iochain.c new file mode 100644 index 00000000..baa97296 --- /dev/null +++ b/src/iochain.c @@ -0,0 +1,90 @@ +/* + * IOchain - Distribute a chain of dependant IO events amoung threads. + * + * This file is part of Bitshuffle + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for details about copyright and rights to use. 
+ * + */ + +#include +#include "iochain.h" + + +void ioc_init(ioc_chain *C, const void *in_ptr_0, void *out_ptr_0) { +#ifdef _OPENMP + omp_init_lock(&C->next_lock); + for (size_t ii = 0; ii < IOC_SIZE; ii ++) { + omp_init_lock(&(C->in_pl[ii].lock)); + omp_init_lock(&(C->out_pl[ii].lock)); + } +#endif + C->next = 0; + C->in_pl[0].ptr = in_ptr_0; + C->out_pl[0].ptr = out_ptr_0; +} + + +void ioc_destroy(ioc_chain *C) { +#ifdef _OPENMP + omp_destroy_lock(&C->next_lock); + for (size_t ii = 0; ii < IOC_SIZE; ii ++) { + omp_destroy_lock(&(C->in_pl[ii].lock)); + omp_destroy_lock(&(C->out_pl[ii].lock)); + } +#endif +} + + +const void * ioc_get_in(ioc_chain *C, size_t *this_iter) { +#ifdef _OPENMP + omp_set_lock(&C->next_lock); + #pragma omp flush +#endif + *this_iter = C->next; + C->next ++; +#ifdef _OPENMP + omp_set_lock(&(C->in_pl[*this_iter % IOC_SIZE].lock)); + omp_set_lock(&(C->in_pl[(*this_iter + 1) % IOC_SIZE].lock)); + omp_set_lock(&(C->out_pl[(*this_iter + 1) % IOC_SIZE].lock)); + omp_unset_lock(&C->next_lock); +#endif + return C->in_pl[*this_iter % IOC_SIZE].ptr; +} + + +void ioc_set_next_in(ioc_chain *C, size_t* this_iter, void* in_ptr) { + C->in_pl[(*this_iter + 1) % IOC_SIZE].ptr = in_ptr; +#ifdef _OPENMP + omp_unset_lock(&(C->in_pl[(*this_iter + 1) % IOC_SIZE].lock)); +#endif +} + + +void * ioc_get_out(ioc_chain *C, size_t *this_iter) { +#ifdef _OPENMP + omp_set_lock(&(C->out_pl[(*this_iter) % IOC_SIZE].lock)); + #pragma omp flush +#endif + void *out_ptr = C->out_pl[*this_iter % IOC_SIZE].ptr; +#ifdef _OPENMP + omp_unset_lock(&(C->out_pl[(*this_iter) % IOC_SIZE].lock)); +#endif + return out_ptr; +} + + +void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr) { + C->out_pl[(*this_iter + 1) % IOC_SIZE].ptr = out_ptr; +#ifdef _OPENMP + omp_unset_lock(&(C->out_pl[(*this_iter + 1) % IOC_SIZE].lock)); + // *in_pl[this_iter]* lock released at the end of the iteration to avoid being + // overtaken by previous threads and having *out_pl[this_iter]* 
corrupted. + // Especially worried about thread 0, iteration 0. + omp_unset_lock(&(C->in_pl[(*this_iter) % IOC_SIZE].lock)); +#endif +} + diff --git a/src/iochain.h b/src/iochain.h new file mode 100644 index 00000000..4e225d1b --- /dev/null +++ b/src/iochain.h @@ -0,0 +1,94 @@ +/* + * IOchain - Distribute a chain of dependant IO events amoung threads. + * + * This file is part of Bitshuffle + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for details about copyright and rights to use. + * + * + * Header File + * + * Similar in concept to a queue. Each task includes reading an input + * and writing output, but the location of the input/output (the pointers) + * depend on the previous item in the chain. + * + * This is designed for parallelizing blocked compression/decompression IO, + * where the destination of a compressed block depends on the compressed size + * of all previous blocks. + * + * Implemented with OpenMP locks. + * + * + * Usage + * ----- + * - Call `ioc_init` in serial block. + * - Each thread should create a local variable *size_t this_iter* and + * pass its address to all function calls. Its value will be set + * inside the functions and is used to identify the thread. + * - Each thread must call each of the `ioc_get*` and `ioc_set*` methods + * exactly once per iteration, starting with `ioc_get_in` and ending + * with `ioc_set_next_out`. + * - The order (`ioc_get_in`, `ioc_set_next_in`, *work*, `ioc_get_out`, + * `ioc_set_next_out`, *work*) is most efficient. + * - Have each thread call `ioc_end_pop`. + * - `ioc_get_in` is blocked until the previous entry's + * `ioc_set_next_in` is called. + * - `ioc_get_out` is blocked until the previous entry's + * `ioc_set_next_out` is called. + * - There are no blocks on the very first iteration. + * - Call `ioc_destroy` in serial block. + * - Safe for num_threads >= IOC_SIZE (but less efficient). 
+ * + */ + + +#ifndef IOCHAIN_H +#define IOCHAIN_H + + +#include +#ifdef _OPENMP +#include +#endif + + +#define IOC_SIZE 33 + + +typedef struct ioc_ptr_and_lock { +#ifdef _OPENMP + omp_lock_t lock; +#endif + void *ptr; +} ptr_and_lock; + +typedef struct ioc_const_ptr_and_lock { +#ifdef _OPENMP + omp_lock_t lock; +#endif + const void *ptr; +} const_ptr_and_lock; + + +typedef struct ioc_chain { +#ifdef _OPENMP + omp_lock_t next_lock; +#endif + size_t next; + const_ptr_and_lock in_pl[IOC_SIZE]; + ptr_and_lock out_pl[IOC_SIZE]; +} ioc_chain; + + +void ioc_init(ioc_chain *C, const void *in_ptr_0, void *out_ptr_0); +void ioc_destroy(ioc_chain *C); +const void * ioc_get_in(ioc_chain *C, size_t *this_iter); +void ioc_set_next_in(ioc_chain *C, size_t* this_iter, void* in_ptr); +void * ioc_get_out(ioc_chain *C, size_t *this_iter); +void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr); + +#endif // IOCHAIN_H + diff --git a/src/lzf_h5plugin.c b/src/lzf_h5plugin.c new file mode 100644 index 00000000..cbf7e3d8 --- /dev/null +++ b/src/lzf_h5plugin.c @@ -0,0 +1,42 @@ +/* + * Dynamically loaded filter plugin for HDF5 LZF filter. + * + * This file is part of Bitshuffle + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for details about copyright and rights to use. 
+ * + */ + + +#define H5Z_class_t_vers 2 +#include "lzf_filter.h" +#include "H5PLextern.h" + +#include + + +size_t lzf_filter(unsigned flags, size_t cd_nelmts, + const unsigned cd_values[], size_t nbytes, + size_t *buf_size, void **buf); + + +herr_t lzf_set_local(hid_t dcpl, hid_t type, hid_t space); + + +H5Z_class_t lzf_H5Filter[1] = {{ + H5Z_CLASS_T_VERS, + (H5Z_filter_t)(H5PY_FILTER_LZF), + 1, 1, + "lzf", + NULL, + (H5Z_set_local_func_t)(lzf_set_local), + (H5Z_func_t)(lzf_filter) +}}; + + +H5PL_type_t H5PLget_plugin_type(void) {return H5PL_TYPE_FILTER;} +const void* H5PLget_plugin_info(void) {return lzf_H5Filter;} + diff --git a/tests/data/regression_0.1.3.h5 b/tests/data/regression_0.1.3.h5 new file mode 100644 index 0000000000000000000000000000000000000000..875b751bf73cc79ff5dc4e073c75288ff440856d GIT binary patch literal 114447 zcmeFa1ymi$w)l+&cL)vv5`udmxVt+9cZc8(!8J&54-z1_I|O%kcL@%`HThtG$>iqd z&bRLRzjyDc!f;n{zp7tb80Z<8Y8c->u8r^$YUL^(*oCw*9?6R=h@YwPGNc*1|ARqwf?SGWJj{&^1$N&Lv3)=5K_Fe*CpD}J~kghj3^3h!$ z2ngu69l)>mzexZCp5EGy0QY;|c7%9$#<^(&YvCjF-7>tb|4(nd8zIs(KT05p7~~qn zgT>Q)&OZ}ySTWh^nlF%39yV;BWM3J(|ZDbX_!A4{*r_z_ei)b}uMr9Osc@;!;-Jg$zdyaidoXYWj`K&;+K zr~&ajKm1ZZH+%8rg1QD_@o{p4PUr{GZvGy=$iBC5h>25JV(jF+V<~+FyLuOqj^mzB zB3%p0;rE5)a`BhxDW{o?_P>bcDhd>cpM1tRQ;0r(C4I;720q7ieA|gE~((}w0`-%V+>t|Duho=8M%T1%_5Aq21{b&cQSroHER&JuQn0O}p*|!nGw8PxULoHXXci_Gv=} zpM>&ec8Ya!j+T20eC-xljV3Dkr>?SUJhEkyNW8(kG7x&gIDa{|M`G~c1$*;!fzrvk z>cee%%Mx8FNpwC_SUakyZD$Tb;9D4O;^up!J`+>ny}A*N=`h|Un6yE!Y;%{BM`>pK zQHh{MvM9AQDWs={swV^?Y)tXmDvmS4!<0!I3yNpUEI$%hc4~*=;7ko*LZgM$K;WayIjS#W`ES0FrZ5YSXnGRt7l<~k;4jJ6Xp8sMcAI&1 zf?#n-%bfmtpf_RqI&xgc>uE$T12+Qao2TXp@?f>8bEV;uw2N3dPm+MRM_59H2DU04@SO_>mNc2}0QzaH_XysS8y)eS|zT1+DW-orJW1?bw**5N@ zJv$^TT)%MU}8CV*hZ43gSrL z#*tNiu5lDo!D%Yyt1w)a7`JRr<57N;rTtdt>+xbwNwnW=r|v$i(`)am$9N*dlqnY( z)tt{;)W*!QSXpST0_LK|D&rKy{Zar|*5gb`h++w{y~**>KhGgceoh)^;y?_bWLs5* zdZ(_H`3`ELE1lM=%_3v5i+}^$45PxqY76{v^b2+Kv&eGiN{T|N+Q-r~HuV{oMcS*# 
zlUgtzFe@o%Jh6cDiN9bJuRD(0cdJ^;cv5iMIXReajj_^$hwOJiNMF)z@Hb?l3P}o2 zn?w^+8)7}@3fL}A#hI3Wihd2_CTw0uj_Y_m)#E~_68z8lw-a~N;{%#ro)(B|QXGq3 z2}!4Hp?^G7&Kqilo%4RxndGucx;zV|bR`yRsWsfDgs;06#Fqa}bMLm7+VgFV8TBoB z42Q>Kv4?~+v(`K*mG!{-?J>b>k5~D?PEUXltRnr3QhfSN16s^0SY=v)#vy4}kxwTE zqMvq%-S$F`YlH2^x{K%o6m2$wTpUfDQz6290JQZ%X&Hp8b~JeYv&$RZoC;>);1j*~ z7(1bIu!Q}Sxb5sU6a>4& zar6frP9k?hEOj=U#GZnuxmj6!yqLZ*AD2>?Cpl#XyvD;Rr_*d`ZFTDuRc&ns5 z+typ`lFT=7&%X{c+Ygzn$_$C_a||QS?-&peIRH(#I@2KT+Ev3G50Ncda;NgzzXR1P z_yVV^4H9Da%ss}*3h*O&R$9FUhfI!G-P2QYf^-T?Z-Ob{H6_bh#5F;kyyOeV4-oyR zHKj0V1cBBEz;Fy#Nh;((g#)m(1tSMYQ?23a6R5exk4OvG=356JjVi276dl2at5@}| z5?@+fkQ8P~VzZxlTPb=iLdy4RJTrR)lL=>}u*tN1s(R%zb3Mg>{cw9-1OgC!Hxwtd z-^`j{4h;ORX6;X|e{k>G>C^tQtKTZ_?SBQ$f8FlCy%rDv?Ej5j34GfP6oCG3i953t z0Nk158o?s~O#>@SeQRA^V;yE(OC23teJd+7OD0N6Jp(I!YfXx4w-2R}f&FW86Aepi z1Iq8M|H$uVhHoOrbp*POH{o|X76>RB<+kX?mim1${A|af-M0G0_8-?3s{Zd?{nI_} z*A)lyKdt_~5ce)+_u}`Wf10>oSNdB2wE8bocyk?nbIpD42fmj#|2F?wsE7Ko^kx!o z!oQ|}+vZ{WK;VJE1Azwu4+I_v{51*unSb-fvvL#e<^O;GW;XPXd*0@LwhMi4EB;Hi ze`c58bGVnk4Pxq3-1`?Wdpam;1op61v~>HutkV^VhN`??-blf63!N zZ%;CiexHEf&AFfX|9kc%-0wa9cpO;V+)DjJxaahX=6`7UkbEHUK;VJE1Azwu4+Q?I z1pdsPoVXj<9Pybz`0AH?dOv@&Lk%r5X0m>mepy*I2hUkSkQ~ z`l4S{@kS4Lb7Y2oJh2nM)q*z~$xW(-))B7uvF|h}7cioJNh9orS> z(eA5k(r95HC%q>jp=5&pTgbrRlV>L{C21n;CV$xrp6cexD+)l_BTPLr!Z?& z5&?61n+;nk_!YW`NItDoc4*1*0*oyJYP`yekwG~)PiG4Xu}6AfI%`Jtv0eSF6cb}{ zMOphOhWx0=v5~7)nU$|fg)&jA#HDDZ+fMbcCNoZG>@kFl^R3A3z|2Y%YTz+go&^Dg zbIdXK6(_389woSUetl!hiw@hYWwNF-GYII5m^VZBSphj2pv)flX$S+U{)hBNo%jqq zzz}ITR3(qojQ2|xO%Sir_(6Q(h;ZHHhh8rcgT8pzjhp*5w84cBVc( z_M_Tj<<=*``}D%kUK9r^V0~d^d$kTi3z;%1kme0XPMy%*7t%aX;xv1ARR-97y%t<= zS0&tY8t;pQ%lCyKtW-pLK`~|mO~kg_d?#wv<%^*6Y=aom4y`@0T(3kDlwn6OC4pQ5 zXM-L~<8Phv7)*5_99fhgEL|R)5$~yC9{`YPkyDA$O-xHBfSE9n z7|-Z}J>@e^c``nzEA-A_t+H|CWK+XnI92ji3sm@Wqg7UE1`d_yaF);>V1grU^NpE; zwqc_0I5~CLKC4b`cyuh{s4^vnjh=Ldw5x4&PU=)FhWkm5iG&RYr~ug7MzFno5j*zm zM9gk0S<||!0o+XH%fu2dAovBgVI`TSkVCEaVmD5wB}RrlC>SUP8Em+kqZt#)h*BSR 
zg!Qc!{m~%xW%Ntmo-JbaPY_AfmBHzr+FF?TAJP|3svyB|pzKb)Mpb;oUB$2s5rX#y z4XAaYsfw2P3opZ?Js4UEmdcau`XWhQBl?T-VJ!d0n9(IGP?Ir49q%a}_&$<7qg<7T zZDT4*!QtRH>57{WkzvYtf;;sT9)-^9xrbP9mB_Ug*WbKR&WgfdAgC?DJ#z2_Hb)Q@ zF#muWnz_P;rUoIb5)hx2(4XB36Uy#_^$=m|f1K~E zmiRQ;)DP(CTU_!Xp-;KhHG7YV%ZG{qZMv=P?C~KOMGU649CSbLo{$of8J`n3rXkz9 ze?sYp(C{EbhU*h-nR!}5{PcTH+~Ah5iLuz7*0@0;XG|z(=^%|s6&cw_t=oMfPW7^5HUQS zq2)-${LVAc2RDW!Ls5OKdfl^XPtP5KXNtfJB5e;i1iQ492Q8t$KT^6WT(1nQTFb2) zh+=!D55Dh>%Z?BI0i?PMFGoYh?AchJE#bhW03l{_3?lNTMD+969M(2(!X-_RHvm%s z;%H}2r(Ka{WbK3{UE+=H>t3V}R&wL^7KVpMOM^%Nzl8&Y>g=l=9^4o6KUx*a_b+#k zz)%o*3nJIe;MjG65$CUN>9IjGG5w{E7Cv}g#I@liq!G6eV#vFiA%Mr*)JuE)-XrrW z1s2Bg_zQ?>21WIOBxo&E9~);tyVZaS67x1N;U+u$q!1P=w|icM4d!>M$aX}UQp0#M zfttre_A&uhP0bN}(GESXW>Bj95*Hbh#<%+-1WdZ+ij{H6cor~8vZ?6TWEyK{c#|L^&e_I{YvyG3#>6gj`AZ<1eplef>WAC|fd>K)1Re-H5O^T)Hz4q5 z_Tlx{oS1Vng73M zPoDlTt9Og!S|})fPv4~OIsLA{kJS&k2LcZS9tb=Tcp&gV;BP|!aQJ#CuGjJU{-h@s zZGqaw&O)JoMJ^WtA{!3Aa;J{FXdkfdNyy=Q07E_jH=(_+1i&#Z-vDf4zIZ+}N_5La zc&(sB5}s!8;cHR;R@G1`3Jvh`6E;5M2J{nJdV4+K3Iu1g`nKhF4}o9=0~2<2S;qFE1|r3x7;1MsaHrI1k2UG~9z)?IVQmr*xH z)-?-%L>$=l5nw4uhFSjimA(+TI&`lAfCc;cQsawWLUC*?_o+{B%oB17fO7bZ100jg zGnq-+N6L&MuJ2J=x3C=QWre#6!%TTKdFux&VQDT_@i&^O^So;5gHoK*dWG<|cB9bu zK-($SlRd;$Rx}l9*KK`(0R3AMjA<30kybF_wH8u)oa02;W8{>PLW)rS6BY`5i68+l zU*I!$MBz2#?Q!}a!W!y-9F-H!C_Bs_&sBw$-o=P=r-#7dnRV$soiS)j)i7z;c{Eu5yt*$wPz=YsN3&o7fa1S$pv61BSDJ2dFL^B{mN zEh|Tj$EGwqVlA08L1^-yS{-YLBl%(z<@0$kiD|63A@}O2h989;j48j2<0kqiV)1<*KKe0*b9!gkrPVz*o|Rs46{_Lu%NcjBV2Z}gn5k|Ryh6|S$ZZ5yO~$!UBR7@zV^DZdMCHMZQ|<6&@!@2I z)H?uTFJI%#xBNz-km`*D6vH_#iEC=+YZsczdnNk@J`r%7F6T6nx-XS3pSc{-`v<8J&2@;V}ol5mrhIj}my|N#L`ysaz z%6NUkve=i#prTf-cuWi4_<%uViY>0%vPfMu421eYD6(aJV6*_Kv5iD@tR#UC4Gh7v zw7I}$FY%2+Su&@wBy*+`hF8#~O@s(VC8QSY$WZ$*qW*a}{t#ZYxL-|ki_T=aGNRkP zWy?CukO=!Ib(3m{Y8bfILr`|y>Fh(ga^L##?6@L!P9$a((jP8%PaZsNug#WlbSRqh9m?`cg;7LDCTK31o z2-7!=H9ZPDtSt;8tbti#v7Xxt`8IjPlG6g52M3uVb_p?I`D~Q&Rv zkW86_O$!oCholo1udsBT>A&w!9 zb=ZJV*@`lJk>JJ=-HsGl%8BzW;1YSYw0^iaD!phjIRwYBX8TIMP>X~>^g<4^pqR~b 
zeHO9ipM~vH-M#Yg`Pzm?8w_`W`({(@JJ-ge zpRg49>w9zIwb^El!}Rbw1v#G$c&s=FI8)t@BB67WhBhx-tyh6&jz!NPoJDs?6<$6; zcs$IU_<51L!Yl@``xzXSiLR@F35(LzUg6qdFQdU^=sc;strrNJJXzKBP?98=Kplj; zrF0KaGbYmT5f(~swh9zqDn3Fzn0U5xkL9=83dT&3o)1dgIJFWlmw^zr@-=%_eXTuq zHU%!-<5~g_9GaAONsGGi8CwfD#e^ChfJ~z36<%vq^F*vP2_>Fwg!0cR8{}a%2YrhG z&2=SvIfxxRkt)dNi+(){hdl#!n3TOj|2Hv`afedg&0pp0o;DbVOij|X4jR~%z%Uu} z`eT89WM)iM5(MaimWPr9igY;$QsXqQg>+3%5(>1;NygT8%a ztfsj9TPtlUfH)+S9y~xUOV zgL%(uEmf+sRTW-?xoKx!DLI<#cOSdFg>YG)4~y+N_9D+)Q$KSEkSl+$)~Um0TdozR zAXn_-hum?t5=pAKOP)h>keVnEo_PjO2GuLr!Wq+SEnSA`Tp?o11uMVDLb@_TBKCIn zq~hcW;F(_>loYklA%^#%hfr5+QH0Q1FOPB_YYQ9)Kt-T^Oxd3Rdx=UcGtx^jFI(i!6q%|S4|Jv z_iU~SQGQ{)08pxM36-3iuqFT_TMeebOiIa$h@f1_;jC^aGKoLCP{V;PQEJl*HzX^(8v zZ6GCc-49x-qwzJ~=X+$5pq-grP1@d4w02LmA?KZFKdaGU9?DA1qp*Sk<99kcRFs38 z>i6e(*r8_QLFuv^Ob^z~kX2Olo@jMLVUE~^Z=zAxg_11^Qq=X1?Tzhf3yN0e%K8}j z%lD|Sb&>66( zVQ}VC+Yt0Vy@E6`DD5I2f3vm888LnH(#h+&z;!VYM4&%Dwcgbx*xsIKi9uDi8NE*!l3XQ`weQI1L0i6XRadKQ1 zO3fJ&aH(mO=!Esfh4r?fb!3R17B{LPQ7$a!6Z!1-3$aC|?vLb0=&KakU&h+vu{*xT zOrC!ZNg^WPh&h0)Ft+_v-!EjGJ}kq@)FSSSz;^w&aY7R-rufm34~MUCQfu>)QVy>$ z`o0W>EE<$_;-Bt#fmHmy%u*T#OGB@k#NPXwd(eOFytgmVL3>BIiW86R3 zS(mIvv{=j^nB33rhX5M){1k^w`Llp3gt|Z#p&1~$W-?bv0hwW*S^ zr~Q#~G1z_k!{P#nha;RQHG*^{Gc_;B@rW@FwYtREJ1r$Dv>iX;#Zl&lig1#-IHl2} zJ%4Yj1AX?G!x{E6PoHwK#Mw5QADb*?lubw8^Mav@$?=KJ!DRH>GP+7SW;mbOa}k@E zecLH&ZrefhEwDx*)4c$-xHuGM5dDuj*>jn?*xJZK9J3@}n{db?A{U?Ao#&1w_yvOE z?3?y+^ooYif1PPQid~NO8pKA`UOw{d*PH5+;*;$$lsM{~yL5_^?U3;Eq@1oA=MbG5 zsN1)Q35#@0=U8trU)T1;u{=}eayk!d#b-Yur+w+%#mL7JV>Uzm0wn1?mUrQipn)eP zH0YkteBoy4B8=@Ew=Am>c<=j8MrR)Z{!f1HSXyzI+|nnNU~abD6*cP(h5qQ9AS(>{ zQT^ozA zjLD-N0fl6O-ZYUP#lpWWFSa}@V_^-cEa0Wz+)iQaMf>suiZ;Sq0?g6O=mKLOC#|{i zjHr*hq`{M5uq#^3y{P*NdpxSg1?nSUu$lH@SLK2wGpY`ws{;jI_V_j7ojRo^8Dh0> zkO*^i#U^!ipN)-v7?ly;X=t{I7KxldHe=mP|9Q^06Bf!0Gk>9BLZ(m3JHD>L*mj8Gh5RrVZ`)8^9oM`v4gNzo&0f-;a+U%O7$N1Re-H z5O^T)K;VJE-;Ds!&7+X@*YWyRUHYgi=VCe_5M5S>SG^cld1C_%R^r~KgFb(vNE(U_ z{~!#-Wg^@wpCxL|kjs7Jn39iQKX7}1Y}GltU%-+@Cun@xmRIGOj!tKaqFS&i8G7;9 
z`wQAv)y9%|Oo7X}AZ#X@uoGfS|8EM~GT`tpca~z*d0Fv?%t@!KMp# z9U9TD!(?UxiZF?4pC$Xb+^4H(*`?@ui`~{tQk=%lOI0O-p<%TPT_@IR1>joT_Vdw( zU8!5etL$g0@|?#T^`E(+f0(Hm4Acy$D+eT7781` z((0L$YEpj2pJ)tR`z`Pj5`y-#cfIQXv`wtC;Y_<>K>RNq*()oj(qpZv*6H04ZXUC~ z9>v$UK+_y8O@$IJA1H;y>4bgH87WPr+b~BoS~=5v7Tu3vDY6`Lf?YdI3M=<=CPWp- zWgZhp>sD3gs$b3!=N~?Q*P_1Vz7AuBN3c6T(yEDPhQ&K3_chR2s`x5eC5(MaX1@s_ zCNvQaoE+yH{^8NEuA5Xbdxq`_kJnX)kFxE0Ggalu=N$iYfQxU3sEs|-Sd#{3rRGLP zexR+n^+M5mysag#YwC;{yW-YdCrObS-hedVRf!QA)sghtpVEb2j{;*4f;)&7yc1rK z9tXEwTgQNLC!BaU|8OEN`j7X#UOWN8)hJvFAWiabF)Wjfq?O)e5bLYKQ-`)++Dsi9 zGaV?am!>t0DLQ@x>-?H92GfN}*P2m;4ZQY18aJGi{tLM?5zeP{@!AiWz+}2pPw*Sx zgHbI;Q$^@UefmOh?7}|{=Ejm;-uHZsJVfO%6?tCBi+5u~##?`B^o97TCi)I*rp99M zM)v?;VFdsV!=v3<0YMp`2D-{_+7qf6MJ+`*D6Bn=aKx@r=QWomc11-bAIvCA9cPsj z{0y)3_U$i@ErF)lk2F0Y;tA>yHZIan2qL3er+J+0Lslre_NzPzP%K;c$KgDEvI{CO z!XR3s7+l(y~tcTX1A$BdV~73JO8ahPUrhK z&9X_+xbY;6fu-BlD-pE14PMuyVb2U$4MC zvBMC=jo`ew)AZ2J$22;gd1=ndspobi=U)*RHMZ=fiQ z`_5fC)dqM&+PFaQ{fA7<6dR6p%7H_Ny3AZXsj>aDhbbZKJaWw9B!L{Qa>9y(Etqf> zLp=vNv8zWI)(d=2WTT!BCahqxS!lW^)VMSe(aBH4dGEol7`yZx19ngt+dNsfZ?@c! 
zvj52FL`&Pq5+*OEbxc9CK~1If{uPsq10e5#Hu=_eRUYtc`1_EoCZ=BDyyjP2yq!lC zFY4^`WV?0?{D}KSKU&0(wL})_XR$_(nA<~E z|Ly+cKl3Bj-woB-z{`a2e`xZaS zU-^@8ewfwo`}n^9p1w)d-(`PS;4c3l9tb=Tcp&gV;DNvcfxjDp8{^>Sg=&C6dtV}% zTKr^|GgE3%5z~-Zs|xUl#e%8VzRKsky8a~tl}>O3w$G5ujs^mAxwSS!kW)k(q;6*~ z)wnHOL0YKf(^=<=joi3W9p`*1ahNRFu8M;Urh_5gv``Uh*m0Ln0k$b%o&fp9SiOo4 za@pMhT$I+J!nV7TG5rFR*FwL$-(3_w*{$+KhZU(iQfacQ@s+O$G5L@!0NcxVRy3?2 zae-tuYm#T0-LT8m7D_%r;>OX-lZ$@yMISQV%Q(L(5{7PnS{1?xJKGb@?mV2uuUCvc zcv~{*V7OqNf=JQjj#BG~5xue2t<4;rW?rzsv9jFzs9Dk-aVW>otrv=ekgs_{=9Yr9 zvu&r7BYdU}x+?tc@QINpWU)SMBq5XbP5crcw= z{vjvSQlFYE2oMU%6{b8sx$&mI^c;SvvR(r|2h)iZm(Rc1t&Ef7zcKAg55EHkX3hup zK9qEN-!?H=8a+h8&M9?`4Q2l^z?qDGqUa&VK7lXTXv#k;gL(Y3!5aV zFA(^U48??|GAtQsE@rTE0FSLZa!b!l7e*HoJDwZvt?%qU&*oWgee0;qR`P;8Q8^~z z>TndJcy%G0y}c&qeT3=cbnzfG%>b@v6xNir8MUN27!rJTl1fozU7wGQL})Cn!7YWj z7nPDY0&44nBc;OcUT%yq6g{SuHIEl&@KPpGGJ9P7HlaLlhkC02#r7&i{Fac#qj^ed zcF@lj$!tPNM^mPh{dqDkG=O_%x=!=Pgg<490%E}#=&OZ7D&W3Z$$cTLy7Zh=Rc6c% zZUMorD4u3wUcGN;*tn@MB3#0f3&4Cg6=u71H1X?vNttQwC@kQ9d3v0&Pc zfF?MJY2LCYea0n+f0(G@4VQbd*0!8Hj?%JhSX?r<>zYNz^oZ0NKTJvXord^%v*in5 zGxhXYO$MRUT2`DZI&gw%k-1o4XlgrXlsBw~%Dd4Msl2;A7S>q^)~_wvBN(~*r7fS8 zfXL>!){vvK0e3v6ZG&a|M2|#X&Mp@Wn@Mm|J{y6TGQNt6%S^!)*wf_ciHE%zt8hex zD(#8f>+6Ar2(hU3W`?ME?xJ3cu9EG#in0(qJ6-s6Q&|0M1T8!7Y-0Tww>Y1ilFvfl=;m)iil;}r_-2lA`hHEH8?m%Wq5E00_itWU>Wh`tl zI?>*O`3?@?6cPnuw_>vu^zCGJbGQOtSY10EX zJq@zHWs~@eWqlr)m6;z9^(T za)o1g*gTkx3XBRMVZ!OBJe|b2xi1A=putOD!2uUqXM&VvF|S|hx|Nx}*~#@J0>FUI zVY3SWK*e#+Ah>kRx&UFc)w2~aXAR$OrSvMbn_LL@R+#21qPO8r=*Two1czfS)pZzN zAQW8H)-oMX@0r`0mWx#_I7f-7N;ucqH{pR~Zhkz{4rcOZIHlCXZ_Ezj>Pq1e|0ZEd zIpl%iNT~b}RF7cbIb+B{-X`^ia;D3wc)H7r zFwP=bjulX5*kvy1wd1!nt%ngT?}YX+$0Vqa@L2mo3se4X{}cajjOWk%NnLjs?&UB2 zCqF%pn0~h+$bRrV{Or&8?|L5b&-_XE9PZ^WdHm=7Ngp$B=k7+;-`zPs^Z)n!NfJNI z>i1)1yzqPaCUwv0cLjc|e#ku#cp&gV;DNvcfd>MA8v?*&-~CA;?#_$uA!!v9IFpM! zm2`n|nraZ%b)lVGZLo+XWIGz`e$vXRa0BQ=ru3^RKuk|~FhWvFNv$04`O&y%Fp`~! 
z!6KGn0=08eUD;hi>I{t(Qxdl^rkOxz1N;p)lGOWfN>PtH|2dnWzC-hhk3h6L%g~{^(!ejSc8b>7|uhGhua9lb(|FFNe;~?!wz#vmd1}IEWYWMogUAH9E2pup5yqs2&n}LF zzJjN%#|Orh^X+keg*_?#D&+G<;f7I&`XgEyVCBKZvNRc2y$wCO>0NBqof3#Owsj*T z=M~LJvnYiVs0)KcSb*NB+AMzIYDs2Ca4If<;PGw|jhII@_$OiP{S0q}o^xxamv7qoNBdYXZ3=YC109=)kJ6I57(3*ufc0go^ z^`O4mSn9|!CjpX;h6Bs^Xm2f`#eorq_37e`h1WS_3tY}8Lz8tf`Z)u141E!)y3W1P z90^3geHZtIpcez-n815RB>N0H7rYKZd8Zy2Wcm7s=lg(Aa^#7rd+dp{Ib>K_mHg5k zr^jHz`{C(uBD9wKdWM_xgUXLkgY&H7$v7KW(=oPce3qAMu}#vv#v)gf1co_F&?>YN zcCs+*l8&B(V)tA<`bbEu!o2lHy$)W}iRf*RLw55e6k9Q*WbAU$VK_tg$`+i&^w-y6 z6*@3u^=v_}5iFEHc64Z|l<0){LL}JHmy9ARbe!SXjKqG@bv>;N*it048JS0Qe8=u1 zcYGp=jvUnShZ|LYcjx@f|KIZ` z>HjdRzkSYc8=~g#>6_F&r{5L$vHBtRK;VJE1Azwu4+I_v{A~yT^L+Ow^*&jm!}TCF zghUV?)0V7w#nvJL$)T`E+a#}gw&vY%!Q*%6<Z%+i}MRW@iERPq~sGsTo4@$(hn8vIzl?#kqg!y1D9 zY@>bpHdstU;lietKox-lN+C&afk55Q1`^nYWs)B$4{X>RhcxeTHgjR9PzR z^Jk7w=HY_Mt@s8>C`|Ug!8x{rxlTvKJ$j%Q0zQv zHjd7dc?1pM1_P25pK_u&UDcPCoBVL}nY?knh|c4_Jvx+-EpOpP<8W?0DMK5)c5z@M zh)0CGo8q#?WrCdW;O%Y0efB!bfy!#*3+?q@DH6+B%Cl$Ut8$&^rI3?>i!?42y43GP ziJOp_-r^8(KU4B1vcauknA}qDW>Y^RR78WQ?}3wu5?h2D#Y5VFYxz>p76!nZ`f>mj zwW5;tsrg{y<0A17Sk8?@9Yo#)P2$_yOJZxSq?Cr}+iJVByELL1^nemz$PG))SJZ`X3Ytu>M!gOgt+ivlWt8TD{_NY5Bz@miQf|Xpj1IQiswZ!2j z9(BZX7QJ=YiX@wQNZ$TR#`A_66)+s5B1OFS-vHcq7SnbR;VoT-5_-P@gkY+wX-qB$ zc$C>HmQCYf5NtouRKn}e6nfj!s@X$^ZMEyF0RZquOd5F?)yJ?0y12j1N`&B;8gBA) zNp7dwTr2J{WgyC*Vfa|FQKkakoD1#Fvnc`)%yMay4^ip7UY}nulzcM+*K)~{OfkV?%~IpKYLE}em&jGU#_eF{BgDgc#Gsl z)qndq`;pH7yIi(Q1YVac0}9u9veC?YJA1SQY7zrh;$F|zkOIPIv@hr4j+V&vL0(IG z+F)JDc-jG%Q9~RN;;Q()d(P)8YEl21+{_m?HE1RO(Pby24`TwSeqq|Q+wl41j(`cb z5gwSFI5gGVILZmHj zojvg=&4q;`2;gr3;I3j02EUT=H5JSfi8{818iUKCnu6PD&I&$ZA;7NY9E)Na<<=kfsz zLV?N+soK;GYZ8-pIzxoz5I5Zp=kuj1f+rktvmJdDP&5Amh>4CArf$Hz!)4EsOoTh6t6gjnn1rYgTcE{qxQLg*}^Q! 
zbQjU!5xK`pp1sR5X=WAK4ZtL)@dnIoJ_+`%6l7O#(a}wMDd-Oi9MPPdc+d@s7HL)W zFGY1z+U4rrarI|Rz;Z6U8O?u*(oqo3C&I3lTjW^#jFxTWJz8a4xNo8@Sh(v|i-Te; zrL#D=k%$?;?NjP>q%T>^)XN1iddUcMgaIkgd)15g^ANl>ecInsA}fvvoB-?+hX}k z>4W^F4znopFw~dmvBg#;eH6v5t!Wnkm#I$`VB<3(W1$?qEFK3(L$_k}ka1!}NR3X* zztv3mZ0-{r)=;XG?z#sz0nO;_*$*m4^~g93$%}rC7G;tCc_t)m%m{K^aT7B^n(TX` zfX3#L@h&)HqR->t;wcxYfPryg?@2k6^r9^bp9*|r;+~%?)$06cz2nngePZF^ths|t z37`5l=$*gwwwd3C2t99v-W61j_jZrr=4vi&_%JZ2!4ln72X(Y%N^Fdh26eNVVx?(y z1jYs%ak0Ea&eleV#mR>0`izQmJf$7r+DjJ zm4Bg_wqsx<0l|t~Yi&2MvXPk#UUsZe`b!1N)r|goX<)aQ#%VMWT_+Lw-D$*kP(`Ah z<)&E51m}>X*0O9*$c?S@-y8v)Kry@Z6}8c`fKYgXrl;YRJ;&Fjb(W6w=Sx!3RYgpj zN^5WwRZ+4kA}${Xk1xtY(znGciFvAeu(m)tmK)1pYPzKx(e<54&DRkG*deCe%5H zLO>E{$p~Yu&v1ebh9wAVH($l)@gK4czCZR#$kVUJ)=1Iu-ef8%LiV{p^=zjT(amVY zdMwkOCywKG@$7iSzW^DGi5)ONrGGeTpuF6~=BtG4PEVfKF5(qHZ^qNe5|lU$3IjL9 zjg?~j14(b`y|l)|2xNoCd7IA$p_!=+%Dm-u#`teB`VR)2ffp3gY*9}*GIKCm-nNeq z3zy0c!Y;L=H@4AjIQo!;@i@Rp=)`bIi5PEg{m0YD1XHCncA5gS<3&Pjto&pg*M= zfE7i4QZ^*4Wgy8b#~~vb0@Y}78FE(nR^wOlq@yQ@XB|SYe&e|pKo35NM3;1tiPg5h z0Zd%BP%btepIWuCoriMyvARYs*gERH%rs>DF1_}nMF7nPkwbWJ$(T@hS5pM-!yfo2%kz`A1G5{-DrK*U zDHymu=q1^q<>CU2j<2N>YkeScB9i*jzJjFz5qxZ=p_X*+vugf+A;3DWP=`qR-f}%Gi26;=bwIy3luN^o0&t4ZBl^xg||& z0hAFg)8ntj_O*LOt_6~RYGMIT@e=vagm2s#w7GK$-gJ_rXND+Y!=fDZ^mR3}*M=5- zm9}{QqvhQp-u0)bvS%Yu>+Iw?F+yJPe%tl*)@847dFIM<*I%Q1$z5z9k(P@BhpM2m zxiRK%G{=KpLUtjjKzdgsa+M5YaCkCB7cKohoJ2H|GND{4G?f*%4+$O;!uR@W`d;2vDD1 zrefs#RX^e(UuCY$=n)p8?Es#Bz46aqfgGQ}gl>F+-baNFOkfVxs8GkR_u>~yP49^A ze(c~a%#F@Czq3&yF;iY0xYcrfj-}bdqk?0f*NEDZDl73YFAXlhNCdYQF)kyMKz}8gGHtvbRf(te*n^?2m2*%r|I6vddzzPa~$i%X`a zd;*pquEPF5_TDl$k|oRY6*Dt4Gc!viW@ct)W@cul5;J3onVDHCF|$-sz1!QjcDi@w z_SpNd`_}qZn5CV_aL-JSpP%$_|Hlb;Ej#;D{%ErGKJOonQsZozHLqTU#^W`vu+FSR z7%G@a%c@sO>rU7%d3lmkLl)_T`-9n8y4NJw)cHDo%oFMH4X-34yiTeg1t-{Z#B zQ=ORcTv5+SqNxtb_1bkm;2Klj88KEdzHvhWBBP|owMB9YK(@Txjf0k4mNNL;rSG<~ zJb!XMNG}qx+k{sx$yjK2Fm#CvM{i@`a|bNr#c5Mt31DnKF#E;)A^G+c zDcx#wPJ~5$Bl1r1(;1#3W4l)TajbGY-++sK$3R68!YnnL57Q>jDI1yF@rOQ3o|Zqk 
zdWBmLaO^e9IO-c8bHUphM6TVCPO41G+lZ81jNU%gNS~D>AR;k)3(uXYd?XJxDTOY+ebz3=HdE$hxP%(_zstn1sO*4 z0|sNrmmxRtMi6wU;)$n#k*uLsR+OEDJy{UBJqz9o{IIF!CtePi-2fx$o$DTMW9xxroeGF>Z?j}}w8^i_1 zBY%CplRf2&^-h5~-uO{aEE#H%sGW=}S3hfi!C7CqeOH{SC`rH4WEi8r%qu zim5L{=$LBF^f;68rCN9h%Q1&c)qMtEiwaSr*A`BnaoFs!V1KBbzPH$1qmmW~h3y9D zkPa%nNsh6%R7g@us|)Yo|0i4 z5MXQWPFb6nFDQ6Fs_nWpRr9iw?1gJ1wq($C&wedy1U5{mMFjunE=hGtqmSVP6t4JO zyJ+$|fLp<$N3TelJB zD(o-CgVq8DXqBX7DK6?wb7h$?GC|;QK|(y6exql5ckTkQ19`kCw&stph_b<>crRy|1N zQZWH%y^%fXB+NIO(Hmnn$Byr!+4O;doCVS;5FLM0E2QxOg#y-gJ+56Vox>TR0s z)iLZZZsd^pR?iB{z^_*w>y7)cdzvS3lokknsOKjt*1f6$bXo9oE!g)mS9vMBv(U4S zt}58M*foGcIOv4tQ2Gw6-e#eerBcvL_uh0r>;eei&`K&`6`_FnLm|?dUDQUHUAdu zU@45+mbOEPs$_l(>hiQIe&efc*(&6&g~NZM`2uF#c&_*xjP7E9rfyKt$DMWU@9CnU- zqaHyN+L`E)=1+`RjGgK%y^-cVZQ-w-z2LM(szKD$y(xb?g$-L>ggL5KPj`%I7LGxM z(`!Mfqt%d^=5r!^Qdfan79B;V)#o&CK7f~gBtio9wP=BR-as%i0s5D%w~?u}cVGgHn?FK@hW)w)Hx^WHjm29+b9!^eh4D#u}jqvavNig&?+Ir2}e) z1@ptpUwX%P3vIV59Y<+*9z1|n?nNWPb@y&u{8{9NfAjXZUt)+lndz%Jg!$6Q&qg`T zyA~qtSOx!74jQ4D{y(b$V$ZLmN^wmP#)=Uq>VyN7wlUieIomGa)=Cz5Im4F=1%evXP~xoUj3URI44{iXa8%%aK&<N25Y$!MJSYT$ zEOfSlK^^IRwS^m^PM-Ho2DSt^+)uB~ak3?_l74O=)#JqD{me3x?D>;j8HEbPq;nss-|K+L z-=Ih<@cLFvzTt(@W-$9u#d?(C%0GtFKDt|&sU;lMbwG)>@{h*W@J8X-S|M~0Va!NI zh0c`^&Ae~yoXB5GVzbC~`lgaLAfE+FBeF|{i)q*ygFKF*5(!YO}@BCL3N!EVws*lAAFM>(iW5dE;4x( zA6-8hbeqG}0jUm_OB5LiT+_K&ROXz)|4z9}CI&_~e-1wb2Gv3@LEbT+i?*7NAmV-W zNCvNMaV~S}KrUW2HoSpz{(&Y>>Oy`gLo!}xk)Fe+VOiWb)rUDdS_NAoz*0?KnCB{R zGam!CJRT)v?Fp;gzA(fr{1Ss1YRpcrivmLM8 zp@oC}?;$KAM$cAIy?TT{c<}4Cu0NxUg5F_Hr`~k>5;oC9S03JHQXBfNPEH_9nN~R@ z6GZu~)g|Lr94o4#9-9@6(sy0rR8jR`x4OXRDxyQPX@h3-L@*^?n|ZLs7%ZoFxY9u0 zH7G+-wTy0Hk-jG8I3!sf8B((^u8@x*J~$_NoS??}pz86Gui69djycJ+&Vh`}a}k%J zDsQKc%Lc732Y&fFvRZXTZ7o_OiP&^OS?SJuVblNa9T zMhnfiOKq5qc>=#T>`BwwHW4(QA(5}vLQ#O3==x$QW{n&JE@zL*2eR6KTNb^@jq1?p zER2`w{ar!KGx=5=)NhtB!Vo{6tA7UITMwwV)x;7beGFLg>LRT3vcsL(dzp&bg&Hy9 zckLzNhJvR3L|h>7^IrK$dyx9-a0XtAcoa{0OYTu^CX|)-4*h|X7Kag+k#*d@X_m9I z@^t07@}ZOJN zm~O*7vOpj0o+67Hr_J`fn6GVOm_>v=v4te`|Op`>Y1*XzVM 
zT*S{T29$d!^>F4_cjjcvevztmd`Yd>gGv!#NEMPY6&BDdL7S(6kO-vG0XM?GyQS6` zPZeo~WwL~JNH9g9Y-kRDOz2E;Fr~l$CQ&Rx*~eA=ASR@2BNGV>pL1;my(+4WC0BN# zM3lMITsBwE^ON_=DNJ&o0NC(y(q7Z%r$_q#JvkJP*F4S9fqn`@>w2RG9 z!P17HMq!1z?qPP`!JL?{dJIP}+mgywyn6p9PDh33}1Gg!2_^) z?FN*%q$EM>Wpi-1YgO!GHC1W-`m7EYEnHPAQf>>nG0)9q=)EDCo(lbsObf+F)*5yP zJ9R0EbqEcd0L@FsF=S+P{ZBS}^XP$n(9KrX`T57X3YAwFA(Nt|+g$J3IG?4v2bbBz z=%40Q?&L;;qXIu<{Mqz#GQyPWhSzB2@7hIVi6;u$s3+Nr#SU%P$5ZvPnDLB>QN2vc zpEg_!wRX~yzL9*NwI75zzT`xiFly{THL)-anin_7_ho4SsOMPmKZ>Rc%En%P#6UU= zuC5kzFKa7uN#aIrcf2_Tc<>dp1$=(hWwKp`s_Q`$r6d_oW#vNcaO5ygc+(#Cq-a zx)xV(dw)5o2P*o#7H6rS*k`09GCLu{#<(e*G`tBhy60}RFsU-g6^zSEedC(feTbtW z_Hizt$O55+g4$~DI~<$>eN#Z}nth<(=TE4USGBIxFTR0PooaZ2z1U0Tg5hN*D@ZmY zYtI?xo-wZDvb{YoZXf{Ms<+xa zi&6IKsePz!hUO7lDs!aRTdzFq=1F-aR>Att34g zvtfnnfEnZ0wsJqP8crKlzOP)3U%t-F-;ikA^q0$7aIYZNLv1P@@L|g`^dZw|H&6_p z-97-M2d>l6;+O$h<}E}(>NEOdk@o))N_WhU?&`%<0Wm}YC%lWwU$#>lBT)>$&BMbW zkMA6BBiqR+m<(^_feWR}P%g)Fx+3UI(`%QON|7fz%*grC+K**X*Dgfd??)Tf2CWJe zkDN_#IuZOmcbeh#(@m=Q#6yfYtTJ#!w_kw;Ll&J9|B!sER9QQZdk|n<7j}e0T`t#8 zUqlX2cK-j;+ZEZ7UIVXY6WJEq>S>uc3I^2yVd`0jxTOD53(jP6+!f@KS z2Xb>lT7*R_5}r6UIX*AWW0`scyUvt_P>H=FJXXYh6+N|?TVZz{ES3HIM%oWVll&C} zTWx3GwtD<#WWet)(#$GuE_Gl$MWrxvH!k?1lwA-Z%_-#GhJq5)jh!d(1{X-;Cn|X; z_`*jR`F6&EF@A=7i$`WU@nEI>GQ7fpwyfbtkdf%3xEFN*QqnP#P?4%3v9^C!!-Zq? 
z`EKhx)--%fla3t%O&Uq5)o4r3kGH6s*Jpg1GdpJlp>jFJ&U+4nvVgiCbbciELG7XS zla(8Lsp{7n1@m_58Te9m#xm7zJ`6n{ZPD{J+f=}+AxREs7xU4;_=gyRE$^y9V`R<5rO}1PLI1kS^Tp9_-y^(9=iPNIbU$F{;n*< zga0GW|J&zm`0wT#_{HIu{YxJIhvpjiTOA1e@dEl2@;|I`jzjeK3iw-5|Ca54Z2y@* z#;>IR(*Ae;vHj<#e%Zg|@t@}O0Ac!j@oxS}YWmOR`=zA)m&N-(e(=BZk6#>q*}vrR z|Gj@aaQwXj{#MlgZT@ldXAS+bf63!N`3JZ7-;3w}Z`FSM;_%DAQdXs{h;Dk3UcM{~B`_q2Jv(Il!nY-#bD# zt3NSb)f^GoQhzTa+k%0)bqzaPS4Dp5q?I?bpl z(H|gq1*l#cm&N`SGh8-)nGZDZYkd#5lvm&Z^!K3$c&l|JlvE2opLzQaYoMDV9 zz&W7m116oXppOasvSNU;c021Z*MnmMsHF(L_?N63UBu(_46oj5um1S8&*3Q%KrKsG zhST_`tz8_co%26d{g$t%3wh+;n25&sm{D6+?-Yif6`hLO5bEh8yDk&J!57N7KR7Md zVHp83APYZoJ!@_S4Vkdq6LejOIn)~1A(}k2aiUTD5V4(V>6L%ZBf|a?u~=V3eG9f~(4hH38U2YX~VlJ#8J6n*71UCSl0%E*$!iA5>1N9#|HS>yb73Kx?I&wlu+ ziS<{`G(O?l2~wNuh<<1q$?aSspFGySxzW^+w7v@|bn^J_18kx6uHYfCrxL(-fPIf; zl`TSQ;QJKlA*fH`IzQwR^elH%8Ga}o8lf-b1o7f8OmV8nL zV3uO~+C&-xJR2N*nD`$+F{HfINq3OA6!R_ zIwtGD%UhIC{;>?8mpz9kFS^xTT34b~R&;3cX?MK}#Cx3MXYxO`&^UpfoaFbOax`&!7 zENqgt|0$9r#%+o2?%}9z`tp|3-PzK>Uwcp&ac1*bged0PjkgsY8hi14FKJ@uOkPer zW+dvX9H@@v7^4TgkwF%v2smvI7lpMpgZC&Dxr9haZTqm?E-rc+g@Fg)JE^l2dk4=v zgBI46Z^jC(ci8u}P23vk`%726FQQrf_xobF>3y2yf~*=F=1&ooFcw@fQ7Qq{iO*_l zk_^;Q&06CVe$`+_Wmvr`Cpo)<)K)DCDAIE!%fuM-l%RmL1KNCTf{oYQq0DFr^hGM> zM56fr(;b7qiWg}6r(XAeluDZF@4c>HM}Cci|EUx9EAhAeP2e|y-voXW_)Xw9fqx?c z;D7W}{-!ts0pI+*!;pmJppXD5M6cBMCsN_Ct!P-!;{Cc@OjG`nh*e+~+St8cAn62@ zJIB=ov>|}$?U&#kErP;`M2e;$BJ16XiuhiQO?KdsyQ(`pio7g16V1l>;~s~w1eW^- zaD@og9m%*e2LVqBeUadV9w?tS_^dij%jS9$*90s$Da^|Ca1iTDxMGrl7logf8i$?y zoi&I6qM;vdUHmqj8QOpam^YmSgMqToj868DYu6lY$H8lISbLQOf~^r17~{4v?@Aay zYPzSaUSTG29WZFyCGfES0K52*fDXbCwQ4_fl!aq@K{5(~#av;RGBM1Vw4!}ED$3&c z;`16_kmnG22<17OYMPawH8_qPpf4NFh~vKCo(AMKV4QyLD)VzwqlNw3ppT-u(>+zE zwacxHhQ)Xv{+@Av8^!+f$NsS-vz_am92Qwi6JG>b@Fmd`H{S*f?OeUygD17=757 z`pL-Lq>0LR_*)9~$v)UINHpF1c!%)8UVv)OdJgDj>gy?9JO=b?P7 zq&yb-tV)b9aC04k#7_vx_@L`wUa@N3>i{BCmRb@LiA$eUq3^(L(FLhyE*K?*8nJ#8 z;|btX-5kA&^`}klwv3(7lqh9Pc6oj#uMTu15hl#6Y(~J>Y^!giGH58(6!cP_aXFf> 
z>20uI?O{%i-xj9J&P`RL%fq1WbO!g@-v%8vo2MCcE`sqW%Po) zlpZW50WJYmvWywZ*IBJG@}m}1M(Z^mZtT5H?)_0cQrYT~-5R1I5GyY3%V~!GkdO!p z%($3Eo?@jwV|?hgaypnO)sH(8DCh$ko-h^VBJ)LbCCJr~q6h0OYR&+88;S}$qMo`> zmOv?;8+LA$lw*e-OA2FRHX=THbpq85{X9RnA81{DBn`cq{?aBJWh>W3XPm^aS8Y)v z;=4S&f4V{3H1eFgT3x6-Do=lJ${SiRau9F5uyIdWs3VrOQmq+7|EWI$+MOQ&WOtTQ zHp`YmEQY4J_0#*Jp&D778HVnY0}nMr9dwWW<9MvD)a`cO!kkW&ZfrSf-)b`W$+LKL z^(5Ue;Cy(Jcan-&5Wc^rL#O2iTgQbAZNvf?LEc7pL@s*paW>je%4$qXTq7PXsuX z_sdfl9^YZ>eo#wJv35oQF!#f)Pjr=QNGmJdRZ$nbEndPu8QuB^dhNTi5rZQR9twah zNrfU<9#)p6ggBh|h$c{Iz~c|T&(9Qt?zxH*cyOcg2Rm@ta9g3BSqv3!K9%iP>>tM) z3K1>?02O@?3gl_qZo$_6_Chp1RyFvUTdBtG$mALRjNKU&^D*$rT6`{QnL3RHSsK1n zh$(@sIWmaGu8vI#L$$k@6|i?@qSeN_hsk&+8qTy5Mx)v*E=5v?h7=$RbMU=LNlZP3{ z;S7B**1eCP+)1sy=^6%hqZ_Go5XA}|7se;5~kB4nfHDbmL1{t z%H(b*7R)a_MLK>+!(kc4O*h$xs-bxBvUy-MkiJ~%&U{`S)UL8$;*ww^5(vS<_3c~+ zFHI&VD$y!x-fRh6o_x3F1@aZ>3Iy?s^YXck*uydnmuoYSc^oQZ_U2JbS8f?rmh6p!F1js1C9Gp6H&DrUy(FYe5H%0#( zv&37Y#pZ~MGy6g0(%7pRJTGy`d^9Q8-gr@qc`H&QA_GcFYUYMOlN9v z3;T`G-b3I{V=wtYtBDAgG&RHr4D+6BRu;muZGR~TS_G(>x6@0)!XX-mLBXI+iXsG% zVE7aq@;feQR)LLfkK=W9peuAZv4$7zEWMx8sxHINeFu;X>NRoW{D9&IL(S6=R9p#OP9@T^Bj&qKp!-?|2y* zQkn0B+K21AymWY{R~+jl6gcX)n?qyv$tS)&D5M<7s{r}{X>>fCZc1QU;?aPR*xaOeN9<_a&g_zbDKMbY{JB|L+G+r-#^jQ?R^zyr@OCdVst$r}uD zf4Byli6*dr8?)9{Y)gWFfw_tDW^g~Y&_30on-w+?0S7V%0X*6EOgg zrxddrW_J;TW0g5npJ1Kv2EQA#oiDKQx(|Ox*tD@boXbB67{fid0&i@Kg(11Wi4E1G z-hU-ZSsG#Fg5Hw>Ir*S11p=501sKUCz@5bO@{VFUTpEMW<&&dyP+tImFxKd9zls_663rOKvV!n-tI@ z=$TK}I8(vBcZBVLtR7HPR5rte6}AnAaCn2i%#`xkKwm>+FE)d%@*_HiXy(>sH72Q- zXRjV%1q1h3Q_OJ|a<1_OX)GgQN{CeFaJE1jo_`3Czzvei9md0;C1|jt4fmIIYiEzm z8{w`YR%|Vps^YTCRl#ecG4%BXIDd7$fNhGT9N+0IA|4iSbD3+Pm-JGidj8f8Ww252 zO9_Tk>g$(tG+nEh_j9ash*WlcWf8?C-+KZ>evhuukzF#gRlSURMT}3xedSy7=zilw zZ(&Us&)`$hZ0#UZ*4Q-1z<7GDS_=Tdh+Qr7P2jgmKy$$vgVj}1ovwJ`2`~h=F;!?5 zKf@GiHM^;pyF}_Lyi{RmwENHrfYFIsN`;J`-2e@DdP1v)?@+SH?9A`^@YyX?#e1?? 
zGboG#dEBzNdC;CnYi@jY?KJ42QV|xECJYsP@$OKW z$|x-8G$qv`G6_08<&>nNyBG zYlE=h(9Gee`A2Bn_Qq6MIb#vZ7!=d_=iaaq3y$fe_mBJAw{Z4RO1}5*< z!l2~AYPt3nngatDb*|(qgwdgc9K76Hjra5_hMI&0`xFLvI4Pfz;o;5+q&V`OkDqsW zCmDHh(+Gp)s{<&y?75Kd>sWKXpBR*g4`ic|&{*7^M zdeyx#JQ%JfSmhT^FZ`!O>d=6|H+~{#wqD*nm zn`fb*)5{zz;-vpHQHHXA^F?|ecGN~T7!R(*52W34p}3FsqwNQ77$xS|Z_G1z#=-MQL|Ez2O zir@LIYyTU`7QavBH-X;-eiQgj;5UK)k^tBrVVVC^*T&U@mN?SmKii4|8tB{%B=Bi- zg&KS`SBG4LmO=Nfx&_fX(FqmM<3%ew;Zd9lF;XJA1#H_w^r;M^A7tiZUBCgV(43gU zalZoAt5TF_VQ<$mECAUd0|=zJjkIXEP8h|n!UlmR8_^A(n6i-dUqD!IZK5tZ*f&t5 zv8l6DAZ1ts@eOco9MuXCjPm{xZ7eq8HQ1^e%^B`XZ(j1*=_86y9E;K#M5~Nu`EG%1 z_fzf5v~Q(S27QVr6`7Yx01G3jM|HleL<8eu$3C&)J_dgc&GFgY&%;4c$}KomT;h1- zgeBHu<5C8XtHcvrMN#tIA@Ok@KoKZ~oa7j+tX448nGb$f+s#%p5aq8^#&c4Nz*RtJg0{ycH9ts0uI}lJ~Mk6rc z$OjrtzT7$s#^iCy5Ge(621rU7Ymy~$&||)^$}}+{u`d>MCgt@hT~|fxA_4sJxdrVH zHMkz10Jj_ZHfhrf`h!e*R;15;KRhzvpeWCcOwJmL(Tk98ZV8YED5+eO@wptSxJG+7U4J~q0f+I-S~LK37smPhNEBWI>r ziW;F^t{0Q()!xk%zL@n|PI$JWW~fChNTAoDYr=lyYrq0BR2teQAovqA-TGX^tri-1CMaQ;czvkS-wV`o~+zL5OB#1(5MJSrTHfrq`-eW`y-~jUhug+Z)d=uqG&daTeqBbBLX!E*zC#pWf6!j*`;cvxl zbSN?HM`B#BjEcTa#B6(r^#fHnu2hH=ah1XVdxWA-<%h?1d*{$>NSveM?Xf8mud1N>0?F}<%!J< zPGQeVVgWvX^m{jiAFnZ5Vc|~2#E|C|RAWXXMM+s3Ok6;{9j&p2Jjh`>f0cYP0zOJo$-eP%f4Yhzh@+Je2x!5_8jJGi$ zA3Yni{vv2Q6m`X?CyMZ5(Zc{>x{VYoV8m7TULZFnX*H1r<{HCA^s{`}!&b>Ex%8yo zmmE42uX0ILa=GMFkW>wAZ{*7TjlC54r5VIO}cxM%<=m|MZx$D7RsD zDrW0_3j9u2kEJo(Eg>Z8PcA6CI9Goj_^Y>FKQtOcd`P|EiGreQ@r*_?k47h?CjCl; zZ_eQ_53i@yLPmXIA?=jm{Klk{VG{O6aq*Z_d67q!V-{LbPN&UnpsU!h{T`nm4%V>x znIohl$7^tAw{l(sx>Bx&ydLmUBnHqVifV=;^-fT-wl??9*{x`CCa4tVd$vhSO~Z>B z(D3Y0;81=|PR4zpJT!$30sA{>Sn?FbhnD*7p08>S{t!=mAAYFMyJPO_7m)W5Auxo4 zpW$^Fc1~8V<}h_##z%4m3y|BbDRiX%wG20_|bdrb++w4S8ecW9ehzzO4K5yp<@={L>J`RFV zB+Ymv&9Lg7yn+lUZ40RFuxq}@nty9z-9px8DqKF4BCggq1a zb8J>2@15Arep5I1ClExn0;}H&@&Vi%bPrzJxh6PU3w&D%jOj#!<=V#+4QvUNAC5AF z{Ip-SK)}7h|Jz}-;`Ze}=2>!lBJ_xF{hpm*h&QTfFQ}kpCkkAsW#U-R!BRdX9eIy7 zLRnai2<~f$O4T#DG_@ zZ4v#=S;?*LuFI8nC)ZY7%^|0HlOe!bic&D8Ot~y(t4@pb6XUx=B)%RGuO6Q^B*p2$ 
zDnv1@c4&Hzt*7&fp)inQXoF*xoGhRBc>ETG8e`5HzVGMYcAMs}n2owKFWBQN-|(j= zsF;JGGKaSo0SaJ4W>FohLgnvPxqCU&viBRq zqosL#81HDwjiYSByYL@b=K02YfL!(}L1B4WzqThV&JZ$r;bc{SN4j6@bz>WRLS4so zZNJB*5F4|xx#8I<&Q{I0;R&c@`$swnBYW!1L-oWBgTS45gC_gj5`}V6(N?qNf>7=m zWFs9YOGAs;20$l`mk>Ns(=LT+Fnh%B7Ib=2aB-(oHJ-I^w`*8GgrAFHgLpOiU?0k$ zl7H?W!6`9UMicHohlUGh@f)E?jEG4|`~bPu$TB3KN;=dO*3F+}$WK5@X%!%iAbPr& zwH4qdX)k`m%_AW={R~?pO(_EwGDva8Tarz3bs1>^He^pprtcDBikj^o0;-mk4?dPu z;-o|NOg8CAA1o>&%jESP9y!%GbJbdiaoBpTyAC6b1n2<(u#4>|^5FS`P1g0+wq33` z7!<|U*C&jaA-d6!qFb$>R8oP=mpM#6+rC`!Lf`J&6L2%Zx&`v&JL{A*>@L*`QZXLe zfmYtOb*m0a4OeP*SLJ(CJwt`EiosA3X>t*AAlf=zw}gz0o;QF&CHt-x1QWY%wIJT3 zE91(8Ox+JMTZ+Ny%_svS{bh?OCuKs&>dVi@y_2Zb6o0`qg-tKgrQ2(79&*EVImdG} zLEyD4*#tdpF*jcqrKMNab(^md<-*O9s8b@HDUyV}(e)9?UlHZsWoacuA!~TBZE0in zq@Lox>}(BQnYAQlv32mYaDSB)7dNP{ZGy=^h9a~%an`KC!9}A3$y~Nd@i<}PI0+GD zkFM&}htVaRp2n*g|Jy1z>%3|eohCPG> z^bu>4mG*-?YF6oY3|bmiD$cjfKnVf=Gi7R^U z)~aDL!|uYSOHn&cgq03`r znAb>8EZv5lBBut?83ZjvZ1kX=%B*7Il5)5p2+Z+W(57fQtYBAi;l)O{_M0c>l*hF& zC_f;EGf=PLOPganGCu*s?2bC{+x0l-G1~6Okcv|h4C$76gr@>;>~4|am&aVSGO%-Z z8GPg2-#g%V=%Pv_6l*&_v*AB6;0?Jwx4=vpniBkx&w+ z&5JTV1hejP3WFtBx6q=&0PM}KFNS=k+xd={9fyJQnL=rhGAbomvZW*M8g579`!*}t zsDQQdBkRFM&^SKuvb18+DW*DJg=(rUdGckHHOZJ;|2Fhl6rCVh0fV#pGjhJ`WP3g*qRL8I?D3#_2ZD!=60`6O6f}O9G6Ub?q zi}Lc>t$d8tXc^^|HhZB_4#H-HnjV`)p%vDnTdouw$BsEN<;Vt&so-vtu&^DY%op!@ zT!qP|l#K=(0M<@Jzz02qX$0w>4}lF zquHF=Hu|T+JKGO%=?6ZoZA?S-enhrC{m9IKw)L9O z<-#p94hi@Q;gOVi@J#od-E;tW${oLRMGWLHt-v52@H-X;-eiQgj;5UJPBLd+5 z{~%*;+GLK(ROC}(7bs%m-#S)^$V%_k`{LqTR(J#=npO?mV+P=#q-sy0pR%Ap~wPOChQc_;v}5OI1U= z)U!KaOtWEcmE>HT@`Axd{2e91vgE^rg$3yOnz2P`=q4Q_ zB4ON4tZ9H@u&&6nt>5uffyPV!fi^8o7f9R$Z->zYyN zaUy`SP{b~ZV$%z4*T-yO*{BXEjo!Se-q8!JCQpm_s&-^ZgOnwV!^<-e^WcT@m|#{Q zr3J#EsBim>z#N9r{5Z24QFrQZ@+bGC-0hNyaOOOh_)RQ9Pg=MRF=cri z%pImes69tCK|9^$^@#3GH{)#yrJa9;6o%agB%hnGP*rN7mu6T82_z9X-?*jz=1#`Q-L0udL7qBX|>e?Ifi#yY8_~$Mv}?mkNLI_zAj#=#(FhO z$WJR62Gvh@gu^nY*0^=UVAW0=j~Bo6$Z7-B`c1PH&-S)k*z4usE?d{gpuo+Q_L=K{ 
zzty?5uiDtscW^s4CW!?yqF|?tU({`x-AcU(V>3P7}@zw|EDaY((W9v+87}I|r)hF>9%C6O#<))Ht9R-BuDb)1Z25pYJj=Ih7zbPTix@ONY z7KX4Qyu`Oa^Mx{LqppGGr_0$pzl!wU={)cP*g(*orGA2^?y^TZ7r=dl5Ol0?6nZKp zgV-5SVuoLODn&w$j+tE@Ahxjgb3&*+b^_24$Vh-?t(QmU5yT9D)&myL3k>;q8~R2B z?DM=7HJ}}j)d_Pywebhw2hFn~t0h;R(Pb5_Cw$N-l&$etk$rA-GHX2*XdOb3vVXdPFSYVq zP4xzuCB;s}M!OnZ>EQC`%>@lfM=y#++xngzn3l?@OQx1alu<{~eh<;%aV#Qy8mp^l zwU54~A2aw(po{yo9a&I#z_~&XHd#z2VU4he{_Y7Zv=F$=F1ObRfOFnquts$`3ca0c z9Ply7D9vl%OJvr#e`yaIhGv=jOcl!3&j~$27z{O@&)P9 z{I9LZCOz7_EXWyje*3E{f;$N_dJ&hIaOChT8H}qYJNXe{cbZh_;j9gM;Hqa`8k@XD zL*^UdbvcRg*e?FSh?x~<=55VeKS$V^Bd55VCptjjjHyFBs)%DX{1on4V-u%rXY0~| zlGV4n26yC<+INA+*luP4`P3psLgYt0OYmmXUr_njCz8iSG%5v0W(P*!5HB1ZqCEL5 zu7(DQHRRFp-Q;A=B#UGjb~zO@TvTLng-u+8_tObeQ&_M!ptjSAKjIuRWs^$*;Is>1 z>6pH=vyrLoo-64sZ!q?6%uVxZic1PD=IcI?-o3qt{A8?DhT_b4fGSVv1ypj0Wuu(s zK=pQ+BSolnS}f~NBs&2Xu?{dDyyP++#SO@g7%iu>dCH#lX+0Tn`ZGLR8R$Cr4^|Ni zZ{w32Uz(M#LDF>h6mBC^rjW-=TO03k&7i+sy38bvnF@S$w}0%3%q9Z_84XejAIa)9 zs5qlo`??!eyxNZs<}PC+yY*4|CJb1=I)RpTgW+RmJXE97>9a|gmi0*k91kInEPH-3 zUoEVdsFGc@pG= zP%@i#Y~V+UZu+u#nmh4Nc*L!Rcr&0gW^(*emu0yqezmOyCiP~+qVyfy>MQ+jM180C z)%><+L|kJ@8_wQ6trh5bpG?;BJoqtf(3osN&r7waI@yO(7iSCY)*UrkeS zwnv=Ff*Ikk&Kv%p_TB*mEKo5sGc&80nVFfHnVFdxRLl%gF*7qWOT~=(>ete< zdHbe&)@;B1+1XW?rFo=Zk4U!@aWg%Sd-xR=ja6G2_3?sp?%+0)TI}$SxG%*>aGQ0K z=A6m!NgCbTOTm{4P5|jYy?L!~jol|5Im6634%deVw_-af%Zr?9#R~)#bBN>IrEF7@ zY77%cfSO~PIhP@DMVH$vgsBT#ua24)ZTdW~EHE^g`_9lvYy_6qzEXXR$hSQ`6^V6! 
zO(`tqVEJx{jmZ@q-{xyrLgik;4{dF7;RtNng(?nMa;XMSvkbNNF8?3`+y@+sIlr9S z$|2=S>TPvV-IdC1$*8d_Acw4sh!F~*kfc*4ZIc}`9y+yjD8ZsBsAo12F*^>#(S8&A zmQMfNGeAeGIY!0XM_Z*n2ux+KWVaAvj~UoGBs1w*0<9_ioTI=vr&9IEe4bmW zm&>%bf2K1+-ZIgpugcDZ9GEA4$Tfn#tPx;Y?-@O5YXB^8J#6BvvAW+J-)$3obO5b= z&-E2R9oU$gEOlxD5RfE*CRW-OkqR|aizpv7xqAi%1-hlji?OdEFzz@kKb>E%yFNmq zeEj{Sz$L|!bwob;N0_?=>>S%i0_R=!|!8iQ#RfU3IXWLiLiizw( zI!|_(3h8@wjN6`CWLxpCY=ZBtKT1wTUo|knx%IZF5SdYE?WC54(%HwNvKU6ght(Nb zP`5FfAfq*=q`X>6Jnu^3GOtei3-!ohoUDZQjE)+%=>-9pfKdBQ1|xJm>XbdlsG;c6A77{u|vQn=`GGIwo}Ykkr## zS7BQpbf>c3USSx5Uf4~h#fLzc?!4OUV1_lPz?e9b6FSn14=GC=RzOZh(?!zYa*h?M zBFNuJk_fW4P~OqdK0yWILJ=3)7+!7Yv6Q%NF6(QT>t1jkZ=V@b!_!VnNz!=EuaIpG zNCWxtGq_V4X56VPsierxFlm#D?#J6HJ1-<`U~C<{DU(_AU4g(0jwY@5;yK^=v(4vtTtZP>KY;$viRbHmD|g|sltp?U!PFmjG|5Kduy;Dnm)=B zFZPDp&+S4Hu8#ng2kSx=&vit6b)Aenrd-o;$7(7XjPtR0jGi2MYq`U$Y`{n?yAf2e zk^pEp&75pLrb7ihPKGC388&u`Dsp%stFpSP;rjI?h+Z%{%hf_k=OGO3|Pz*%mUi zKVQ-yAk+%T^z8KEXC5HpT;Qiq?1K1O&7@j*+Bm{9Vl3jY=|==ApO${Stk*k5i-yzd zXg?E!08B?}Q*r)1FUyA&jdLh(t7a%7ig^Yb(kFy|>Js zKi1_kCi%9p+>onu>Z-Qk9DUmdu9ZVozsW=x%;MHgG{M-_sj!eimnl zd8LmOXkPS+7aYg8CT9;rtU4I8TV?>BAu%cm&p;+#AzmcP?E{HvPLB{;@%3)j@(-0b zV`L}tXN67GmMy0EJOfON#S-C~+Y8Ta#`rblatkk^SM)Ypoo{Dfbc@eykGQqZm8C%0 zZ8o@vHQVU@bBoEe)=5Y5w~5pA>}_nJGcbw94a!cF4D(vJjE4!muE2XuiZmzGVKJQV zu}@cqkB}>kkHo&%63zNqXOV& zS--w#m+(xWdX$p;!tW?C*dW4r#1@}!+2F+${G7Ug?PP-^;*c!FmtC`Af^0g2H zYJ4J&Rm(d950szlQ%Evsx4Tp17@e*8j$D$v4P7#AnaPF5xeaziu!pQb@`Qyqif8XF)sGV^c7kbbRP>f<|KJFWO6W6#Z$ zm$|a2W2QAQ`JwiXZf=vU%9Ri06W&N}0xgZ__RJpSA=sRAK#nnDcBxd<2hCIuLOM?` z@wzKGa3{O{N1`m5!T`baYh|20fH8J9y`b(PKO1Ul)ScAMxBf5DCE{bG3MsYn`n^$= zMhTw%_s&)P4Q1`i-E!3LLPx4;)7HnJ=YZ0QA+GY>Li$2cuQ#PrC_1ud3owLvmJSc4 zKm|m=?Ld@lXfKT;OftcYUgt?89N%Qv=!6rXH2$krmcLr^{gQv#n(zNmtHM9p0rLHL z{ZXLz$MzpuosyaVnfCa@)j!PsvHg21z`t*G`d7J@>c2>ysM`N)uH_d_|8(G&BfsTu z1b!p%8-d>l{6^q60{zvQ-Bk zB6E?Z_9nNfX5i_}RYF3*GUy&l)88otsUU@AY^WIW#mJ73d&5UZbKjp z^qKRKQpg8ss)cjy4tbedm}xtFfIGOK8J;Q?WZ_G@lXfc#w-|-6ZS1N5l82i?X#>a-cBxv 
zcRVvTXFPF!?Lo%~(JKJ{4K4**q|WA9gQoE62AT|q8-3C0g9AblEUQOT4P8yD7UZIwS7`7kemhiLwH+?H8Eph$0<@XIqwk!}Q;K2w_f9(k zTu3ehW2N&n$P&f;Vo^chj$WO;8YX63{d;<75F!@XOqbm-;?MQ;<-2Ip85wLL_A**- zq^`n@&TM9348F9StYz$e-azrgu-Pm8vjw*7!5^_scyE0{EM>q_p7#esEnnxaY%fg7 z=SJDJC8Kr_T2zrGzj}!$ff#hI?)NM_6U9b2Q_>s_wtP*IR6WMj?M3Xlyp%Hhbsu6{ z?w`~3cxpco(&C6Uy2g5czTiHK9#0u&KRlx`vOd3p>>t*Kclj_4vVt7=#COl=o{avM z8gg@4vDBB!zIIbfGo3H7LMhPv-?`t#>m-UWKxru85E(xj2bqRXtM zVr+UdWJ~D;Q`5cW2^xPCCXHY}qmjDj zHKr=fQ>^inF|V4KI~^Z}p3Q@pOXUHK6H_R3mCL?l!6Y~w>bOGsPPgsCm(Y!jxQA9cgqKoM41yZoH7G-Cxv=s}Z zsuAG|*M*CGIUc8a&1I@tzwX0bz@2V1h9@kvfJr~VLi_~Lfe%X-m^4(jpR3&`($DfQ zxC$S@EEx~lHs$1A2{ zcPwq&ur}&{%q9wQ>uv%5!T6uU>qv(75gIYvnXe9r;u>gE+d9ZG?P3?)tLVu{w)uPz z*hmmZRfst|aOZJKZB&5ItyX?}0gV%iu) zRq19qp7P4ISs8d)M4Tj6olu^z1iC*5yGVlrk}IfN8Wxgu`9P$kpy@(c07Q7OcUJS`a_veZ!|smo7GT$0&&eizM^;;A23nON=ztcUEa>FAK74rjuhQ$jVgcx6O; zgQJ=3IOsY_S}HI*(HmSP>oi>NZBLzCVIQu|AM|d4t(?i^k~Cs;)k)f!K$(UI+3LV! z65c7oE9#8zn#H|U+}s6-Y0If}{8go6K}Xc~uNUF;7mAwf#cRx)BN!ZozaLRAODutI zDo;DOZgf8^RG;c)SbqPKO)ja>H#BV#1X)oG%d6`;i@*QwZR6DcQOBretgqHUoQMsH z3Oiab9I?h^_2w!5tu%&$AY$@0m^xbWygdyKWKzLOPvmn!mRwKP+{IZkx#a`$$J5<> z&w@8|16`SJPWm3J!Vr@cJ%de2CtSli?5*02{zC$I%N?OJDg0dryiyxv?!i#6cMoOo z!+^E#!ro)&u-}HUwNJ5pQY* z>y~A@45{YWL}3i~CcY{<72yW8p+PkLGcqM71Tgs!pDJN2wqW+Ow7;638mr!0B%iy?w^WSk4%|4qH2l~47rC6TeT7)PICZbk?2jH%#3Gf zm&84vkfgOhHXy687Tb0w@u5M350-GnzLT=TtFkKffIry~K)^p0b3fy6 zo_|}Y10>+jGw}QiPx9~VOW6M_BR_oxUHv~C^HOBcHTWri{EPzVGyc*u{?s$-{zK37 zOV9LE&-ACB`Inygr=Iyw{l{PWk3aPv|J1Yn(zE^4v;C=Op#R6f4}a+Y82E?$@xebY z8$j(J&i)VB{#?)-6hQ8$uWZSG@3h&^Ve;|6xAXfsjE(pUk^kI2@248X$|8Vu6BCb>51l{QdcV&0DsKzqj-IIZTc8_wy|8r(K8l-`o9~w_h{=-tO1DUB>%);{TtD zuyL4A3JtEc1zZ}o*M@vx`_bx|pWH@0VLXx&WC%8;rTf#^Y^oy3!jf^Tqyf5pac%J# z=@4q%hbs=epd%8oM!SkK_V`W9#RRsnAOl=hO)#+X{9Ik6+|-P{I4N|UGAxAkkGT43 z1Zj%fW-zYB9K}a2v#svShLF2dBTFNk5czP+B|t&KE~QIccR$CVhLk=pObqS=^CyrC zQp@LldE*$WgSjlf4cc#R^#?MMGx~%bE6rr$DK_zb1T~>?BMHeFS38MNGd9g9Bwg_h zf$~fXND2!O6$XZG0@@t{{oW)l?Epz)I-gLL-(RYv!1n-*cM(c|PjK8k$ 
zQEmIJv4>0mW16)2=)LqnoT<3JN`B%}8vg^fx?@3eJJw(>-e78W@vn0p5=)0^3a?!%~ybQRwE_rb*{8?rR zgq%UjdpNmxSQDnQf1qT)wfu=P=jBk27|l5Su8>GBA$bME`cCMvt0uIxy2v2U=Lr;0 z0L(}aPi{{bY}()`SukWXUplyR=bpNwf-Ea~HB>(m=q@GNvF4nqlue@0QPl!}{7Wk_ z;|V(9kS`f%Yr5;iEbTo@1WdBEEKN^wg$Ba;g!TP$cV1L9hQLJrT#HSI43nkKb+J!m zHN(q*=U3o^4_{0-HU-<_W48xGWP9Ycy!^L8#4h z4jZCq>Gt-w1JZoBS`M1xowQ-3+$Ri{-d)*AWPJSu*y0R$X_0zbtDm=GmGP&ax2jse zy%U^34i$kPr9$NSDtS|Ts^XRhAddLP&$@K;L<&3`dl_5xQKI++4;UiIJgvHP_u?{v z`(BvQB<^U=UNM_n$SA=F(Gx7oV9WwzUSDA^IU_j8)&$?5=W3+4JvgIQzmDE0Go{jKx+e7!5FXZEH4wpL%9#wQ2pwhI zT~{enQ8~{)B$DT^?>?sbPE{9`ho=@uUjk0`0H^A%1rny|`rHIq!g+&Zq z1U<%Lr>=m)ZXU0P_@kRScRNKe;`4cz;8n0T9~5_}Zswz8Ce9l)6uz<7kZ`5T&RwVr zQEHjaMdcUZX`!RJjQA~zw_@ak1s3(O>FFu1)skS*d`YiM@(DS2l#}tY=xL{#F?_TT zhe5D1-py_XhACsCaVAcKDwoNmmdZ2qp zcs-dbO7{Za#XzJY6#R;cBP^z`9=whrO58i zR~h(bQO{hqY0EjEj=z?arP*52IN_7#@-#XKXEtEdTLfejPcN0WuY`xrbY2QOU&9M$CsdH z?79rk8X4<0yVP$%cryKUiG(2?SgtSUgB>_d5GaBd;!5AXyzZ3r1}^CszJ*g=@#B|^ zJA9NcBId^4{sL#whmEl@C%-Kx-(m(xp?!rfJ5W;HxJC48Q<`TMq%9rh83(t+#MK~) zcuxzv$Wne2<4b2q|D_7isT~vgTsC{vG91eq61I7EsqvW{r5YFgDduCaSo@{iSpr0f zVXMDGP$Mi+y&p8&0Tx0NabKvDXf7nrao}?wbdHIgI)cPX zrX7!fMCb$7DoC=#rHt9WpN6MuvM(*=@_E3u^Pl?pvD?AqvD*f0WBzom~rhmR$V^n2-jl9Lv9MiQ>x<6CGiVt#&FX4-ODb3Qx_jA@#z^MMl{mqbBv*+#z6&A5Yl4F!oAe^nb{2L7LiQ;rG$8kPO} zck<7$|35$MxAr#zzY+M2z;6V8Bk&u6|M3Wb^Zdu!h#*vyZLujm&hYW--^ut{6=l zkE<5J@S=eQzi%u;FUFuF>o>xl0sQ9r4qB{>J1(z`VO*m$hYSD69V+OiiszY4PeTQ%fw zhthqJOeg?Wn2gFvJp9cG*@1`R)!+=u(ko6v55`&-;l^}l4Tm^#s6EwO<%!=d+lJ*E zM+85(9_tIL3j3V; zsBel(6Xq^H*|6TKf>j**&0-o4CL z0Ryn(f`RW4Ids9lNEdFokl$Sh#WEf5^xat=QmYSGSh;U}c1m@%pNwlfEdYu;k8#57 zxksQ(Pz_GVjzD_c7@6ClxPm^+kO*Po3$h!z32-ua9JtDL>21Z$zPjW@5??2&bG7nF!?qyP}hheq||WWQDq#4 zX>eym+X`Jh`Mrn4I=Z_6><+l$lc_ob?{jsLI965{oqr3G+ec8MQNJhWk;`b{dC)4O z_3XSG@0)JCJu|niGXGQ%?5@7foi;o_Eb0XD>s>RTl6roU?Xvf)0&hVqZWS(fwp|*6 ziOi;wE+&se^f^+ENe{<&bmnQdS~HJf30F=IYkgH?{Z{CHSpH%;&q2U~0DFr}IXBBj z?~7;u?@~1k;}nH%{y?2B4(LYihI-qGDP)W59~bibyZF{BcO);lZFEibDYl4<3k|TJ 
z=OB_0+iE#ZO`@@Bs64!7;Z0T}I@3^$m%Zpw(;{Q7AEHcLT^;H0ea-I>=(`W_L{;Yx zP?;OMq70-6FOuiujbOA5muE>D!q|ttDOYRZ*Rl<%MW$ilLk4v1zl4X#OM@p?^v~nD z3B)?RUYD|{m#Qjh+STXy%PG84GBn?7ZMiTG2L@(ox~=z=*nuWwjz&U+>KqgTNp1An zRgWY9^6uR4UFOpV%ao$likiyt;gm35r!+pECQ>xZE4-(#@*?GCURpPx9PwF^`tBC- zsz(xJ05DxyFS?R)G zI?)OZU$tF?QbU*hs^Oaau_zx{>l8-IuWO-ozgpZ}!>rJ>#&s5nZP@}VRD|!|M@HyX zkr*=Vz=wjkVnRKS;23iFhjx}r%S$$^w>1=}q3CU=gYQTvu`IaX*D#`6>ts?uID2~} z9T>mI9r8{_rY->ouYX@TG6k9}a?f+>@RK+mG&LeIX@LY=wzcrEg*yW}i27e(P+0-w#i)gs7#eCeGwghpd-NcPJ)r4+12@9S7fuUxk zkof_n*vvfp`wa@Lu>_&cfXfIHjge}QDJ&tJ`~b0H7-QfEiuP?5ac5V!TMth`e2L|N_-_PZP4*^B5yGe|gmCy;6hK!+){>`OVvn`FPukDG&XF39DY&ktE=uVk z^cNrlS#$W=rsc&3K4=jp6N;P>72tv4IL~tq>b6^3G9-3y!Hu2~Kf0c3d0Q{-`FC9E zjHR~U&a;u4JtE}RiZF&JE9re}pnDR735JBJu2Oz#_fuNO-Tto<5R%F!xvsF;8!H|G8@#mBHB~MjM_5j zDX4z*;dBtnPI-g@uuCcHS$8E`%EozViOSexN%W3IFp)VF7S-HMY}ICgxcU@SpUQ_# z6Y=-F1R%iQS2euV;)-=Usxef zRw&vX4K~l$)Qa}OP_(au9-E=h@%qglfAlD(J+A^;-EUXV!1a@_ej&95SDy)Br>>&$ zR@_mGfn|@3JXh;6jNMzN?r7o!b}5(k(%aIjtUM$bs2ngsXWn|f1STBYA<%{O9M+} zoyaL1g)58HU+wM9>+9Ue#%M`%m*;k-eX|(ZCR=ByFWgkeGIL?GW>a!)xWO@3h>#mL zv!6Z&(|V8#`)r?{DJWf#Q2@2=VS3P$T&qgiC?T-Er?|dr10Kmj# zr@YCQT&iCoy|PLv;*d+5Mlu5GjSCO}7}1PlG7O%d;jZ@pNdV=tl}0o>K&2?CRr>9IVQZntK;=tkzp+#uqXB_c)sO zcmaC@Vu9y|Dk$$n@4DcG<{1dDCnTzAP4Ua}M(^7O?)fpHc2taw%?@aw6ent**Io0d&=O0!g0L;T9IFv zWq@llfV^|!YhB%}1856AO-Td`=tVH)#b@K|NbS3K-iS&fDh&f|&QPMA+@`mMP3|n! 
zD15QOHsLXO6YWuChF#Dt^KV}}h+87AISeosJ;fpnZ3x+U8)LXTzT-W9)(eJJ>mdY} zWCbxSYkn`E=X#wd0kOMZImWxO#ZF!{0}fte60gkV*~Rb{{v1L?lgJg~Q|+6ctfS9F zP=pO>4C!C_a*%o1(cIFFluJe9>^88@0|(SxYN}r+T)Vi;QroN)z$>wf@d4hly;%h( zrz5S)ZSyEwu9on0u^gq4(mY%yF^Q}faaomDxVgkeNBnWw4s2wWt%7wG)^~$)gQURf z<_uANh84|`Q(<0{!gZ=}7s|gg2ux&e4+o3Mz-~tST_j)r&4j3&lWue%|9H*=11?VE zYJ+k4Ufa=VT|Ec)TVy2BN|#&;{?w_K1M#u_;wGC~3d_S>^*CW5Ox~Vl6HKDTv^(^% zXNDk`cq79{+s)1F6Nrx|MyI|Z`_9R=%Y%eL;%kb)p$u(+Q>ghx0fe{=Dy8`^4 zE_xXeUIN)#it3TJ#*v{O0n?Vya6y9b;^YosU-Z&P3)G^6dZ+eb1gI!MHV#$4-5S3q za{7{)l&iXD+GPZS8=?^gu}QQfj~O4u0~>D9BxnVg9TIn;2zhFte}XKBU}FGAN?`rQ zHugRqW1&eiweEQl@7jkCF9S@Sw&LDRdwf{a1*Bka&=0)tT{F)oFLvz(fzPruOH{W$ z7FuePHXco-fRw?sNrjEnwCSQV+;RW_2UFl=VU&IyqKxbeKYG!8d7c|}8dDjpnujYr z;yxa)mVT8fi*Od)RZbRp8HH74uq!v;+Yc7UPhjq;)!ag9eRWB%j!lmhFTstfy&7tq&hBNkEC_j2^dkg<4Vsom3RuXJxH^&9pwYDq7QM1o6^wE zLeA7DwjqDQ+WVrnT}G+R;IQQfC|6~GT9>fQf1ZTQ^X|3#5o=Z-FKFaWX z_{Fjh-1t%X1>g}D>o}B`k}O8Gf;{WsfE=Ev&{rks67OO#tCeq6tInaGkjLldNr{+S zv3=0kb~BfznPems7bHxFBHuo-kTPjsA1~r&LdkggflhlPm77z`b7g8{3PhY|dgFQG z`Nw+7%E8DDozwNf)Mr*L2^px+ICeE7b>gx&J+&-LlAdR5biaUO&(;*BZ?kY;@6+@r#$?;HKLo;eQbL#YIQ7M zUra-BAyc?SiAq$Wy{>}!CH*(%+4`yGvuL{UDLGIDCR^G|ZCAB)!(J1LrJ1e6VdcE@ z$&^)@PeSnwiP_cwFO!xuutAnkG0jm??I2&I!8p1M}!m-KgJtA=VNrNEj=JuDgVrH0(G*<_5)c0XJE7z92& zI``N9hRGvnTHnHNKh{y2-(4?xDP;E4O=kk6OKmwAZlbd|ai6OrI-N99Q!XZPBhTma z6W8+FDNJb1C%+L}B%dI$0IleY*oP79Hx5reMIxcRn(0tu08@*@KR!m*dAh{Cwb17g z*mVJb;7n!4yJp%&r9@52=Q3kUPDesUqwv|&1rdVN*T2&|A=JsoIUU< z7nBY7%eW&yaXQzIwHE01Dq$r{X#Xmfg4K15gBum+Kx0>P_!Bu2Zyd)?#AJo@S=mO} z4G-k=NJNKQ;s|TP@GquRZv{5PNCaoy`S3kq8HY=ap{tHj-QAZeAV*!2(>j3dPt#_y{`P+RUk7e^rBm{!ez`Uqj1> z#(%9r`Netsh55f8{QKQ+1b!p%8-d>l{6^q60{?>%fcPWt@AoU`Iym-qIijLuowB@; zp@pfwH_o|T(Ap+b8Hxdmv?#H>ff6Di5R_r9d)m>7Gk2<1?=aSDdEzvz;!WAxLOvJx zK}`vb%*D)ey$zULH6lXaVJ z9yn`h)IERtvBu3t@!e^Fyu{soq;7IawQP5_kDm#@s436Vz!U+Mc&!~Q;+WAWsh^vAFiJ$x6hF6BLs)m;v8<~y`7h7XA`sGVX3&s@c;2t4Zh zbDkI%?bQSpeDuBFI`-hdqHXlwemhz3W_G{Q);?N;-Za{}!B6m3Jr*IL00SOfv!W!_2g-%9e!%UkmZH1 
z+m-(&0}Sb|(QoCNsw`mktc!>nLK+@7P&NE}To)4oSw@K!Siw=Bu(e08g*+9Fu@A8b z5e&=2CE{ZNBy>TrRp&HdmBC^gvPACtw9CcjQTZ&|htuZPq9ktZN3v8d8^?xT@57HO z;-qaIW#3iTxnO*%kv}g$qD9X)SgBr5rynWH00*j9rAVh+d&@>061i=MYy01Jng1vaUIf zoEeGnr9}F5AMWxb>WnGe?#-^^UibIm z>ToiMVW4~^XZEL3QwC6|9Kav?0Mw)Dh`V*?ER;kNV`Qh>-jSR1AfCZUGjO;b0WuNf zIK%ojGO^4l$x8}Tj+NI6A=(TBv0S|tlSry1OI+2WLW3yH;^5XCf1o}wNAQ3$zc*5D zXc%e1Y}FtOXhyI%uTf{r|Ei)0%v-XU)43|0jc;52&cT}*Gj-!^M(lhtOh&_c zcv4hcQa-3qLwp~i+jCb z0j*t^ic2+G1*ESeSl)`}asBlB56LF@ZjKHp5VK>^QGDy_=5kpf*Q5&y*v$h1&MvXDIe-WQRLRs0y_OJ83zE2@*}AAQZmcp2KaR^3L9+;prBy-Y}S_lOOC*BzmCTjbE#gO&Cjnhfo;d z%qzgCzutb|ZMob#XJ!}Vz2B^AGE?+;-Q4-3XVE|U6_Em2=tjZdr+?aw%9?b0l5?$? z{{U~oj@L@H?1cNJUPgx}DDVei*&KRpkYmybR2X>1Q+9yk$`;e1 zH+-{W&WJrDyo+rd7Ka2j!SlESEjTj*g6hkZ-*|Wrv=6CzQip1KM(~|@9 z)H>(@e?ToZY*$zN{Op%b4~M8dE~!}VHg#=G&JOdS?%UeJ-tDulr%Sb$-Vd5lCQZat>Gp1XIv z5VG|?`*fUpoA~BBuEe_}vEMmm_Nq@fOar}CcsC8e+Jgx7(ajK#!0$k~>+fp4H_WT? z^RhhyN2fSB;3Y-TeGNt}=;q0HbpX}YrhME-)27=IOL}t8n{r?!N4Ocf)8X#_&Pcvz z^u$lhI;AkU;W8X6_+>i^bd1ms5Dh=AF%Pp{(0hEGO5FNXVrz3q>5WQItzA`B-^g9H z!vx3p^N5*Gsr}laV9RAPonkdE*)x-uVD$yUZ!c_#W8O8Mr}2Y^DTUFAf_rU*1htI zv_+F6>JVXJv{y2tZ_KTs@`f{xw(p4i9tX=lDs7Xe3IOjBJAf@wt8yR*t zSLq;qOG;rtvj}5ET>Fe@8smrnH`aKOsZ`cVuz!F)h&@0V6obU zSxupwgG*@%x7uPlV7JE>ruvIkygE8YNO{h1*wnzP?)w+cY5KA;NNUM*5do)=JvU%} z`6gJ}+7BK>DBqpWVT?Gn!AvpxzZ0B&(@xT#pGgbx}+(##-oP5Zy`^7T+RiC&Zv{t9hNQwrG3s6s3 zihSATGan;hN4w92kF5r?mor3D^}f5SKxjH$`APHnJ^P&##yJDh;=@(qRY`xCKme(Q zf5|MwD#$k6H%=v0cArdy!M+HA(Q3%vk`fCJ^L~ivGMvwf2$yI8*y}cmQvWvh9h+Jz zN?(0K8uH>2GP6EHyi*|Nh$)rHX+JR8FVhQ&$0u+zWxCKIOpfl&ddMLe9DUA;((L9!196e{4&4{)Y%ai z@B~QyKSe-FTv*|>!RqI&hCP>U=i}@=3XnQK0FGiKI~#jP6DKDVZi`qh0X-v zAo8pxk>_!c_;(Wkj$i%r0akS0oCwqZ7{GsVGnM3Th=BnhzJ2~u!<^j!IL;N$Lopk_k{-+gP9KV3Q zvvi^JenRO3jo)(=zl6AK{E8I*-QySfQm&aU+4Y*Ez?e_{cT1H?7I_p)VoDGK=7azZP!OeC< zPQZZI;%e6L@*%VxLM0N7R~Y2A!dm%H{XQbW?4Kv!t_7DOCEd!&VV4@upXxZ+t|#W& zQ<_b)CM2O9TBjLO?#*Kv5D=nMiyhMmw;tEkRl_-m=kr$E8cu&!dn^)N*o{o|C*s)`m^F5j@ zawM(fYZkzUa1(R+^jPrpi_%G5Z04zFU?A7PnCXuSMrMmDwIWEP@$B)#Dut|M7bxym 
z3|i{5k_S+_zw15W7S7yyHjM?@G#-DR6_)*w@fSB^I>t#%b4 zN0&*bi(J8>NOwk584&*}2ga)d-dbB$0j zs{2j8<6vAVe<}$wUX^L|-Q4PXgUx+SJ5@#AfB~xqkKfs$t&qO5ek7)i;Agf&lsq2P zuas#*Ot7)+2owuT*f}dDiD)dk6fspO z9Vs>(Y)fdyH>qUiibL|4lS_G=j|FX}{9dpVENb08GmTabM4akDX*mK>@V#mo^*X|G z3wt>Bvr;tZw4_229&!-hPRMGLgOWnBHjgSltucXIV<0J8Z$X#WC4-wXQ-NPREtF97y^ zI{jI0;ns_5>7273h9nbOYoXF}vF}Rf{oW;Z9Ni7@nsI5&?dl37=Zi9Ts*}VBLTkSc z`=ob}A8zbbs>I8{ciV2L4tRKXGJHyh;mOE}Nw#Q9`P+9~nBFVPmOtN*FnX)Q`ZVox zgpL1V1^%AJGEq%Fg3A56k~Ws7wYTY8yyW?>xr>@q>Y|f0RUF%UyaEgRc*s)PVi()N zYoUVp?11qR-iX@qHon3Q;wxNYhMNN!#xgrk@)w%0HjbZznT}AhC_2_Q-&NafQ=?Kj z>;9i^4+elaQz^e{g{S%+&euBrpY=+Jvtkec=3fua)lvYghtNrRPyj;_} zG8+>#bNavK?CJOD)9>6sKL$_|f9sixEjc>>76Lqb+KH#(lI&-Rpgo8H zhyaKHhyaKHh`_&^z(1phLjUD@=&4S6>Kgow9y&vQ))RlJ9%^g+ztcm5NPmAAkenxb zoBV!0pdQLx`TO~P_SDPtP~J-vOFXoTEduGG7kPkUAOau)AOau)AOgQ40WiSt``Hi9 z8x55Idq2Ah`q`zxE6-v-+9!W5Kl>l)<-pbe<)u9S`}SexQxDEbIIZe8?e9naIA1ee zw!2K9B=}b1#VfC0xflm>x+MF>%%B*E0EhsH0EhsH0Eoc9lE6QsKm8+iO?%F>9{5}I zC$38rA`6U*-44>9F7g1yKmk80)J$WYddc=P+n?(`r)~kRLrwWfmfcz zezXVuyX*2`;q^jv&XHSkBk2lThbTL06BfDz{O-x3`77# z07L*p07L*p;MXB=`fp$8WIK68?a+-jIL|35nXf8mBMia-DGSn-5nJ*)O6c9WP^A@a z+7v`_x*|-bozF3hFGj$Es=Ft1?lc*iR_T;}8Kk0FB04KYIY9c($UOE?D^+**lGIY3 zd2>y#z#ZW^he3#R-7MhiJ`jk&;|x|yD7xPxw6}xF77+2 zi$TX?v~mR0Dw*X{#@5z(Bkif{RIvGUf5sIezdO- zaKfJGmOu{c_IS0|6RyQ++dq4|UEICO1&iL+$=zLLXg)SX@2jrGX;gNX>XS7|Uk)tW z$Cj&d2;v{2EbYSYy^3Y*_VFUHlj)!FWs00IhkR?T)9-1Px>#vLO)8N(R>v#uXjoM3 z31_y-9*CK1&VChV{kk;~++ABDnlB{a<- zY5UTarFq%GPEC9|Pn5;X-L{CXo|433%Z)cgNWJiM-6H04>HItQ4Xb^z_qB!buEA2m zvGJyLGv1C$z5WPKVM6>qZX#kP13hZ`!%pt21nN&+@s4HJ1Fz#aYluHySMCtaW^X9c zh9bg;P2BLmaqAj#^t`QD1j5O{JqchLCp+(x?cSDyF(kHhvw%)zh=4d}OJBs@2U7Q| zYgh-&DmKjX8KT#=HPe;uNktZ6TiooSd^(NCJ6NNRwPqy3KTEHuDf0+9mcC28wFEWr zYWEDpkJ$pY_ynrVU7t9luX;h_#e*F2Mq84mb5*AJBSio=W9kn%9uST@ zIaFapce7GO!lMgKO_JYKiAm3VJTeU*`Gm7Fv!3}ZV<9>+F;m4SzA2@*p7-{q$KmZt zh*XdJr8Rzz0)@3|Ul3lR6XN+xMtcG#coXZya@Ny=1J8J5BgMQDIA_ucPFwK(p0q<47s$QzYD_S%#Q-wJA4Z{>%`QqsMnnR)2=T*d8S^N%s zjF=c+iqKJXN*n_kDaaCjO1#kNj~!djL@WIs3^! 
z;*SF;opr!zRlk`p{wn(k?6(px_VT#Q#W;}DCD|`#2E{-GKmF}_Owd`jpJe|>`XrD8P+rR8AM(eY_JPpJ zb}CQ9Z=PFK&^bGGT2)~3|M~qXm#vR)yU?p=H#*%1UjKf4dIaS3tpXR5K`{^k5CISY z5CISY5P@HSz(1ou{dM-ZQ@up!Wcwf9`w?k+)*pY1{@O%9yiF{F@GD9gEXG#oyjV; zCIQsLoPBsUqHCH^h9*-r)vixSk^&oIgO*=~KUi)iM}%v3fz?)q$a1+f#;WbtAQ+ZDNRz0rbr+3lVJgbo9u9;H`@Yp>4#EPDD z1A#ZqkFLE21y?~(8<*3%`fv&HDaS;9!?TFjn?~L(9srM!R+uI{E$NSXw;f9NlG>Hr z86raIGA8-`bze$ZQD<&oGcohr2^4LcY`Ga}GAroK3~66%a2w~!yRy2XvIgy0c>F6o zTexmlJm=`~CbjRQu}SNjq>X7Y1U>?Lu^VE5n2oM~XN!dNd5@TFqDfE4j^o4k{k7;y z@7f(2a$Tc>?kGr0$iwZpJ9Gtwjz4UC)04$T{k&ComOB4fBQ46eOJ6h|Bcir@kF8zS zy-qLBdxl0%rM6~ds-F(y1`K{Q;^cB3vE}l0{Y>Uqp;lbj&#HZ#>rWMvmeNoMUrnBk zEU?pe9DAQ^Ckt&z!KlKtmzJA0KckK8TycCwBfX1@)PS1vVJb907oj6PX@vaVh^tSJ zbs9h(R?5$8$zFMoo@N@}snLrC8c>sIRs1GNLC2ggt}n-E$RL zXv{`iDhpYak>SJxyEa;vz8jAISTkJBoAIs7{Ls=ERNlk^DjEO>SunUq94aT0P8}N@ z2W>p|!0kD_sc$0RywY6#-TYh^-ngs&wyB7@ZfyJ;l{OUZf3{YS$Uwz`tz@iGt{a6*^O4aZ%E7RRsWJtG| zHdYrmgIhUX2}44e%5(Afg`cn>LK%gbAuApOLXt(1y{m}J@TDkd(lsp-cPUp19umMS zQy-={6fFDcMW!xFw=7*hvc<~Xqo>u|Q*620kS*BML+&~L5hE(&UA*kHS6e9O6at^T zM}x+6iOr0?E?9qjsaotZgCpz~>r#%2Zag8^rr9yh%~Ht#oR_N4{a!qCriTP8=y@a7 zPuEULFfR;tqglcRW1)cePN%i?O@J-KTx(E@6d2r3nN8(1Z&2RH>d24bd&D5t)~O`?B&jUV1imt=R-j%&8Z9|P*QWM)MWX`30koDM z?44sd*nUw<_G8mTP6Ixp0gDyhM-<^n%6)1ZmJejIZhgG%;XdXa+%6+Vz>6oyUo&O} zdHpReO1T}`$&3YsuLS9b% zi^KQh(<30KZxy(h42ppWfCzvHfCzvHfC&5o1pXQQ=^ydOk=#4$kH1BKTD(Mm5^}mY z2tfMNMIN9ShyaKHhyaKHh`_H%;E()qz2}Vv%1iA}KltM)Z=GG5azXR>(H`{gS|1PO z0F;;V`0v}}?8DAF;IyjW%wa$B|8w@Z<_o=gK1fbPO6>RJ(<30KZxy(h42ppWfCzvH zfCzvHfC&6D1i(^Gn&N%3ovf5`u_MF?_XNI$Dn;d(1MvU-Wn8*ZbJz6xji9=$B>Ro zR=3w!EVtcJIUskrX>d2#J3HT{&njj!uR>=hb{8q`2l zd{`wL)v)0q1}B%z?j!$V%_Zd6CZPDFB!?j5AmW}D%C=QsMguECi_dK_!>X8IbSoV9 zG@rF%&jTm-wxwGofQUGLVxA!%2kELq*y?5BdgF={5m2F3@%!|Z4LU`E(8jiWDilItXV>%;FWL4Ja+{9c)# zphv$~<|pXb?`vfKsDCHj)O6ZXFrQJh2y@39If3z^>%DF?<59lJWvEZEyzRIbKB)J( zNw~FRysIz7+LQ6H9DrT{vyJRx7^JG&Oz@eq$4D{z>ND!l2OUSsv0GTyjOkUnEjod0 zN-;@{scyb4MOCB+_Ojd^0zz97@b+m>Yh0-0YtnAtOrFy+6AI)M#ha72s~m)}bJEz% 
z>#84BqlL5;86VYbyYUo%j^8uCen}G0P#FZP0Pn=3{URs4Z+-7xPLF5PPE+rrgqCc^ zuxDo1*3IO^rzF%y@v}eMwPdywZco=Tz<%WOfnQ8_l!>0)N4F3ui#NZi8t-o2{K`se z=Qf(2ht@oKgF!f2e{q}kxbc>$*npGcTE`17f;&uBP`YZy9HFwv<4DNaa$_fNV{|?2 zSp|8XL!sDBm~QZ?#0Dp`sGz6mV9ib&XfK?#ZnSn;>DnHD?Cb;gMkk~`&gZpSUAaS( z@%(-|d+xY4O6comxEYc<0SD-ZB4S*ej>cLW;l-)iYF=nVFKN=*h@N`wl4vh39tL!f ziRRKSKo1IPRC{3^TZ>e;CsvBv?=xHMWwpj%mlNkUCnGDSW|EP}Gp>2>>w06@H5J>w zXVKVHg1F6g$U4AD)l?C^+onpJi;m@;>#ZVNrk%6TBGCTdLg0`5aX@{9f-AVXc>?bpq*iVcfp4}9<2XX@0Pk=-~5<~z*07L*p07T%|Bk)K5xP|j31Lbe^ zr_*QrFMnT^Sp3faU!1GR8RDZHHZasg; zAU+S{YLimWeEZH_0WFT6gCefcQmY`P5mNlHu>vABOitA{S$|RQhZ_;AsKrE``yN;- z{dz07cPW-q{YsCu)$eJDQKD)`mYN}M*`cuLhj6;;o8b^GrH9pHIXoCn?Gasq!NjmE z^H0xS^2E*7I#!5=U!~-ca7)lgAI&}{w#bN!Q!AnDGC}{#$#l)4#I2e^O2rP)f8pBef1wr;%tfshWMjIAkwhNf|Ddp)^|SF+#>rMK zJ}`RIT^f5s0_Ks=Yp}$hLvtD1u!*25QfM7NR)p58+f*qQB$`l^KNU+|(3Zw_wIvs_ z-K7(feDcg>Pp@`^9UelM{B8%eVhAdR!dWY{JaGaa9CacXXNy*aF7)g3tzvGz2T<5b z#ukVecrWEuO-vYt_17$ELy{Y;%r2HNPNY85m3>E0LdMg+TS>3s@ObJ*mDTgC_qU$Y zJwUn9nrRy4?$OE`zPpw%d|TxMjGMLi09+VWN(@w4c2@gB@e(yZ|V~j8}_}5-%yuqf2e(13%>W}__Z{xZB>n}mngFyp+fnoiTTkI$6m)K7LB^SYe z=XCKrC2cjhv(zG8_zDaxuALc=nwk2{BdFB zRMIJ|6@tc(Jef9-R#jRmX9hjq(yw7&)q=oQC4BK>79*Zv;Qe0Yc>JA3z>p;N!er3( z&JIf)eI96n13O-mJHmDh-pAK?1I(r}@mLK%J>F<=^8nu;d(km+bJ#fG@cQ=4+0RvI zhBx>W67O(Jg?*WEi7xDTUp{(Ra2YtbZ*?{xTN@xw&8L z7&Ec-eNi2hbCOYh@=F4;rheutYXOfW82U@iGX~zLm!^Wj8kx4njA_0{>An+itxYU{ zx(?MVXXfbW4i+iVd;pdm`CR$ZOKydj4~KMyJ^*e${~D%mE57c_(26YXJU?eaE#*{;3C>&M+EhfRLd@{EBr$7A5me*5MO8@-%&!!|OG z{@``=??&~!XQpEM6x#m1w#<1Ar_7ntu0jr2;60+0%c{3&5R0SmL@KKCv(A=FKly#m*ZogFD0N|m(y-7$6DT~Sf#Y$ifziyt zw|?G5nwB_1zW~Mjd|;*(k`YGQtJq2<@rhZbE`#$eS-7!1EUJ|~&U;NUmRd%|56c&< zms`^Ac9`Gg-Wj3YpMbnVLnec5`b7-f>e=%WPu5|9NcGJ!tp`4=2xfw>&sstI3C5`M z8hEU$w?#w|0mksFnYLbZPj|Ra;3T0^o%``r0giDrU|$G7%84b{TIR1Aa*tlavHkc) zcQVgjW34cr_t}EuTA^4oRT~C4k8T-#C5A%#efHs+coJcYG+)jG_N4Gxp0aJ-+i_hs zi{K6hdz@>1c$WIXCekq2I8tBJ}>0>G^9^o~PFDVI!J%M4DDknqxt*s>Ctyi7te!YAOau)AOau)AOau) 
zzYc+aMt}N8{Bgaf-=I#y-=aU|U!p&Gww~P-xCiM^KoTGcA^;))A^;))BJk@H_#=PZ zhx4Wb<)!wgAN+B!GiR61UC=y!c7NQzt*4dbZ-{{bAOXOq+)_?|$?SIm^?6$T>ALUV z3C!nd3L(Ff=_E<$JlX!ln)+`Jj3L#`W8C1yLry7Eb;k?U9YqX`Edq&t$fn^nCXzA> zg?AfAVs)R47gJR0&q(JY$g}QTpBF8^!%>oFMkBIF+4T{v;jJQepZNhU3~HIDc`R~f zH-}gQ5TB=z;Y5?lL}HS~WFF1p zg(X^AxkJQor_wmc9I;nRMG)M=Ii}mAyqfS49DTZ9(MU=%t&=Hx+MAnhoi~&owQV6| zn-$i(tNrF~%cuP}haH+xxs&4gSz)q!0(I}M&8Fs0SRuMBWlCZ6)tJC+NF>tX43UF_ zcgKk@;g7t9P%4VNLh)$BfMtfrNNvGA#A4E=Y+cn;Kx|?cxb$uu;*vPzFJ-m=3Ry-ZqUh`H5GR7nz zuQ;hJO3y7cj^ucRI9k8;vHK)H1nfjm{%7fftpv4KA*0@G*1xICGB> zu0B}6BH4PCU_8%&f_c?h^{~1uRw8|`mgc%q!^iT*pk1sv?m@;uiO==AS1|C|1)a$# z20a-EcX1&?@L5t6wKf$e2TT)(#mmBtkt7+#$7DGuBi`F1UfK8RnI@ z6iqVUp#8T;N-(6wQyNxYaeY-2xHz`EyNQQ4PR-9J~HqW<( zUns(i*%47Fpyu8Oz`fR>-hNh5v#L1#NtgxEUhM3V0vfCryOH#7pUu)ge#q?ayFGIx zq`icKG}d?J_BA~hl@7hAXLWg&?OZHuc_I}ZPLQqjVMq@J+xR_p;U+k>8O^j=uf9`o*-~G7;qpjSRZg0}e+&jK>hHyiafo!eDHdPk);^XgMV zJhPXEW_#Fs8Tf1NY%8OLhb+(B3DBydT6+zlc#S{I;fRY3c|FR)UIjw$9%k68kh(@lHRzz?45Te;QLof|`=z1q4gcjdS&~X`PO?gA_z;BwfoEp*uD6lJgee>zS+xtP2S8wwf?CanA@L0wa!)$+aZ2}X4 zF`}1QZ6&MLp`heZ<_Ed_7pFLGsQCu7h00#K-9|8K?NV?d9 z)l+@qH{tvI_{~YsA&3Bo0EhsH0EhsHz%NDMpV6ED5&yf?x$g4+wBEdR^Y@1*P;W-{ z`TcxAy*VZE_wxyzC)WdiDD(HT|Fo%>>CI%9=*@1QFE$0FH(%rdih&4#2!IHH2!IIu zx&*#?{bDi)Xv=F59#VfPFH;d{E=eYWXE((i6DB$V27?} zKYnD%{WyLu8<*$qLdPu{5ajpAl=gv{ZbTEHqT%N2QwEFUUxWHa|CD!_DPWA8iKla*h|xxU6u zW0J+6T=+l?Cop4~+;+uA)1-7ADolJH5I05pw3Lu-()@-MfO9P`K-ry!Gj|0sz?%69D1P zVKPwHoG*k&*YhI6{T56V`N2 zp~rOFX6QqV)UtNpwVa|n&d*zGi%fl)R_^=dN7rZJsU6o*0%w#QiFe-S^nEjyo z_R*}r{zGIunWwNu$f>X28A~?UWAGup+=w~EAvzpp-xQ(=1yk*7&t`{f6%-AXd@%r- z7SIlUs}RO&v&|sY-3Hn`iuG`PI>s8c$I$k=DoiD_#FVy54tboPEZW$kBZMa&_^x=0 zJPxt>qw^Z&y%@Z#4y`@DI)V!_xxxqxt3yZ@dT1Rm6k2Fi6+8hMol1yrGBW{Qi{nR4 zG-g@6spULbB4&x7k(b79s+SY-`%cQ{174BWPjrP-)p1rZuIbKe;2*EcS1sS98uQX7 zoHk1e$n@|;ihK8z22~FnpLr~{t+*(LS8r~GbBeSSGMiTlJ-SXesI9b1J#BZb`QSAo z7PB{1UA3N}oJgyYPTh^AK=D_6<^0@i%1BO!p$uIBihKT6;vclk$lL?8#Cyp+d3 zFS>5z&jEQteiLz2HA-sg#C6zRIZ 
zE<s_tZ>%{^-l`}`c4wu5(@d9~hh!hHEbTm8MBeQ3rf+%}CSQm;{3)F|U* z?v^i<(W|fVQbRZI|6 zHO*Cum3W4WK4i|pye%($E6R(7q@VlPfP886aLi^6ZP_O#9$J|h8&JTlLOU-hEnh0N zo+9VfN*A$oSvyNSF~LX+Q) zIr-LEGf36Z1UtmgaZqX&)NiBXzk*qV!h-yw;3QD*&Ww@mFwecksaG+g%34IXB$kuj z*6*_R6g_;?Z2TU8*4$_LxC~ZQO`3~A*l8l6T3pRuWxP*Nte{q)LPIc#sF1@#Uk7~* z9TUDhrKd-5to``|>mAWsCV1f8*3Q1DGrV4jL!)xnZ(70X1|~HPA!?L8QSN(8pujkB zx9r`rNmiGNv%9(S6zV>em#!i#Y#j0|4YC7=OK9cpZ0s3@{597*vUewJ`Rd9xhu1ZA zOL3}tzjzcSZ$^?(X79)6AS&e+b`8247En+PND6xC*JBJ@zWCBObsIglNFaUpflKOy z;Q_qp^~?p0w^Q~vQF6B zTOMcSn#ue#GbyuyvE2d}h}UcY{zC)VPz~5Ji3c{suj-z*%qhO!W03?KL(&Ws%VnPl z?jEusgNRCfkjfR~qfaSrfK`ASq@Xc8Uv0NQ2zHn8%k0Cq59tCqP$KbJ2q5kn?CxIO zg;BfJ+wugwc)t_VO~K-he*1!otKC$rBD4~Gu9B&)^Ak`1%fCy1n=$aGrZN2!T(AJp z%mMoi4bTkwEe+620$HVgMy27Ov7h{P>#%_KlT&lXfBY`N2=3o#Kash_e)91O$bRxO zj%!ezKm8#%u@P+sbP`r$n&IOJ!Vh5iN2<41qc&-J?mtJm)8 zlu|@}#F5_E%F9ASL?AUU8w!SuM#58(HA*Z_7|47b4{79+xXx|0m5^T4?dpg-7KvKe zdZ-|2S{~%#eiQwE_8r0@I=LJ%4RK)_aiJU6vQZ(n1+j` z*70hat_;@;t@(3=KCdk<8`W(f`(zp9PWbi?1bRE9VGs7Uad6w-<3~OpnysZDvfp`w z7CPK(>m*DklOBeUQ@(`V@1m?D~$P};1ika3!f>iHc0i#jv=2awShj1o&ea|k5 zS)V!tXok;eMo@&*E@Ij|V- z;XoieL#Vt@MjE}&ahrx9n4SxcW*(({UDf~X-I5nOWj%?yNJ>rVxUI}IvA5ZXhHUyj zY84|6JbC~R-R2d9s*YGDfjWR9u*!S`!K;#9o|(YpQ+&QStvajIVG($iCNlLEx^mnq ziJ9mst(+MtMe=sin=^`rIAM8vFX@O<9`IS~3R)r290<*YU2{a;6!PCu9~**5B`h$3 zWpbuy*qE}mqpOVSLTEv6ys{A@6r$sDY>LXjTfsN*fTf>X-#o?~JR^Ou)r)>o*=Y)jv}d$A4V`2=*_HM~}~>|~`ha?j=5 z7l*l)BqD>-@7_WOx5AJ^BhX>1aM8AsrCE`nxxM>%nTGX#qRr`7o|6Efe`0Vtk*D_B zUiAbvTlvV*=keDjgFE!NE#t!W;b+nsbmXAhRm!7%DG!?_f?6!2LkG}>AQW9xQ&rwQ z9A437qT8svRUSMgAb_Ii^Em!Pn>5~aTSL1(st_<4TIbY+iUq$~I0$k#fd=O$>yPg} zxA2L(?0EQX;2{;wqj>fo-OLVFo^Ve@u$JFnv1+o4FZ4F<&w0qJzG*VMFJj(ST#8vX zOA&=L71)Zjs$ThsN{!ka2B$;1ro27>`~qLxMFD-eAta~~%iKBdmNjp4mg z-uZxQSrd?{x}?*Y;5+0|dnKkKj%}siyj%;b*{EDUuTx+0m|$?hWBGBpVEg+-tn!nO zzf%hJar}4mm(lmtV*UNPGZbG<8}xI0F`}unp25a~shUN0w~1+Si6a!&9Kwoy5|22V zn3d%B036-w<=V$K!Q zaT9p2rmwsPYp(Ka6*qbQb3JuObae 
zhDdssH6h%3(_?{T!I|iB$)-BEpXP^w_0Jcbk;V+W)vFKt;_x2m6FRTwuH;O~t@jX_l>}j7~Mq4P`bGss`One$|KgsWCykx3~Ia!}O0W z?B6I{^=a3{eAC)QOLiP?%CPT-%kWzJNM|N@@&=&C{1&6e%&G)RI#MFsJG)z44u-<~ z;Pb#olum923jayqkL*c6JqRc-)rbE3>v5>*&lIrJs(xDz^P~QS^PdPV#4lTq<99); z`+kWVm&a9U=V=6 z+kD>R@Fv+vpc{qHezc}=!!mHNoF{z1SyO!&$?r;7(pvOxb%@Nac>^!y?AR{f$fO1u z8;?D*-)&|7ea5QZ!8n4}eM#|6F&abz9}(CljK@ZPr2}I$2KRV}{ScmX3|EhYun_pQ z)-ui#^zKZU@waKV$WLq)-I0`9H43UIB}&>Ylfk(aYUD# zT;GNJdR|}pAcE-0kTy1nf^2%Ondc_`j0q8eBTlG+u8E|H6Ov-=Y}5m=y~e5e*>@+8 zWlQ5z-+b{z7IF^T$SmT{`XsV}S|P0Rt^KT{rWJ%3bnDUJ@w{=t6p2an#*g*3_wV02 z^gY^QS40%zJ`moJ?yFI&FCK;^yM}KNR7EDF5F*m3?;+e}@&1*NS_(w%{mFLnCn1mT zu#j^f94rc|m#=WHy4CmD?gfk7)O|u+A|^O#J8;FibCRdy-c*meRV*eQOzOW z%%5Q13i^QeWvCiTw453nk@CY=tV^FoWnb)y-`g~G_x zNN+z%KD+s8+VA#okAE&KM|hiMR*HT-jt8ww#!!Npvo(Sv7lOSUr&P7kDtEV2Zhc1a z6AWsIsO!a(ta6xBO8Q1Gu{{sGqFw1()3JzFiU|%$nu7JBZeZtzZ}_O)n$XxP$Kn5ODkkXy#{Q#lPRIz#@AnOG{t?b`jDNpC7yx%excXi zG=fm9kl2DO!OssYvLW3I^eXb-`rt8^8a)GxaJGJdOk=_#PW@c_9P&uZx#5S6?6n503rY)03rY)03z^9 z5r9ZJw$9D&=1P-@1XIfMa5Z0m#`|Q*reZcy-HU9X@cyn3@0-&0@ zaDu;L^id%#En20Z8jDq_I@>*l?sVrZUB-j>jX~%YFxT6DA)+KCriaNd_BJT#zT9>Z zX+vYZsl|D-)}uqql3R0(y(S)$FvhDQ-u_-d4r240LPku!?zYCJ*87zcGW6IHn(`Kn z9VR86oiJ`~3{b_J#3`aJ^rOwfoSh%&7YVhc-hSSiFpBafn`gKcYxM#BH7HctNo zU6NSuFuA|P$Fr4&>>K<7y|#yY+p2udGu}|Ecp}s2B8}H4=nl%kH{c6jaod-+@nJ_x zOV7klOV*8keh^AQXjEFmkBisS{o!rU8ol~qL3<|Ia8M!#_6(N`8e)s_$yXV8FpRPW zn{_&6O}O{v%lI7Xq7;19({kaNknfzfJQH~txBWAb6iD7BsKMx2j|X1gB1T0nAe_4; zmjNd4Rd@T|i&aXS%DgGF4u4S|)k96JH-s_UZ_Kka<(<3IgJ%mX304A=y97udr30ul3R*J8QASwu-QM8xL7s6{f|IjT`NUOf5lTJRf|IzD9@kUrE*QQf zqc((tNx+R%8LA(Ik!w$(9jrx7xgJ}K+Z*cRK<+Pk_hHv{3kA3L#Li&x^w}1!e25ba zp8sYljj~%^-Vab=MyWEp)M4~v2#MQkmOR_{dqNseP`4SaehjXsMFEk0C6NjO6)byu4xXBQ!KkvYz0@(Db`oT#b*FVK*o|nD>AT4z z1Zm$US{JzHOcTEq;4{)GKVP!l+661)AYnjDDN_qmmm*>=bvyCc7@V-+je&zs9JgW- zi?zy2;Sib^v{gwS{A4T8h@NaCwpt<~8D=27A|CR=5ZUdfuwF#$y zK>_|9Q~YnxIsO^@$v?6l=Ro~$w4cmhVn4}K0@+XghG7XR5<~z*07L*p07T%YCh$l8 zIN#qz-2bkJX&E%U64ktfW>DfAP4`kO@9z-AOt`r@3!e0S9++SMdff 
zXY2#1!R2gEM5);fjAN)aoPGu`znj-g7QDDK1?owg0Jj?i={ z=9gA;_gXH6LD1MzaoF`JtoO>p&S)ZbsK`GGSRTtC*z>@Kk+PNKo!}X-eqVkJwQ~3< zl21BC#FoR9gwvdu9=VM(qsDbBQHQpZ;caMm_GqOi9)gz(aeu540)`O(id;S-Ef``Y z4S&(Qruf)5A0fG-hm^D5Si)0ueHIb#IecvyIczBFio=Lv4Zcn-nUp))0i~q4W)D86 zP2$?GOXa?pbpCuMrkEo8NV)5qZxw{WDY(gE0AL)qszT#LwMu2Hv!@ zc83dUm~4#G9}HlK1sWe%K5{5cy29xHHtdrwp6bTI;X1W=rjd)GhJa8pJ9F>GyKolE za9Ta#JSja34v#5~Mq0Xi^H8f5j5@r?;?EBeFm6qu1;<{`a`*h?DV1b*tz>m#g{x-i zIR+L|fx`yW6sx`wIlm9`$JQs~95k7~unQH~uG_~coH`m0=NeF}J zV|rqFSD&zQccul}F&$4FTrt@3R!=nTW9cuPkS`r)6#C2w}V4@xRz0}}7WdInMN96DPiv)W?L4ZR>Mn`SpW z%*}OVt$C_bltn57sTE03Vbja1ZP&zC*F{Zr_BoqY#AnwO^#yvPQE(y_ZqrI%)wvy+E=3o*y{#(AsCWxPcM88BFQ2~Otl04i zYT$t>TPL%DyR|Ua>uUy40LBr83~2)@dwBTX)WFTe6%ISSc+0#1ZX7FoH;D-r1!d_Z z#uxBN;re!@Buj9JJvgn~+sw)p_(?q!ZOqq+AV-s9^l>V*c&J}ac0D0!3$q=vWs1d` zoLz5Y-8b=1_)w7PTVB4dc(7VPN&2#+vv%38F`_NrN5$p!y&SOFjBpy?ENnOeK~k^# z+|NS#Yb~C4;V?WyZNz8a7WN!rk#Q@^jqfk3+eM4AR2nPJc{GNLt{YYpaP?gk^8AzA z#u@%J;9KJ@>=jjJ-S`B-Er2K1ic!)V;C>~;s3P|ulcB;*$i|%*ov09vjkrGTw37<% zsQH1B3N;k(?xr<%cw)=G7#w@Me(!x6l`9ej{W1SGDrQ2u+(r{U7ETgVf-pBnFLA0l z%SwZ0Er5rVs*rZxB7pZ1%bH~hZMuBOYpT}!Z>N5$k4jKwKm>j{0)J#r0_s6Pd8t11 z53R>J)xm^Lw*TSxDx|E=6tL5(o@VDI+7qRe$mv@JzGnp; zfe3&IfCzvHfCzvH{89uUlTX%Pc%N*(TVEW(j{9P^4ltGnWxJTkYB5+ti9bAio%>!( z71cX8EPq|`h=O3`{p_Bz+Rgj$1*7dc#B9N^{4-Yko3G(!JvgoCC!Wz@PC*V!-OgI) z7c}9)2`g$P5#v?LSgLQ?hpfq&YAL)m$D_r>hVN`j{A8I@*mQiZ!>#7N2xK@TOI$y! 
ztrme=G>wdnU^|!Zoh+WKtoZKgN@ShLH#jgqJy^B5Juw&5O`$YO5%?wJ%k9?jo^%f) zG?q^&4#t5`CZwB+H-oIC)9_7)CAa*!8Dck+OV~JGw2w+YB~s*N!&nw{cK#F`;;+Vv zXobo)5m{hHeQP>SKf0sdX%tP;CkLht-$rJp&BUv^T_;?D2PtIfODtwIlOK_V(TiO% zh5YO%#VtMEg^|)nx;8JC24xXQAqu1Cuf@wtiM=EciGdPpptD6LJGgsSXc`~u>MGMv zKVyaBWY+u(9=LpL3rTAg?`>QiL28KiGdl;5IrWA0v2IlLWFKn+#cb5SDbx(JCdhXox ze9!;RbkCi8&hv!YIcKp??YCCd-t}YEsuQ`l!|_3AO|sspugOJRU3?41+#h@Di#WA4 zaa_(^I#5j=)0<~aV;|=*@DXc2Qxx`54GqLYbZSJyHekI;w~2ZNe*fi65glABYnTOX za8t`MYn?`RZ8Ix{wapKmBJ(x|0-31xBrPOrE(SRg)UWKk_81D`+`_jr)3qe63BxEL zIR|TKgawPSAd0+3!$`H8kU)dwE?lo_e2+q*~Y7NsVnWRI;BS6`#nHC5?A7LqgTlGp4 zS44i@)YkBaTYg{exsT4&CTMB*tXj@%wv~{#I7R7qnh|6UqcMulvO#W~@KY`;s=qV? zR~TW2i*#;&6d!KWHV2VuzCqjj?JOOQYT@jsUIVf^pmehIPaZ_a=5``Rd#k;fAFP$F zM5s)lB2uVk%2z4r_{1g%UcwJZ46weG8LK^&Jd<4KTW`h1Ye7HR^f|>U2=HS@P{4N1 zbhFzwC7~ZQRjAMQq=Oh^)PuzKE7oE1Ve>hQZY{jljISf_dnH&5&lC)qjZMJm(zIWe zY}jDvG_BlbHo`YH6FK~DWNBj)^D_|`;w$0cLM3XV`b4j&0d~RQkvHOZPyLW31`y9# zBWQBIk8S#&Ppe1iaGo_)wS|3cPAjNhhG7w=f6_|lOY>Tq0d?sP$t&zRbbq43`nk=ae*Ax zAw|ROE|&N2-2t|~G&#@j*5X3|PS9$54H+6rvhHV0%pt|@1szpklw-;g950p$g7oog zWL~KmVlj_P3a7x4U=MP1!LweuG&n$D`H#BO050a=i@+bFpZxRUalYv%KZVEqm;EY& zJ^qFI$;@~3lO}(Fe)8|-(SX_j2mlBG2mlBG2>ek5e$`Jx|FV0bHxMu!5I{!x(`A(3 zv<5&;`P0>u-?RomQ~`)8f7I9v_#Oa(e;t8;6p#Dmm#_X-emDN)@ABg`#s91B5EI>c=0|x&2MzQ2@*+R^h{U%}zWVLp zzb`d@c^iu8=d1nit$={^fB7I;^!M%Tez9u`|9v}`U+ngye&6ofXwSy2l>09|-zxwmXor%t$k^EHE|L1Y%zxox_{_i9F=2w{1zuzAEznrE> z?Dy@y`PE>=@7sOzD}mVGxBKQ-1G~R(_sy@y`M*V2{{D6UPd{a5^v!gR!sPK5pA0FL z5*>NlJ)8?&HxN`X_pmp3so)uc9_VqrkofNZJJ>pYs)gdfjG)KHn&-5xt(d(;4sITyaC$nBbP-xMisnt6}P@c??r@0D{ zeziqqYeXn$jVB9bA7`xBt0yyP){|h^S$qd57bU!-SVxyrVm(AJ+3N`iYFq4cB80?i z8@~*GxaA@uD?O9;z6k4e+svzrt**5!Dp86OH<{Z$NameR{_ic8s4KA zcc_pwY2Z!!B-=gulY+yf!e|9(Ey0(A5*=+JTRFYqaVVll3msFG0*WfbvGIFlGse34 z{Zyjc1f2oKHU%Zc!G%jP=y{E(*ALYz-9KMD6T=e*tk7nYu{PRRRe!HU|=L9C;W5KM%Jj9^K+ zw53lTMP*V3#UY9i3b_o5ML>S7Knj+mC^=%#d4Eriu0W&>KZ7suyp0YSJZ;;bSn(m? 
zi*F{{ON>&y@S<@hMkdQFeN{6slgsFQazhmlFtTwdt=x^2F|fVL!YOa@t%o29G|6@) zbHUV33KY=@S602HV7MrWFXzr$KfJ$rG`p%zV>oJ-W7*}BP8W4w@iwUS^4DYW(GYZ% z(?+GIPAU&hHbZrlnW>FXAr?ycTcH~#!93crsMLPj`m&I1I~A+UaHQ8|_6*h%#6BaQ z{4N_(?NWu4dx#rc1-T@ZQ0#RS2K(J8i7xsJgRX>0T#nfAxAv6pA%##W5RrAtV&gAX z7+9Vt5ql=_ULo%hU^kOCfWNecgg9@dy|vxubgw@MHcXzVsRxF!z*$;z?@W8BkcA_p zu1RqYV&*uGbJ48)NOAWSzUy>3pF>E<$nQLuQvr;ZrIZs@erTx8rxv;#jVA(3sNE6C47<|l<#V^~Xrm;$c7-frhYFm}iC2ZhWO z>A;VI1Bq7!W7^n(<4+|wF2(ZcW(-NFSlyQgJn^j-*nRD|@+DS;BNI;xwg-ss&POHH zQ5F$%Qh?w18qT+-<7}STl328_bcf2rUlG#i-S)KTfpae($!!XVO2)FjOhC$5Zih13 zI_z$yG$ejh;Kk09{9{RqAOW)||-n(XKO?eojGdGN0fdVe(q903Rb2mlBG z2mlBG2>eG80RK^L0rvhS!dx^s83bYLQ4r6g142dZ9yxjq^RLI`FG@D%1K?E=Bv*%(V*As_2K)tc+#-acwFuqn(bR!xQq?N?8*Co!$BqrHHO z9N~q?^LD}JNe<_!>(^Z<<7B5yY(S&4lC0+keM%6+B4Di}E{j!`l(#A&80N3kQMKZm zt_qTNkrRZ>I>j61B`>(D$gWyT&MB&RCSU^?dE)~Vv=M4OIwsuUJ z*`a5E2%5M&6U4>q4})0?rh7DQ8>8+%SQE0+FS#_NChW-R&ad-13M@(_2|h}KOxArS z&U$%}?oWZM8Y)LND!{-|pcW#%Et|Z&ZX%a^%a0-2d41Zepd97;OEavV@gL)lzmi-` z5)-+0CY68@TY2CeL!=Oc!>l3^mhE$mp3nMAY4$h z15^zdLX%MG;T+3pr}e9Q>Y&YkIaG*DKMM8*sJuapb@CNCjvsYkNR@7zko-_msauiaDN3{BqxJ&sp(`!k&+o{~I z;;Q1iUFt6pUxGb4xS6~cEzohaI>HX*(q-3(LU9cdE0_jeq8w$G;hY$jY7CVb%o(u@ zKYQN!s>w5)3%o3Yp}wWH?c~KqH&flO-Q#qx=~@@aVE1hruWK$i-i}NTjM)j~TKb`h z0E;T9QS&CZ@zW4ap6Phy3MJh^@761l-KNtr+plKVK7>sJgb$*ExR!M$q)qJJ1y)9= zBmp5mTsOcIgl)JOYuh^e?+H4wWIb(BwVA9?4%mNPCfu~^qM_mIViN4VWN{~S&${1| zBPY9%Jt2H}Jw1XX0VkI^9!)V}NtXLn9~~}IM(u{6$cHj0T)4oYabX^qgm@T&IDnc- z`Kc$T7HGk;qVfXK&!aS7YrX&;2vNro2ANHulNG+7gfbWM$8om%yqZpVfjln%=>sv| z8~&rkE0sXZA%=?4_%F{ETC-Md^b4G5r3YMYC;H^^@5n?ZD*fgRN8xH(HDzKNx5-=S zN1Kq2)1V2nc@|BEhy2)99qMs@$`ekLM#)2tngzy0$%@q1iq&iq<%*)z^n)81^<4n z2&fT&0Du610Du61z#l>2SH0jjG1LIr;7^wge$yHN_25re4}Q}c01@F&7ZHa3ve!iy zFa*$ls+QpXHau?=*}72NeO*A&V)UC&{*UC&aYB^UlS3=hMCUmk03eAV_Bi){leF!W zFCZ+`d%c~g52T#zqMzgv9r2y!p|!a#T`Z@MCZg2lrM60BvFW8b)hlocqGhptz}L|t z#$YOUjY2I=%WY5c2IP$liuyQEH;uHryAOyv`si~w)(pgeXtMO~@4w>BZhN1grM|+Pca%6DMb>EC%T*BnwuU5Nq0c8eV0quHT1cQZXcf zM>snpeKVRdA!O`)aVWUl@vH=C-Y*pM6qq3 
zj$l`F4!!NBPSD%7jsO)pUXA|(BWl#wvncb0jgkxofm_I~nQRqq`Ej4?LDaZ@jGZ@G z4Yao1@c=4#HAx4SeU*Wdrh#%NE6k=~9)dH4(pv;WURXlXeIp8N z(w+FK5e!_Woag!A*-^GsOtbM*P)y}zCIoVCLw6u|&gc+Xv5v;Tco=b$LoyMW<4t2D zw5_7c*Bh26qad{mP5kuno6%Xbu@!zl=+i}SN8RZS-+c7;Dj9nV`qk;I3(oP|-HTMo zIv7;t38CE$wL8v5Hm&SQ-qvMg5;3R-^5k=ct440-gZYXnAYnl|w0Wh13y+tBJNigX z^9{@a{KvGFf~(c(6^mE|^=i|-`xQ?5hc6l0OM<7VXi{v_n#Id~RJ+A({&ZfdHCM=P zi1S^s0#iLfpAxdbYy)_oaTlRJ#H^Z^8_|k2WD8tm3(%i!nNbXQE8^*~_eH(62uU>( zv6<+#+{)$AyBMC*+CseWe){4}(cc|+b1&;u&wpx3Qi9Sycw+X*4)k7K#I4_`2~oi! zr-{1Ms?-#I%mvEgZNx07y)$lLaJPz}Ef6X1JS_8(90Tz9qCJIKcrss?r-k8`HsdXr zmscCw`=n5tsdBVY4s+5x-Yu%Q*L%@XF-k0ZspEAL<}gn#35Plp@PuX*3xs~Vq;#ZK zei061g2uHlu`UUVxG#}1j~ye|45sKbC@Jw~Y|_Z-L^1LsW(!Cj4Ih*@wX#`6g@~0x z*l#;*#}jvbd~P#_Po0`u$zF@tS!hObp(n7g(~^F^(==MDah0XGxKw>-g@lrqChbVCQ>3)PNYVh{A;RD zqsJ^!9;=A8V-`IWF+B*`Yr34@levVyBXj*Q@V7G8w=iu$Sj-VKG0%!1~D)#wOHHwf!(VSThtVH;=&8ko*pm5+TGaic08VXhxU`m`)6I zX|8)LCh>icpS#5>@LZB%MP`HbqE?NXr)l+$Hc*hdT zYFSq+oPjBvmOkd6XOlCVeA8UGv*Zk;!&jszhfWGo5Z<2TX_((y_aW|?3eU6eqO-QV z@*08LwKp1>oki$)_4TDnFOq$sJJ-28FT$rI5Ce$m+6Ny#6ktj*bdbxh{AyXgvegAHE7EgF!*h9MsWAJpB!KDdi1B`l4>TI{}L7x0{%mw9TJw6Br9Yh%sW`d z=aVy(o~?1UcBBrIEIPuigu$<&m0omTqTbsPJ5n~}c@QkT&*Zd}kGIFxxQliQxwxEd zXzihar-2E|FObXPbO*ti&lARvLe4$QQ%g1%YvCZ%a@uE^T89xk)5vqnseG>_)9BgU zTMBJ-VguGkz4_>MmS#COFpHhZg4$JgHtM-1n1=WW;_Dcq~)%6e00)sjYd%__!G0{pi&9sxpiE^k0f&l_Uv zE6+$EbUQ2vY_-d29BwoaA~T1fO5dXo3Q^INxeydxkwzxmw4=cMFtE!~0c3@z=j$6$ zY|yH^sK@t9GxROjI@a3_y@zwi4rfJUa?{bbxEwQB4o|^JF(Pe^j6`_|dC8KwALx1K zR5*p#E|MDQWxQ|31?G8w_QTu}qI0XAuaB@gE#ZgJVM8@1De_!b#LD0r;PnGje)&4Xva z&dMoCXdTB?m!@kB*WP=s<}^%$ie|#1uwtUh3tS};$K~m3SQP7|v=-1nqyz1PoJ;KY zF$7uQb6P#>t!OY98OS37OjT$CHp~$G&M&8A&w({!@q$9G+0)ftF4|=~noD0#-S+k^ zq=P+BB1xddAJ-!NwjJ>1%4l^v1HAQS2)%sgR#er!3-$3fG9n>tUy}t2caF6Nux^I6 zsK&W_O^|EUQB~B);-+82F%7O)W^vAww8|P9DGH6L`USxa>nP;DJFmypJ44FiAe@^m zFK0T3!tUy}vPUU{J8E7g`Z~%F9^}|&WKHjGvD5eCuo!#plS=L6<*g?nQZA^qu0|^= zF<-+8lky2T(ZNe~%bK%KWJwMbIO;C)BK>JG1wKV$k(IJPGF2S>UT5gv0&!!?|@!9+N?PpN9uU 
zderDhD*V}-@of+|q>9MviX9m`+Qp4`-~M*W^=A&;fbRwn_#YAYW0r6KA!!oSKmROc z{8N{23tas^-fzpdHCX;uNBOV6rT@zt`|7`M_ig$1Lgeq;eOtc$!^H2~eOtagzxVrg z-2lZWZkV1_aQk{!?#Dfb05~6Zl8zT))XM-^$;n z5rq1&cHlrzf5~hK@^|SegARXI7LtEjVfpLp@6(BXQ%Zg{`BM-6kuLX*!?*IgJpQ1x zia)i1z>ojFsYg)1q!$GR0SUVNxdDEv>Q82WdHp?q%x@q4x8)zh*oge zsjC07{_>5(xAOmu#~;#Pv?%{6Jbu5wd>co6U6 z3(6VbjcSY#?##lpUK_Y7YrF_@dQvN0?P=*;x!a%e(%B-NT7(OUYSBC^iW8#s0uoyD z1-!kKL>zZJxegZ-_jOkhp+(u>Ie*a;qd^r(u)FK3n$cT#Zwl|&+%9aW5ia$T`h;#BQ3Nw@^hBGngKtpa5M`w6m%mYc{E19e%NGc_bW>< zB0U#0p(M#J4N}gim^*<~`A33mzTVK8yd5Us``}XN8XoUQn#%gRh8s}_CeXA6vPO4$_#K@5re8FEGZTqv!zb72N4SOsisc~hyZ$^1N8 zHA=^Od&qktq>x@=WxXpC!ssgwXfvd~lbN;{CYt5QsTJ67SJMVV)58-<8r@+P7g1^I zW5+eLc<7P>|18|^ihw*M*glIlI7)tmc&d@d3rvL+A@)JO_NCDTvz$b?z=LH>WY7Vr zte->>G*7zAuN7%$7o9%Ep^W9wB4tul2c>$Ll^)4V3xfuK^o3&_zWF6fS+!(9@&fhE z0PW{4;Trm}+f2 z?7KwOD3i%Gj6G4YGrYhCb1hab$UI(g8ub2_H~cM#1dU}ba#6=!UpfQP7ph*A11=rQ zcEi#-K5@ER4s5)w%!uUjgz$aG{BCMh?!t6cAfeKx@QlJ&Q zIZvB6>Hx+`9Ax`MT`1;KjHw@nQr%D1nl%sf_=at?cchJO>@iHt5&@Ne7?uxm?~bo@ zKaePrE30O(70)x!9HZEOU$@pwnB0TiJz>Lmp~q+_m-Wn2u*?1hEF-Lq>r-5EEeWs? 
zI-4iP?(`vh86uBHwnyKWJq=L0qfFZfHA@TcmGK=_QHCJSJ*;z3#obnxdV6fZBCjYrz}`O|#Epn%p)cBOV6 z#cmD=;1+3W)lX!IG|{qB{Y|sgmOM2jcVAy{G0e`R+6Pese`t7I^$GvLZ)pf}G$Vq` z_96Y20rCqL^jta1b=FF1U7f{()+AQDp)T~8A}d2y+Domgfq!gmx$HBxA}Z>V4DL1s zl&;0TN+17y9@$3@mB+Hnfxy-c#FA_yU46{h+~wGM77`ieRtGoUkG^SN&)~T;CxwP) z>?jmSKD_PQb#f4wgGZi-&o>lpI3-LL@{OP0jbM$*4xMik8WB;J+%y%`p(s{|98P}I zb6zfYU;nnpBB4bGZZwbQnY(W~$7L4%#wvo}hjKN+;ml!Jg;<)GL8~vYu3^5~>Rwfk z3CJ=Qc_U{bH3S0AJ}J39oLe7Phr_PKR&}hOn@IRHGVAaBCk<=g-vGfgen>p7iBVJkC=3)&<}2W=LY`!oG4G1&aIo=A zC9}gr5LgWo!$+@+=apr?o`7_TafAXD^3E_1mlk+wUNb5Bix&jhjlz*s%n%r2!!|_m zhi&25@#VKT&w~ti(#+myeTR@AeRD}It#ylK#oAqVrCZE#&v<)RJJaOf_hKCa(=B-1 zYG5ii#>0PF!VHnX*fNrSN@EhJM2_5N$O$%69JkzQ%NwI2XSKe-9LWJnBR5FK67H={ z??YTYi7Z8yM3rQ?fNxZS{w$(CfZ(}ZLZnuF!W&nzn))TR4;>oHoLPk4$u7nD<2r)y zAl)z+%Xtf>9hc~U%qlx$x>r%yB#5(QE*sh=VgIv2(HYZ#j?+WbkotVK?*i;a=pcn; znrpbGUpmoIT$z2VaxDMugZz&1GYBc;Jt_zMpdZxLzk|_b^2# zuefF);cIiB^4Al6B5n>=l1p)TOT2*4$`YT)~nsw^^#vV9H)2XhM zOd+y*^2m9fSMRf)f_Rs!*8+_b=o(3%SegE)M}qIg3v~U>to^UGX?*iW`Khw{2cD?wxJmDY|C z^RiWfwZ?aab#t%gq8o;6Jfg)(p(5?Ecmp4S3Ea5w@?^M~lUDO<0}fzR8NyK_Bw&u_ zhDz^DA4ZiI94l>JsTPpr?!wqY>Jc(h2Wk!`f#xozW=*i-Iy=XeiexK&a0?NR2OB4D zeZw*Cd5g?_Mx#SNa0NX%T1K-f;<#bH!#lWxY1leMr{**AWH(f@pg(Ej-d>s&RmO=4 zZz)0(ym=X|kSY9nR!@YF$Y;94>6Y?r)hZD(o<(dss~NB9ZUobxN`UqU<6D~w)oZ9NT!PCeV|#Y#6zd(TuVoM{=(8+ zFe91PAOVC{0(ae>D6@RoTeJirV0_cVksd2Wm2KvS*S#a#^YvEql<=4MA^Ou3XIK-Q zrDBVkdM#A+QM@&Rte`@YBl;CCR5TwlAgFRowmk5=}2`IZp_Md@qi*xZ* z=knUgP@YI5WhSs~Yx5Oq2(obWa;Fr@f&fD#v2XsM0a+C;b8RX?W{K%l;qY9!=OGBB zqg=xRy@T|AX&0U+i|u?zrb_Vr0UDR;kFM~O8_-)bC?=;!oda*TKOX15z!okkYr%PS ze$huoB-wM3Y!4g~O`}ynKpn zz?H9H=ez^s#hgftQWIw88TgJs{3^~X>Ajh1n>uM253O2OnrL5TakOmKWUynkN#eBk zMc-O#drCkXQ&kcY%kvb|Ok6LcGRBy46>86#xG#9}pkNkssk)oTA}nZ@YJd+xG(=!n zoW`I8dvPq~Qg)j@3qfq**Ds@9HDPxISL?H%a2#*Y2`m{bDkyBo6>LwT>#22#RXt?1 z`6F5b@hh{ypDToR>qy>$Fl@()c5on^_EHsaD0A%t3+Og*Zls{MSHdE?Wgg^>-D4zh zRtKDf#@OHXs$tl~b-U&Emt`1w3RK?Sd=NLZpF1mCH$OZgU=qR9M%5{?cWPuNcC)1r 
zrI#6(kqhFPWOX}Kv9&*VJTo&=VV+Z6{RJ5*3t#~l5kE03el1BrxSSBkc2Hr6n6Glq*Ygx zD2we$XK4MWd*dIk#Bl0m)mgzVjy{t7;s#IB>hM=rIUB+PJ9-fiQDzoWuVmXy zi}K#0@GkUSmSq}HFk3F$28Ro%vtq<%J9|5RHWnPilfgbxmp;;WIXA%QKpYN|p+9G_ zWuc`xPYOd+&Ud*mni_n0!^ybn6w5B`8-FC**4lMj5?muxKZcj~%d-FD7RVuqe!NNX7?> z6elIm-mw|vpDE5>a?qZUfCGI3VYI}8pL)RFyUT`?-3?-k81DnoX27k!roOX6b z=~5JP8mg+#BsORGfiI0=8_;Jqs#{Hc!m5Pu~>K5Wh zQ5loIVH(#l_{WVVg`)9Z9ln9W&i1S90z9%(GA6&{>phv~HE0Pn z3x~Rfm-b-21Td~VsgAf9WRb+fB;&;zo(Xvdz)-h21;|X5r3S#JSpiw zs!*uhWa`p=m=6#35>Cbw*JV-@#ZiVVI9DNR%1HdKp_-PsRCy#KiK=+;^~{1H%=}DG zmzsR=Yw|L)SdPm&F8i3=Vj7ALQ5f~Q^#w=}mP=3)lIM zHPQA1FS!%{{DPqw4S!J;4*C*|DQ3G3i4ADPww>f#vQkZ6p^Xs|FR#qi%_~#)Q!$3f z@f_9!p`qza87$9H5yiI(gS8Z4;p!|H^73` z$)Xay_8_=7SU+Ze;weETQsbZx#c$x9Dj@Jok`zGi>exdYeTrzO<8FrRahrtH(DXdw z#FvFK9o;liNNTx{+^5YyXI~Ah8U5Im&P{Cs6G?%Ntz0MQGe1t^Hl!I;Ma&$0;b$Kk z$JoyyVVH@hIH?6rNk-RwxW@M8d|(UpIf_%RAF`)8?8{q?tVXFsOpScxKm?AtU&i+wc7zKk;M1HR9238ZCb zm`Ef=?tv2us8xYEdObqSn9TT2;Y}Fuw5a6^Oy&_}L=_k_3s>xtcl6}P(}fB~vgX$F zp>{zzq=KA`afr8pd-wuN4v(N&iqH6TcSX6K?VvV9MjWg2#ey zK`M5A8FozM zv@S31AmzF>*Di%rVn}zkb=%5zP*%Z?;;{>j!V28h7|FI#GlCMGFU(9(&?T!J4|bq7 z-AgQsCT$eG)bew695FYtA~Ii z-5f$!mDa?cqkY-pjIyUji-wkbsaW}dUt0v%LbSmG%Z(PhW2{Yv=Ax&PY7Hv>tk@hF z9MlI%3+so-*ybIEkBJJ$P#DrD;LF*{^)RLjJ_@FMw+Y@sWE9CfJetMn*R*?D4Gr>$ zvllE)S`_xGiU11zgU(>BYI`qg z_EliG=$V5RW=&F_ylqO@!f^Y-#Hxf-EP`i7GSLplhxW0?I++K2hyjm_v$wWMVWVu7 z9W$JB?AelBcdgyQ$m=Jp)n8FwrM)bQP?wlOgJ9`04)JdVcOl^O58Fg!8AisNsW`u!GR3Lfr9h} z|Hxn<^OcSujYx;d&2ZnX{Y~pu&j=Xwn~kQW7%u7iz^%fNBZ}DhGS^RxB`oc)V-OxM zVm(rsnJT_y=d?8;;DP49@0T4~wBGG*ia(%XL@2s^slK3|R>eHcPtRh4ex@}6+65Zv zHKn*}w}Eu3qk3b$6@aZ|*a7A(aGjwr;_c$8wh&zofoulLUr)!q*4C!R*S^C+s2UAZmMejBd{qI&T+0 zZRS+{D6@JnkxmAQlcC*l?(1Y_1y)CN2a7mxdiQJs5o~X=_?b4#LY&#WE}AA6w{T5N zr4QUG)wb+lTt~5OD`&+k9Kh`Z$VV~!v$yZ__!NC=#*i)4`Gm~ug9@E3gW*4r>6zS# zrClQPL#l|UeR3Q*qozzjbUiNfgY*>`;iq{l{o$Y2QVs5*lVgL0fc8mSI`N7CXqfHnI_cCi^ z@P<9Oa!;y|Iq$ENOzV2|l@J3~(mxpJ7Y;sKXiJG!Gho&#)F6fj0KdWiphk*m^Wl)O zM9}w=8CbiG&j6OC|AX5a1$bhAwOAYqaD+za!l^YOK`pu=r2%FGtU0LpmG5JM^^WL@ 
z7RsVYeVoAfB{+#qn$;8^E48XWDdlU|WD=&l;t)b+?xCqP9&?8Ks|I&yd6WPG+)WSy32P zw+w?FyE+hK;eW8cG;F--lm8I2qoUX=rL7PnH56^W_MUXU42l+{=i1_^aJjZeXG^Y7 zZYEAXRJ6m^O+InWV1%$g7BWgbvbPf5)9wYb9vKmp1+w=nsijCJuG9JP9rv)Q3m^0f zvKe2p%$|IcT_l}=`$FbQ|Bt(#Lr9lsrX3Hyj8^6Fb%H1;!9pK%1cWiPbmvW9+?^7& zeqwL{hB8(xKzizmjf9Klt_T+(KDU_wPB}qY@pzRNE4YTTg!i_1ZowXC)dUtw$=Tkp zEweyuI<_j0xZ67?xg`$#%w5tyCPrUMqFx{!#^(ppBbq)^>d0QPcJBO7!BAv+_KY#T z(22rSCdo-T**+WtEx1gr#a~5_UeMtzs(pu3^K3G}9^R^1jB3=lclz8(MLl4X;bxEs zs;{*e=T6H()rqG;ufGf59UgNdkq@*YpL~7Fd=gxQ6YgScssb~}ZKVD-;r^V^0UPzv z&gKHNllB)k!0BsP3=Ds3s62}wJ=RsO?G4u0Qa(b$+z?w*n#q(FO)r!b!YrCD0U3oR z6O+*KHT>pF^}E9E=TA=SkI>~$db%6=6G%+Eim20qM5Y1Qp#|Fx?Q|p%8nT{PEAxnq zGB)~PY-cKya*`nQjaBNS>tLS=*Npr#La_>=!iye6Cb*ncN!Q*`B&H)|vLg z5zy&}-fmA5Lf^Vai{B=c`;vQnJ=ThfahM4vW;0W(@g-!~+Zo-n+GNuZcbpeYP9qM4 zK&Jw8@p>GXaFsU~CqYD-VnU;KmYvEqB3dp|pY@Q((>Vd@M&$L{TQTlr%5>v1lI9M( zO=kYdyU7upY3lh;?sYs1kYgYHmft{(LKa*a8)lPXQo(Um!NkAM7_mc?*IH+X`e1?- z1G?#Y;yl%(Yv))zY0{63SAw0$Ft+J|9(cA3bV)K8-g`^R)6~vJq5%PR%gWyk7C@T7 zrI!BFv{C4s5vAqB&@(I*d@ZbugAM+;562k+E;ne#1t!7!D4zuk|8blj|lV20`L}CX#anwum731*MH-y`kr1;`W?L>j1iz0{2MnXplSdD z00IC400IC4|3U)4>IJ`vLjlMJ|L0|c-*^K+J@`Mb9{k1|03yQwc@aVKFFA+>fI)!% zC@rDdJ9Hn5VDT}V)e#1{bFWKburM26GOZ1kNy$)%$5QiR7qV{LFvjEk2H{wNZ|KG@ z;c%I!Kdxq5@6HUQqE(o}i2|a?MOe0%>uUoRbX$#Z*!LeC-STz~*TBJYrE6 zJj73@nHWwDr}1d$Vgs2Xtq~U50u44~%vN>J=mG6@aZ*`X#GT|pPZXBVOHxml46DA^ zFB99~e8xX+W!3~E+YGu4(uFu^fTs;yg@Y)Thv|_ZT0Slzvh59X`0;K}msg*DE|BPc z<-LsFCw<)qp*2(4*C)L5SOX4QEuv|8b$y8Oy+Z>fhmf(o|Xf3F#`z@h^bZbT4To|rmvDBBA}s0I`ws=svFrHiW};f^PLKG|Akc10L9r$fTGt^RR=>T*zt*0qE5 zg6-FtwAzFxHZtih(-CvBx$VG_L(A#D($xKidE>JQmBTCwbfQp?X%~tEU`NCo*@MV1XowogageOF$g}SxEOYiGF?;1|e?><+#sRl__#Dlmb=tTyX z3_)e~?H=oPRi*cxOr*N?dAtXg=b>9w5WDcv(cQ>sp1qmguo2imn2hpiy~qrD|GZ3( zj&Dqkc+iRAV2UYQTs45%C!i^1&lP}PdI4QI%~}Fgoc6l7sCu?g^>ksHs}@IDQ}rC8 zjWBueUN;Wtp-0t}#ThDg1z4#361bc-<+P@UN*5H|#WXzzjzwz<@f~h$s8Jh~2236& zvz!`;s~^7l<7mF4-Q|NQk|5?QYpaL5S7qsrwHob}#FZq`*Cpb3wOB zgKh0D51*7vuXc>q3x{3Km^Gb}t6Dtm4W*@LeIwD+XYx6Mlhp9js5=rtWxNZgdghuN 
z1PPJF*aVw(M)B@hD0jLA>`S`sV`f!RV`x54KtI$ibEHt_l&7>9V);mr=#5;IZ(g~BRaX*-zy-{@5bu$D+8dQXiy3pRbfX9a1JX^ z2>P?cA^BQ&%h#&LJLa1ho>3E!KDizE^HRNZCcuJ-i&6D>;gJHM6O=Ur{Vp9b z$q2w;|p7GQlE1*8kv4uTU7P3#w*s{2sHFhW1zlXkuCD(j& znvwFHz;IR?nYlE-dw`{vktpBa6s!lGoDi7*2+K@xWoYycNx!vH$U-}sV;YJIGq2QM zT=Av~qv`}QqwL*bNUKxxK;D1cAZjvDd(cG(p_PrW)x;~4WRR-?`! zks`-;@DrC-B9%nC2c4c_@g?<8PJl z!KmcywmB3~RLP0Ub5gxyy&qT&Lvf#yw@(Oqm_rH0$|FaVZs$}iJ*vxU^Jf#e$?RyJ zEj3w%C1wy-OjoQ%YpD6H3rxV&a@(TlZ2ad;m{TjL&i-iDaSInTG#R<@?^@&Q!Q& z_ntj!-ToM4L-DSo#HbnJOPZM|+1yKN>x$7$Sn8F3qlN!^VpQgBC>8z=?nH{N$o5zN z4G95BVtV@~@uMGG96RuIKtbw415r6?Q>l$xx_R0To&9tGi5Cjp@A3XiDYy3eM@$wg z?AfPHo~hoL?%39Z<@K>I7~Gd3PnPadEXY&Gc2R5;6;JD?eDXZCn$SGtF!oSV15VTs zWJE=);upr{$1MbF9Mrs3U-wuTU3nXB88lCgtz;m`xjibaeDYBh!txhgmt^d^-}j@> zg?VE*2jRxU)!Lq$txC9-fgj+=M5Yr(LClgsK+^3xn>a$EyU~5TlPJWlpz4&vc~k4n z!{Z`tjCtw2^_cS|quVS-cT{}m&456n;rL;uU^(xlfr~SN*SKwNtNv{}FnmGXx&eP? zw26(YE_K)|W9q^Ovhm$*1)Ov?6d+k)!Tk{iIqL2Rk0+NP=+3%?Ci=;a@bOTwu6s|` z4wD_{yAqO%*$@G!&^^3lnn1aXSE7rBQH1vBPdhaDu#93hTEfUwS_!Zxfk4S%GF&?u zR@;qRkVvf03?wl%v-^vVJrLwzeS8NdZZ{E-zN7FFIGk7KbZnQoP;|)E_=#;okngG) zjKF4(xfQUkS)d}wK&uRc7^>$N>#$Pj8@*_k?38}ogbwmAY0!WE@#7l!U(#>`1OGJ# z_fK=zZ_A&*o96bq^c)~Q77s|QC z6^Zo>NkNU9#08@swb;?ef`FmY5lp5C7I(u)kue|%1FajN@t`x+l&WWKvsA6mJ!x=) zwI~X>@6fr7B#+H>j7dBVdzPfm=M#j_(7sFCZHB3QzdB5ppIZ=(90rcmI5aF9zLUH= z>e<(u`flb1Yn((E_SHN5fH0ViH^rtaKiHgKi)&h#H+mU$)tiPjVY)?IfWybJx7O-Z zAh3^(R?QT*93!kQ?3~@m&~`Fq`GkK0;*v-^VF>9cf*>u}d`!LmVR_mEB_v4|4w0I( zA2UOo3p{n7-#=Kt#)JXCt&~MWu#Wu8tcfuLf%toPm#50W+3M!YJl7be(-65YL1$%B zTvV$d>;ZM~4sJVY@&o*#(_RAw)e1Z~I~m)TngJ5=guWSZ(_=b@7I7v+5bC@)$Qs6NgrXMoL*DS@`CL`C7|w*JY-$ zTl?$FRX)%UzH=*_^ghh-0ZTl3dw7e8nkRq}uK%%lRV>!5EKMa;r$hmn-9GMwwGZ|H zzq~ixT?skTNH6!XFkQ!G`SbAJFR@y-Au80_47^e5V(sHNTBYiAuI9VI>%=(I3p+Jm z$b8MVSF3(N7n<2e&U?d3TG($->bK6flIO3~S@sn&U9-;J=mp>NYlv$xx6CC1R~buXyG(FITyH{R~T$Cgv?nSRz6*sU47sTsXcJfS@?GAj7#av9$6Zx z&ssXq&`y@kJxc0FTTdUH9v{?dnf|)y<{%$in3^PQNn07!fB0kFC^u 
zthHjFY+Z!rj$`IO`ACN9G8%V}IdFP|&AeWs^?`}2VAQ?;duW=(0@eFH4W1|p_@g{%%~}RYN{|a+Q~iR(`ZQ>lr7W42o=gPT{vZ zY?w$CXxw2p^nv}Dqzg;HYaSxLbrcW99$>lG6J~k0gsyQ}LMZP71a(7hRM|ig#F;a) zioiJr2BfE?mc~6LEa1hdlMKBjOvSw}_cg1xtlR1G8ryyhgTpizN8Hl=MG1!MjET~# z7mKmui4~+1B6+2a6DBYqs1H1&xM$q1E8Gjq9!I+Os4OTo`V57Wi)`>mJ2#qKCB8wL zb$qs^g50F-X%zGpI%;`KRGElRmT@&Q;q{=Fw6dHJI83v0(-;HScB)aTwbSVbky^Mh zh3ZN@~PwO);RalJJ~x|)kDF}2my-?b(JKANMm*;ptm5P!Jb!~0T#n& z%ui&KLZ7JizkNbrcS5V4kxM+XGQ}Z<5gnE=O{9;B`0fFIKVPK!5Xi5Z5U@MkMZ5$I zgk2&h+=Y@aCAhT9YoE2WbPngHaG+iT4{jRpxY(K4jZ)2+D(4gGmIDP&KFx3S_SPAJbYxRD39ZDAO7umQ zc}9gHw(e5sw6PnO_W8Ubc8@q7XlkOzfioJ48-=g$0E3m1oiTPo)fd#G%h)*=Kfg@4 z4I7#zeRXc1ZN4p^f-vqaiC4^VS_NuHuwji-wZivfTR9{DAv#JarDPSs?T>wJV#0=~ zZX_EY}U-le$xM|J!HM8qlN@puroBNi3u) zYQ{jFrgoZ68Tyn=0)tdwNazLKxT$i_6fr7!kq?3^?U&R>*BKV_)2=C;$}wt@DYP)S zd(n(;RD`ChYT5g4<&*|8wcsZx)Wt9U9Mnn-#tzyRD>LbLnTyQK=?k&8I}p(H;I`QP zL_J?HNp7_I#-JNV%A>3K2(bj^W+r&nfJRo{FI&dca934N`#3l98LpJ*r)BoZ|frWtmc@FfKQjcFNRe=8HHp>zwznO)WzuMscZ|gyV{=b?s{d+#?Cx@TyAM*G+ z)#Lp79&r82{|Ci)KkA2ofq*1zehdEE)nC*8w5Wdx&YcVn%ch}yUEgM z>ilSRe`&R8f5vHl#A!AECI0zm{PU0a=ilOVKjU;i;&i{o8GgnYe#9Ami!=R!%M1U!3F+5gJNrN2AuA!t_v>f0)Q`S8f&Dy#f4lQWKf)y1 zf1c;{BQJ34&-1c=3P@ACdg7KhJ~vkq1ZdH~++cYvAAe9Q%2_{%rs7 zy8Xw0u1BT+hWOX6{y%@N|Abw4cOd}k5^NXtr8{6D7eZli8fAWH)W3VZb4$Jj68$w# zeh#|j!o_g{z15+V%Q>8PQiYRAX!5QiP?r;Z8YMk`ad*0RA?aoJSu=r$YzhAm4SAiS zgcSB#yK4IYaFJ`S9wve$QLl9`TkMlH^j12ymqoZLEuaAFZ7MWW$3(T+Jd)qp+GaKA zMyXipP)s|dwNbf1e@t&+zBYhh6|XJwr7I}bNgbb4w{p>_SVcZmc?_uy^o;+lEl9lj z$5+IYhN4M#o4sA6MQ4_fUfg;vHl_nJMAw5Nvu~v|+2cl^y@~m;JMe~SUWu2YZ!`ncS8B%glADs9R-_RQcf-$X| zU}~;S{OnWUd^N4O=MMv}dPO3sMPC-_pHMN+5=;G{J74q!3er$PoK5*}PJ1EGkHo^u zWt;+*S8uliYqK-(uOA;>eLxQ?fqR5=tqIes3120(3JOLF)Tz%~Od>7CF!0YmqF zbu2cpEXJN7K+~W(ddBz2$S9_tmXf>{8B4bai9Ec=BoSbWpzE9HXp%ocoEOISM>RS7 zfX7)3#KoY+Nx!Vhhx8(;mNuMma9RCQJid<;zlFo2F(P8ue>?H0<3Wl;K3{gS^;#=O z<<%tPBTW}_p?mc;FVVhAh;-;#)V4l9Mj}#sUIO3p%Gg@i#OcXeq$B*sw|C$n`_d8F zDDpc~>{5cCTVPt=$6L$?RGb;)NmjyRt2GYJUSf6tMX0wgJONzJcm>iD#mQoHQ 
z>MNBzm0reB=!*zLm7YVbzBHc5YGW*ao{EbO)fJ1>QP$e!lGlJ|#zZl0l`Ag~wpMb( z#M1?_*lroFnH*Yx0=Usw{ugycua14&Wx?9gNy4jaPx+CCBe|H8A@|7{hRZqob~sT} z_dZM7JY_O&N>w`Wn-YgyG#LdWrZxZPBXJ@SQ{lVNdQk)h5v9vwiLzN>!&(}#uXV5+!5ZyFAM@ zpn=Uo)4U=;fXPNIuK*&vx+FK$IifsoXAZ)&q4dQCl1+1DEB0P|9)MkE#s`;_7Q9$A z$0DJc%euQZFWQR9=(3M-aV9VCS1;##<^Pzh7l}5TPR?RTvyr}~ogZ-jM7HMZ69qaU zvOfm;+}LF0CvDkI!5X}1EK z4o-O9iFgAI59X8H2*1_-{61{VsDN-I13{uZd53jO```=gAp!ybU<;%_Cs{HN^!Anv zcym>7V$UrfBU#|~%03uBk6fwbr>0e4c_-wgTLA!c75*(-=b2Hpofwn8ua!7{oFIFi zT<7`ts^V<-e2XEh#is8lA&I?w=&sT;YJY8tozu7_n{{$xXqN;(bPqQuyk~x`J;5(v5Rymz|p13 zprDO3`QOpZ3`QN&X3Q@Yi{?TMdonQ(9cO)oBD*OTk>6FEi(s&=8?!7ith6NbjtXB) zlBTvzggLE0skD*0b9mA;$e&q#z+9wkc(A#YM9&59E^ofhD^L;G7<5x4za3|2Q^&8a zi))17EF|bJt4lkaEgxNZtjx?cx8y(kcuyVfx-V>MsJO^yrCS5k$ZXtC#oJh)exdkj z_>c)l9!LgjJ74Slk(^@36dW~%vr8k(5k(0QMD|@PU@U@7DTr2t-%TmO6wBp`-JCBwS#p%lJ59`YH7;s3VnS9+AM4Z5^QI-~ zr($*}-roTuujM?j&VMHJuv05`0rns`p|r@SX1TNb^r=j)6PaZB($@6(GH!XKGa^xXWWarx4MY&P5QWoM60n$) zuOLQJED6t;a9MB5GL`j~2t>46J@GY~U=|;&H5TjZTvP#Ui+st$P^2kr`uG(?wis^) zp@)O+b!eNkY2l#i)Y8qFWV4KeTES(d#a_5mV14sg3l;f*%VkPu z>qb%wKpbT};IMp9U{1$0r3=pRr(AHT%iNiwKXE*dp zHxX=tg4%uFu8PKqyWbX&D_ZzOQ9P8=bnRyveKFBTDFEO7_Uq_Ehf~;)?Q-nDuzjzaftp4byzhSRZ zVPMy9v7oO6=kj5~G^WRCu*x4}qZJoOQbiM*u^Dbu#$aD^oK26Lq%%SEtyLV3v zTRZHmtD{lm2)4!d*MhdlzWKpMrr)ejcp zG$)y|l1lv?!yWvpQWeSTB_WAZyu|B|`_<+0b}a=Hmcyzn?*s;pUA$pSL~-+@ktn~c z8KS7lSZyS2?|_>i6JG=<@kGrlMk57%um#)^OxOJ^yOeJFfln`P2!Q}oC0f($a6}lT z+-b#kMmrj?)LaE~u(qVY>_7%B-#^tyxHwJa5eh|8ztZCYWx=L+$i-SB(K-e~T${P%9O zaPN}NC-3~j&C43(Fh!N|xhiQRoZ#jsedKpc)T?gn_NlQj9*{7-IjXHfcfB$x)kdE(sbXt?So#4t)T7@YOOEtvW%(A-B}Z8N zh_kT<)1p~3b1vq3MS+gom$E2M3Y2~rhJ~T%uKL#3LBP;vnN@U55vMiV0?9Bgv1gzpb5i%A(X4a=3Wk}f zR2ei1Ew1}6`ZL1_u2pcwi#vY0*d;TLGG=Xq6%{#SMtEXDR9$vks9`9xET%m~9Vslj zb!DKnUr!x{=$W1()UhM}BqC}$4Ymo|`(oM(R`TIYowbuBl}c&2kgp$DHF9!Z<2@?l z4ozKJrw+^cH9T7TH^N*NV@bQsljvX*jfdi-n#7`+mCl6r*Q^3I1nwZ1rYl}sIp{e) zUt8nKQzGD-%Z7zD_ZC9Z5mBzh)2SRC&Ya6WX*K!wW)TTb(HL=YU+yC~bGT%s9kk|^ z_?L!#;L_PmRnPMgIG1ko$Ym|Wt6!Q~6EIGKux 
z-{`DS9i|0-3&$L<@5PT;mxzJKjZc-Z_ATSE>AGSO0aWkj3tO)7l{uG$L9k#fQ3q3y zI#l3hNWM*JHP>VUukG@dCpg9ToIA%k2(B5ToAA`}KJ5Lxu(!cNyGiC;n3GYB|CxN4 zLb_=8yD1;Y6n6S>YMJ0#%XyBWl@=BJRKuoM9XN{3;8BQtas!&9ONG%r&T--r2kh_& zTCh`PqocR;(*-77%_-O;KwSrZnL4CVYsh;;GDJCE(&qpOIPNtN(JIlZ9{2T?1Mw$! z$FK`G3JMRI8;J8rzrA8!Mut&%Mo$r!J{6l|i(8Mc1dYG!T@kw5(r&Kkk0_$Jl+?vY z;7lTb+?~`OZSM;;8An2l-n#?*kH#NgMPxCv*1Yu z-DpmgB>daAd4%9LkKIY&m-XH_qtqdImT^(6RMx1yVPlEecArn( zZj0t$`vExF@i&QGFH?vgpHIiM0+~b@FY3e46Hl;yE?XPl&wePT zQ}`oaaQO$m;7iDxFZidL`n{dZ8-X_hZv@^5yb<^h6ZnfS_)#?q2#5smW)0q~!9Um< z{BAV9xq~-%@DFwezZ;Ej2I0*h{7*FqPpAI#0KlRaKAL0JH)BKAPH0hM+tzTz!Q~a9zLL7G8E<^ z)Q9JxgQof(-iwi-wPWY&&;AiLXtjFuU4xJbL^@Edg z&_0y_g!6P$peHX<2l=Eq@Gz{K)i(rbHdIKuj`N3-idjIvYJj=*T~|Uk*A9gkzkJ(X z9)?Gy;%!Yix^KM;g5_ZEEe}B>5O5syi22>2vfi(Ky{C`e2RO#cU6384Mbe#BtceQ` z<3p1Tbh(D3+^>4Vxmu%fuuWZ#45H>8#$W7mDz65;XwlB=0%iA4>#a|N_H}PVXR?2w z5y{rX3Pg(&D2USW*ZP=eixhiPk1io-cjg+MLY$OkRp5$~Wn0mZc;K`JXO)B6FFcE; zwResTr)1Rx>CZlgJ?kjB$46iF`og@Zoc9Z>l08={NuESXg*DyPshz#E~GI3)#reQWaL{91( zy|;7ieco|Y$QQlkowfw#3dG2tlT-pliflPydu~nvQw8PcHwaZi++*K72dpAjywYWe z0g+&<9-=O7n0TE?<2I|Y&hZ|;hte@#1|V73;S{WGQi#UVt!-}OMrbfbKeUx6Au6m1 z^81W{iui=W1o4rEDc1iwkXtlu2$S3-2M;jnidti)dabE@#gwFb2(i#s%ZgrV$Wh}%oO>IdgO(8 zFZMseL+Takijk$3SCD&DNj{D_0vcP*oVq{0m{nWr6b*&VDuV%GiqnB&iV%i|mJ{_6 z44NyQ?;NA^YWRb#^)M=W2~mfwVpd^Yh0Qj2#F?}Jg+Jutk2o_D>jbcl<Mi@kAlG~(aB$&Jg^+-7~^-4F(o+0A(Vy53=$-IESFpac{3WjxGP z-k+G`!3NFrqqcS(T=iuY$nqd9g;sA!1`MW*3ad;p*o*KdTF7)nS2+1Zn=a6&(=s6q zoPzdOMw}qqS!rQ-dA1|PS8?|+NT@vJ$5;I0nk`Y6=;fC~+wZxgaT`o%Wk#KGhMUdn!kCw1ssNc%p3 zxVmy^P3}#u7V#=U4;LTG3e7iN#F+=;f{KZXE@_s)C=u5QUznQuiExFn->~Pp=yIji;FC@s% z;f7&{N4PoVe=u|GDeNZK$L!^yiX-`qy|hSSL}obK;|_D(>_HH&WDp@{kvLG)Bz|0< zW)FjiiFZiG_nCstTr1tN3Xa(G%7|XHwROIk6NJy7{>FgT>kv;Xf{SViR_R*o^L~eA zj|*wZyxHI(u|70?Rfkdey9Q{w~9_JZ=MAsZ7arf7-o7cqLf!kJkQRGt*=p&HM3X>M9;xr+BHJ}5qQvZoliA4C&3@iRR$_BFUbHg(00@lvu`A>efePs@nH z1C*=#*w^LfFT|O!%T6k3)-EUj=`^muxd=i!qFA+ZodYt9-b-_9Vs2r6H#thAuOYTw zE9rn;njzVT5mkIB)Rb173L@6v=2H`O*bWs!=pD9|Si#EK0$T?ZtaMQPQ3vVLeFlwu 
zkpMio?VbIuytRrIt&u|@Uz2{yg}0SOKS})8w*U9aq9E`;UkUVcWf7CfpXYh~=w{;f zpXX)$$YU`6>)#Ol2>+$|xw5FX`_J=!t}Md&B{D!wJ%ZcbJ=Si% z9*x2yR!)7lDl9`uj7foiv*eB?Dplg7tR6vxi77Rg;ePQNPeq1S-(*t7v3X1O;PF)5 zaEzrZuu+L^TuQOQmzyXuHP! z*oC}{OYjmf=2M51a;h?Zq+T2rpzI7+RG%4o@*$KB<0#rQ?y+1|N zhM>=)KQ+iH3clnVg+^fzO!vH%XP$uD?!zM-ayye-GvhFL8i>1Xf)m-k=M;pBI`KF_Jljs zyE(_#ku}Q8lm{>=pVzQO_-S9Tp*S~(&Xe-k6OG@BDAh7zZ?7P)@{7PPQ|I3*xucSO z2h{rtOSG6$@-4L~Xa;-v>AlUhtDA zkaa$RY`)t_AU}PvpVvhj3m`&Uh2!3p7>JP|2An6DLaULXNY9 z=mxQ50eUCw1Lll_<;`;yaGKjVY6zws_r2eT=%n$?+a98Do~U)S(&4Hg=2>Rj#~~oT z*8g&;H3@FY14;dt$)&fCnhhiW;eooIzSQ8r+Q zQ~?tdWzi09-$mqS@=e5o6>Vbc1E^L2C2(W8sd>8~vs9c!!|~LcXjxr~dul*JXR-N& z+9XbebCnH?bdK)$-?Qjm4&;B@eMs75LHIJmphKj{zHBM2_})Qx-Cj-G=@)`oRk+9~4>jO%Tzde@ zGZPhp$b1Z>KmwjIFZByQ$KeoMqy#P2Ce)E6(763^rlGX-S5-5N*4{7VG(jsu5A&6B zF)&o$A+OA-9nWNPxcCF7TMx45*J^hvzjO%_Y9L-vnFl2<$eLH_m=%L+)G$q=Ue%-R zyh7>pI1r2e^0`y+$Of6`?g3--kt8g3bk69`@|%(ZAU`cqt-t9}gU2Z!UxiGa?xPW1 z%)5>N&Pt-3`N??`<7Cd<)|1h8@(UzC;Z?8=XVKm*_{lLS?v7x?^XH?}BSAP7Ye_iK z0rq!HVv;TObNBV!j8nM^M0u`60^h#Zpg*q+0B#ZMQo&~&KA54T!=R~oDWyDrokPQI`Ji|JtQHZr6|F9>2^pa?YdABEN$lpCi^I z6NmUSp4G9$5i0pv(*Tx_488Qz^2kDMfR%L896TpUPbS23@gR!eTA z0_<^*g4TaNh9bh9`6kQsJWo`KM|D%WIiDdHq_j$mz>8}AD&7OZ@X32yX@g~Soxbg`7hb~m`J>qlPU&Y$OH{m7&J z^4F&ANBA#I(T}6vo-LmxDhti=prS?UzSu`s?#}GmHN=r?==Efj0tg z1l|a|5%}jL@E5cAyL!?$zxd`C|G|FoccbymGQL^Hf3Riz-DrGsjc=~;AM6^7f4!Fi zP8J|2z(0?2{PS(nFMiCsezt$O-SXdTlb%2Ho8&No`2E!F|MzXu|2+l!lf%#U4|)9K zx1WZc`Rxt(wX1)rS^Hc5|GC=={N(Vn{eR=}cTv(ayZ;3|em|Dqj^7Bp5qKl;KbgP; z29GQoKxPV5rY{>LBT@h{cH^4+{+UbtH{NTxzV$t?z78RidiW&vGVpma3lC6~0V3)g z6I1E!yPBIDL`WamO{1#=uZ7lpqQ8R!2sPNWJwxxA^FtSyJ}B|r%}a@Ux;9H> z8(FBLKYBi8;!eFl>D(r_+5lBBm2jZc^FUvp3@&wcKN3%5h$hECt0F*z#bW#N7gIlG zeLyHWtQRbM%+bh*a&sQ~lnNPSIa&1L_3FmX|7v}UVpClAgJl${ zZY``F#<*sQb-w}e+FsF2Nr|0P^=_lHza8x?1zL8hGB+jkkxGT?aN^Yj8h{H(Rrs#< zg8bnw?uU4%t6(hE^356Z@jP*Ekm6nNywS6j%)b1QiUbO8?b@pOFRw~TU0QR1@!877 z0K=T{+3r*cm={cWJpLE#y}Nc<6&IK3_LyYI!A{M|RFUPgn*xyx6l}%c#Kk6pEZ;4J}x8YocNo{OX%k^saMBNEm4r7<&$rX(e-c@qv 
zYJ<1Urt-jBvEX0Cpw-LejWrcs4u&!@hgYsDM5?Kj^B#(&c!3B=$&Rq_2zSc&_?+;- z9BZwhnz(ki^5Ihc6VmKHSi$Opd?x&I6Be>V6YLo^$NYtF#EpkR<`{IoBlRn5^A3eR zI_RU;sz}JtG*a7!w=)1vUJ%<3KZWX}&!P|<4?E=Q>y(V!B-!aERB12Hh`mFLqab~D zw1p?Rc_B-6o%`Tb zY2xjMzr5Vljj@%33hXC$+N7Q;1|RsDJ=-+MBbkM4;VVQl+oera%sBuQ{yNUtKsMOZCRPIUuCC!SL6=8S2Y;XC zLV@G%t}KAmkPk^)oXaMf#*8qj7=C4mJUve9hl>gN%_@3 zkEW#&j;s%+GzgNsK7H+u*>!DRGaz^k_le4wF6rZB?`l>AX}!T9E5`XljewJbA{rJqe1waPX60 z!$UB-Rds9O_ms$=*vB+hHCfx<9EuVNHzPY2r^N|9j_jgEwA1m^TXTJGf6o&8KA@em zN?&hTr{I<;vR3OX(87v}8T)0oQSFeeWk+Sb_kh1o!L-L(>In;YV_PMwL&wrB{ce9niS zJ{yh%@dU?HpqP_`1|BPG zcY7vh)`7AWO=p=ZO3|K`$Eod?^gP#2YD5S05VVTz*ZjKi+Fyy_l-|SkURT?6a^wg% z7R?W+Rc@Vgw8t@(;YF_n67K6J8K<$kKxD$S)uKb_kZy_>h^jj2Q;mBhE;z8b&RI~@ zF1rCrFpj%@^v&1bwLJ|-ONY|-y;5w?ZR~2^9q*L(ii!gNsuKb+Y7e3(Tjr)F5pF=w z4N^iV86?Ouuc$q{dU|BHW!%wTrySjy?_>#wqdq5!Q-5e7!@#p8OwpOROwp2O6Wx=j z-^bY6=>j|EZG&L(f^J$?HHLis;V`g%b1<*n)~FFoP^{4caLl~ z2mRpcVPqXi6N%sOZCN3A739Ur|j7z78mu|bQ?)7Vrge$1+u2ze+lxU^C zKCX|=dg4CZ48tO?7EoJbg*lpWnea*x!9~&dJr`n1r`3|1n=2aT7lcyT=gn8U2iL7| z$!?US*&aT_K>Kwj%3>9E>rc)X3}Y%re^ewA|5twGU+0pXD}P-i^79+^U%tx!YwFw4 z8-X_hZv@^5yb*XK@J~em!sPeM&(`?Rxes0@WXSC6(HD5mlvk@M(swI;2wrW=)M0!?#`D?-G|y<$RUR2qzvt)ONJeS z&Nk_O=@pAt*xZ&T_`1#YRsBnA|0Tz)#H5Zm--j>=C+kwZ=M9V!OyS8*fSNdu^3B)N zdcHf%t(($W{HQq9G2Y2B{l~sWa+v|S_D3)8YquO=09As4kxE8;uT{`@njOyHGP)^> zdPUlfU|C|NRdPI2KY#JW&*2H5H3Cb~zzPa=Z?4XpaHJsU5T{Mt(`$x`k7O<|CSE}- z;UZ>7Y7?G<*q>fN(b@kZ*#W(idS4PTSDttc%~(jvovR#}{5riUeyBxL@p8sEe?BH? 
zWQEQ-CU9JPZ7mjJtSD`iKu3XTl28z)&kdSNBe<-UO=IX_z$%PD=?aUh9ZZbC6&x20 zSQwlp&H%B6NEwzzaaqH16FDr0^q|Mv-U9vXfBVoLUr!)#T;wrG#Q;Zq+AO$-5^B{h z=S-2*Fn5nwo_o4C5RJT8+Vaqf>GUBQjxqb4eB_#obUMjR%hs#gc!+~kwz)9wPA$!q zk~pIa#?HAn-((Tubmo`O{nUnl&qjNLAO$Mownh)(#?g`SWhi$-Vgf_( zEl161EgwLB#^rs=rP0XjzWukm{_sy`qw%y8qsnq0n~1_IQ4T8wmFUTo2jVK#ZJNIE z(&D_6pFscyDMauXW^q36js=Y5D#xKzC6|P%t)a3rcoMO^$`3j+rxAE0bQ*#ZB9>?| z<4>18-Sk_Zil@J3?2x&OEG;6pEOWueJ?l>xp)P-a&>hsRVsxK z-Ktqb-oBtT`sX+--HgDz*FdIcQl9|dVnqZwQp7Q+&D@A}%*9g!>DPnjKBQRX5c`NR zZp{gh!|;p5kPNUvK?Scdg*FfZ$4UCETns|*t3xZLZ^){nuFIdf`hjx6=K^a?rK?X$ zbgl(JEYDSq419sSr|NEj#nT)%&*NB~MV3m+(H5)BjoLbV%uWLpy{MmK2bvv{8sbJC zE>_x7_X^5k)l$o*TUHILU`yb=* zgEF_Qs6~Pyv!n^;`NU(FTTj0ZFbl~%hBQ!Nt6;W7L36HsR_aj1*7)8n89+REX*k@K zage}P6bNffggdZV6bF8|>vVg=l`hxu+6rq&0sBSCv=d#J-dG?C>T7nQ!@%bfCm`vi zxgG+<8ZTIHnEm}Sv-ZT#mHoRPj9#B433ykT%kLIbDqWee{CYwV`zcH1FY0_pfd*5N z4Q6jrXl95hg@hw=23*Xt*R1gtHg~-TpF0mox1DXUU&9mtC^m6GzBiU$ZS^Ha7)~8d ze<8755thZP$!}pj!J_PCb;M!p=|h3Rd(`#gWxCAU2!O{5@u3^-GNq1J!!2tXkF>nF z@=Ew5EO61wRdN~(4Od&0%^;}BgsFb;k&A1PQS6u7ly9GZpVFMzZ2Nfs{9<_LD&fvl z>DB{b^v(9YFHq$X+E}IRT(X;n$)PZWITlY;eiR=NT!u0F0XFSk$LkF zS^k*HO{+Ro9M&7~0(8cA!Jr==eb=q}sdi$GIQ3}9Eh325nP-YrOO)ObOsNmW;e_lP z)fYuK^=Y*jy5UbV7_3>|Cz#k*ey@ruRvpw?-SF>_c}SzrTe|L8K}5QndYw{Lb9Ysx zG0#8huPA>_L<->1tnzm<{p^mzRV~wXkCNEdI}90_fE)*~$*IP(-sN3B0s1h5CBt+1HhwjcYjmI707)K!7v$EzuR^$TW!k z-q;N34FEt`fH%AIzs2tSA3GqYi~s-t literal 0 HcmV?d00001 diff --git a/tests/make_regression_tdata.py b/tests/make_regression_tdata.py new file mode 100644 index 00000000..03deb422 --- /dev/null +++ b/tests/make_regression_tdata.py @@ -0,0 +1,69 @@ +""" +Script to create data used for regression testing. + +""" + +import numpy as np +from numpy import random +import h5py + +import bitshuffle +from bitshuffle import h5 +from h5py import h5z + +BLOCK_SIZE = 64 # Smallish such that datasets have many blocks but are small. 
+COMP_LVL = 10 # ZSTD compression level +FILTER_PIPELINE = [h5.H5FILTER] +FILTER_OPTS = [ + [(BLOCK_SIZE, h5.H5_COMPRESS_LZ4)], + [(BLOCK_SIZE, h5.H5_COMPRESS_ZSTD, COMP_LVL)], +] + +OUT_FILE = "tests/data/regression_%s.h5" % bitshuffle.__version__ + +DTYPES = ["a1", "a2", "a3", "a4", "a6", "a8", "a10"] + +f = h5py.File(OUT_FILE, "w") +g_orig = f.create_group("origional") +g_comp_lz4 = f.create_group("compressed") +g_comp_zstd = f.create_group("compressed_zstd") + +for dtype in DTYPES: + for rep in ["a", "b", "c"]: + dset_name = "%s_%s" % (dtype, rep) + dtype = np.dtype(dtype) + n_elem = 3 * BLOCK_SIZE + random.randint(0, BLOCK_SIZE) + shape = (n_elem,) + chunks = shape + data = random.randint(0, 255, n_elem * dtype.itemsize) + data = data.astype(np.uint8).view(dtype) + + g_orig.create_dataset(dset_name, data=data) + + # Create LZ4 compressed data + h5.create_dataset( + g_comp_lz4, + bytes(dset_name, "utf-8"), + shape, + dtype, + chunks=chunks, + filter_pipeline=FILTER_PIPELINE, + filter_flags=(h5z.FLAG_MANDATORY,), + filter_opts=FILTER_OPTS[0], + ) + g_comp_lz4[dset_name][:] = data + + # Create ZSTD compressed data + h5.create_dataset( + g_comp_zstd, + bytes(dset_name, "utf-8"), + shape, + dtype, + chunks=chunks, + filter_pipeline=FILTER_PIPELINE, + filter_flags=(h5z.FLAG_MANDATORY,), + filter_opts=FILTER_OPTS[1], + ) + g_comp_zstd[dset_name][:] = data + +f.close() diff --git a/tests/test_ext.py b/tests/test_ext.py new file mode 100644 index 00000000..b2577c0d --- /dev/null +++ b/tests/test_ext.py @@ -0,0 +1,627 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + +import unittest +import time + +import numpy as np +from numpy import random + +from bitshuffle import ext, __zstd__ + + +# If we are doing timeings by what factor to increase workload. +# Remember to change `ext.REPEATC`. +TIME = 0 +# TIME = 8 # 8kB blocks same as final blocking. 
+BLOCK = 1024 + + +TEST_DTYPES = [ + np.uint8, + np.uint16, + np.int32, + np.uint64, + np.float32, + np.float64, + np.complex128, +] +TEST_DTYPES += [b"a3", b"a5", b"a6", b"a7", b"a9", b"a11", b"a12", b"a24", b"a48"] + + +class TestProfile(unittest.TestCase): + def setUp(self): + n = 1024 # bytes. + if TIME: + n *= TIME + # Almost random bits, but now quite. All bits exercised (to fully test + # transpose) but still slightly compresible. + self.data = random.randint(0, 200, n).astype(np.uint8) + self.fun = ext.copy + self.check = None + self.check_data = None + self.case = "None" + + def tearDown(self): + """Performs all tests and timings.""" + if TIME: + reps = 10 + else: + reps = 1 + delta_ts = [] + try: + for ii in range(reps): + t0 = time.time() + out = self.fun(self.data) + delta_ts.append(time.time() - t0) + except RuntimeError as err: + if len(err.args) > 1 and (err.args[1] == -11) and not ext.using_SSE2(): + return + if len(err.args) > 1 and (err.args[1] == -12) and not ext.using_AVX2(): + return + else: + raise + delta_t = min(delta_ts) + size_i = self.data.size * self.data.dtype.itemsize + size_o = out.size * out.dtype.itemsize + size = max([size_i, size_o]) + speed = ext.REPEAT * size / delta_t / 1024**3 # GB/s + if TIME: + print("%-20s: %5.2f s/GB, %5.2f GB/s" % (self.case, 1.0 / speed, speed)) + if self.check is not None: + ans = self.check(self.data).view(np.uint8) + self.assertTrue(np.all(ans == out.view(np.uint8))) + if self.check_data is not None: + ans = self.check_data.view(np.uint8) + self.assertTrue(np.all(ans == out.view(np.uint8))) + + def test_00_copy(self): + self.case = "copy" + self.fun = ext.copy + self.check = lambda x: x + + def test_01a_trans_byte_elem_scal_16(self): + self.case = "byte T elem scal 16" + self.data = self.data.view(np.int16) + self.fun = ext.trans_byte_elem_scal + self.check = trans_byte_elem + + def test_01b_trans_byte_elem_scal_32(self): + self.case = "byte T elem scal 32" + self.data = self.data.view(np.int32) + 
self.fun = ext.trans_byte_elem_scal + self.check = trans_byte_elem + + def test_01c_trans_byte_elem_scal_64(self): + self.case = "byte T elem scal 64" + self.data = self.data.view(np.int64) + self.fun = ext.trans_byte_elem_scal + self.check = trans_byte_elem + + def test_01d_trans_byte_elem_16(self): + self.case = "byte T elem SSE 16" + self.data = self.data.view(np.int16) + self.fun = ext.trans_byte_elem_SSE + self.check = trans_byte_elem + + def test_01e_trans_byte_elem_32(self): + self.case = "byte T elem SSE 32" + self.data = self.data.view(np.float32) + self.fun = ext.trans_byte_elem_SSE + self.check = trans_byte_elem + + def test_01f_trans_byte_elem_64(self): + self.case = "byte T elem SSE 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_byte_elem_SSE + self.check = trans_byte_elem + + def test_01g_trans_byte_elem_128(self): + self.case = "byte T elem SSE 128" + self.data = self.data.view(np.complex128) + self.fun = ext.trans_byte_elem_SSE + self.check = trans_byte_elem + + def test_01h_trans_byte_elem_96(self): + self.case = "byte T elem SSE 96" + n = self.data.size // 128 * 96 + dt = np.dtype( + [(str("a"), np.int32), (str("b"), np.int32), (str("c"), np.int32)] + ) + self.data = self.data[:n].view(dt) + self.fun = ext.trans_byte_elem_SSE + self.check = trans_byte_elem + + def test_01i_trans_byte_elem_80(self): + self.case = "byte T elem SSE 80" + n = self.data.size // 128 * 80 + dt = np.dtype( + [ + (str("a"), np.int16), + (str("b"), np.int16), + (str("c"), np.int16), + (str("d"), np.int16), + (str("e"), np.int16), + ] + ) + self.data = self.data[:n].view(dt) + self.fun = ext.trans_byte_elem_SSE + self.check = trans_byte_elem + + def test_03a_trans_bit_byte(self): + self.case = "bit T byte scal 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_bit_byte_scal + self.check = trans_bit_byte + + def test_03d_trans_bit_byte_SSE(self): + self.case = "bit T byte SSE 64" + self.data = self.data.view(np.float64) + self.fun = 
ext.trans_bit_byte_SSE + self.check = trans_bit_byte + + def test_03f_trans_bit_byte_AVX(self): + self.case = "bit T byte AVX 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_bit_byte_AVX + self.check = trans_bit_byte + + def test_03g_trans_bit_byte_AVX_32(self): + self.case = "bit T byte AVX 32" + self.data = self.data.view(np.float32) + self.fun = ext.trans_bit_byte_AVX + self.check = trans_bit_byte + + def test_04a_trans_bit_elem_AVX(self): + self.case = "bit T elem AVX 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_bit_elem_AVX + self.check = trans_bit_elem + + def test_04b_trans_bit_elem_AVX_128(self): + self.case = "bit T elem AVX 128" + self.data = self.data.view(np.complex128) + self.fun = ext.trans_bit_elem_AVX + self.check = trans_bit_elem + + def test_04c_trans_bit_elem_AVX_32(self): + self.case = "bit T elem AVX 32" + self.data = self.data.view(np.float32) + self.fun = ext.trans_bit_elem_AVX + self.check = trans_bit_elem + + def test_04d_trans_bit_elem_AVX_16(self): + self.case = "bit T elem AVX 16" + self.data = self.data.view(np.int16) + self.fun = ext.trans_bit_elem_AVX + self.check = trans_bit_elem + + def test_04e_trans_bit_elem_64(self): + self.case = "bit T elem scal 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_bit_elem_scal + self.check = trans_bit_elem + + def test_04f_trans_bit_elem_SSE_32(self): + self.case = "bit T elem SSE 32" + self.data = self.data.view(np.float32) + self.fun = ext.trans_bit_elem_SSE + self.check = trans_bit_elem + + def test_04g_trans_bit_elem_SSE_64(self): + self.case = "bit T elem SSE 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_bit_elem_SSE + self.check = trans_bit_elem + + def test_06a_untrans_bit_elem_16(self): + self.case = "bit U elem SSE 16" + pre_trans = self.data.view(np.int16) + self.data = trans_bit_elem(pre_trans) + self.fun = ext.untrans_bit_elem_SSE + self.check_data = pre_trans + + def 
test_06b_untrans_bit_elem_128(self): + self.case = "bit U elem SSE 128" + pre_trans = self.data.view(np.complex128) + self.data = trans_bit_elem(pre_trans) + self.fun = ext.untrans_bit_elem_SSE + self.check_data = pre_trans + + def test_06c_untrans_bit_elem_32(self): + self.case = "bit U elem SSE 32" + pre_trans = self.data.view(np.float32) + self.data = trans_bit_elem(pre_trans) + self.fun = ext.untrans_bit_elem_SSE + self.check_data = pre_trans + + def test_06d_untrans_bit_elem_32(self): + self.case = "bit U elem AVX 32" + pre_trans = self.data.view(np.float32) + self.data = trans_bit_elem(pre_trans) + self.fun = ext.untrans_bit_elem_AVX + self.check_data = pre_trans + + def test_06e_untrans_bit_elem_64(self): + self.case = "bit U elem SSE 64" + pre_trans = self.data.view(np.float64) + self.data = trans_bit_elem(pre_trans) + self.fun = ext.untrans_bit_elem_SSE + self.check_data = pre_trans + + def test_06f_untrans_bit_elem_64(self): + self.case = "bit U elem AVX 64" + pre_trans = self.data.view(np.float64) + self.data = trans_bit_elem(pre_trans) + self.fun = ext.untrans_bit_elem_AVX + self.check_data = pre_trans + + def test_06g_untrans_bit_elem_64(self): + self.case = "bit U elem scal 64" + pre_trans = self.data.view(np.float64) + self.data = trans_bit_elem(pre_trans) + self.fun = ext.untrans_bit_elem_scal + self.check_data = pre_trans + + def test_07a_trans_byte_bitrow_64(self): + self.case = "byte T row scal 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_byte_bitrow_scal + + def test_07b_trans_byte_bitrow_SSE_64(self): + self.case = "byte T row SSE 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_byte_bitrow_SSE + self.check = ext.trans_byte_bitrow_scal + + def test_07c_trans_byte_bitrow_AVX_64(self): + self.case = "byte T row AVX 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_byte_bitrow_AVX + self.check = ext.trans_byte_bitrow_scal + + def test_08a_shuffle_bit_eight_scal_64(self): + self.case = 
"bit S eight scal 64" + self.data = self.data.view(np.float64) + self.fun = ext.shuffle_bit_eightelem_scal + + def test_08b_shuffle_bit_eight_SSE_64(self): + self.case = "bit S eight SSE 64" + self.data = self.data.view(np.float64) + self.fun = ext.shuffle_bit_eightelem_SSE + self.check = ext.shuffle_bit_eightelem_scal + + def test_08c_shuffle_bit_eight_AVX_32(self): + self.case = "bit S eight AVX 32" + self.data = self.data.view(np.float32) + self.fun = ext.shuffle_bit_eightelem_AVX + self.check = ext.shuffle_bit_eightelem_scal + + def test_08d_shuffle_bit_eight_AVX_64(self): + self.case = "bit S eight AVX 64" + self.data = self.data.view(np.float64) + self.fun = ext.shuffle_bit_eightelem_AVX + self.check = ext.shuffle_bit_eightelem_scal + + def test_08e_shuffle_bit_eight_AVX_16(self): + self.case = "bit S eight AVX 16" + self.data = self.data.view(np.int16) + self.fun = ext.shuffle_bit_eightelem_AVX + self.check = ext.shuffle_bit_eightelem_scal + + def test_08f_shuffle_bit_eight_AVX_128(self): + self.case = "bit S eight AVX 128" + self.data = self.data.view(np.complex128) + self.fun = ext.shuffle_bit_eightelem_AVX + self.check = ext.shuffle_bit_eightelem_scal + + def test_09a_trans_bit_elem_scal_64(self): + self.case = "bit T elem scal 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_bit_elem_scal + self.check = trans_bit_elem + + def test_09b_trans_bit_elem_SSE_64(self): + self.case = "bit T elem SSE 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_bit_elem_SSE + self.check = trans_bit_elem + + def test_09c_trans_bit_elem_AVX_64(self): + self.case = "bit T elem AVX 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_bit_elem_AVX + self.check = trans_bit_elem + + def test_09d_untrans_bit_elem_scal_64(self): + self.case = "bit U elem scal 64" + pre_trans = self.data.view(np.float64) + self.data = trans_bit_elem(pre_trans) + self.fun = ext.untrans_bit_elem_scal + self.check_data = pre_trans + + def 
test_09e_untrans_bit_elem_SSE_64(self): + self.case = "bit U elem SSE 64" + pre_trans = self.data.view(np.float64) + self.data = trans_bit_elem(pre_trans) + self.fun = ext.untrans_bit_elem_SSE + self.check_data = pre_trans + + def test_09f_untrans_bit_elem_AVX_64(self): + self.case = "bit U elem AVX 64" + pre_trans = self.data.view(np.float64) + self.data = trans_bit_elem(pre_trans) + self.fun = ext.untrans_bit_elem_AVX + self.check_data = pre_trans + + def test_10a_bitshuffle_64(self): + self.case = "bitshuffle 64" + self.data = self.data.view(np.float64) + self.fun = lambda x: ext.bitshuffle(x, BLOCK) + + def test_10b_bitunshuffle_64(self): + self.case = "bitunshuffle 64" + pre_trans = self.data.view(np.float64) + self.data = ext.bitshuffle(pre_trans, BLOCK) + self.fun = lambda x: ext.bitunshuffle(x, BLOCK) + self.check_data = pre_trans + + def test_10c_compress_64(self): + self.case = "compress 64" + self.data = self.data.view(np.float64) + self.fun = lambda x: ext.compress_lz4(x, BLOCK) + + def test_10d_decompress_64(self): + self.case = "decompress 64" + pre_trans = self.data.view(np.float64) + self.data = ext.compress_lz4(pre_trans, BLOCK) + self.fun = lambda x: ext.decompress_lz4( + x, pre_trans.shape, pre_trans.dtype, BLOCK + ) + self.check_data = pre_trans + + @unittest.skipUnless(__zstd__, "ZSTD support not included") + def test_10c_compress_z64(self): + self.case = "compress zstd 64" + self.data = self.data.view(np.float64) + self.fun = lambda x: ext.compress_zstd(x, BLOCK) + + @unittest.skipUnless(__zstd__, "ZSTD support not included") + def test_10d_decompress_z64(self): + self.case = "decompress zstd 64" + pre_trans = self.data.view(np.float64) + self.data = ext.compress_zstd(pre_trans, BLOCK) + self.fun = lambda x: ext.decompress_zstd( + x, pre_trans.shape, pre_trans.dtype, BLOCK + ) + self.check_data = pre_trans + + +""" +Commented out to prevent nose from finding them. 
+class TestDevCases(unittest.TestCase): + + def deactivated_test_trans_byte_bitrow_AVX(self): + d = np.arange(256, dtype=np.uint32) + #d = ext.trans_bit_elem(d) + t = ext.trans_byte_bitrow_AVX(d).view(np.uint8) + t1 = ext.trans_byte_bitrow_SSE(d).view(np.uint8) + t.shape = (32, 32) + t1.shape = (32, 32) + #print t[:20,:18] + self.assertTrue(np.all(t == t1)) + + def deactivated_test_untrans_bit_elem(self): + d = np.arange(32, dtype=np.uint16) + #d = random.randint(0, 2**7, 256).astype(np.uint16) + d1 = ext.trans_bit_elem(d) + #print d + t = ext.untrans_bit_elem_AVX(d1) + #t1 = ext.untrans_bit_byte_scal(d1) + #print np.reshape(d1.view(np.uint8), (16, 4)) + #print np.reshape(t1.view(np.uint8), (2, 32)) + #print np.reshape(t2.view(np.uint8), (32, 2)) + #print np.reshape(t.view(np.uint8), (32, 2)) + + def deactivated_test_trans_bit_byte(self): + d = np.arange(16, dtype=np.uint16) + t = ext.trans_bit_byte_scal(d) + #print t + t1 = trans_bit_byte(d) + #print t1 + self.assertTrue(np.all(t == t1)) + + def deactivated_test_trans_byte_bitrow_SSE(self): + d = np.arange(256, dtype = np.uint8) + t = ext.trans_byte_bitrow_scal(d) + #print np.reshape(t, (32, 8)) + t1 = ext.trans_byte_bitrow_SSE(d) + #print np.reshape(t1, (32, 8)) + self.assertTrue(np.all(t == t1)) + + def deactivated_test_trans_byte_elem_SSE(self): + d = np.empty(16, dtype=([('a', 'u4'), ('b', 'u4'), ('c', 'u4')])) + d['a'] = np.arange(16) * 1 + d['b'] = np.arange(16) * 2 + d['c'] = np.arange(16) * 3 + #print d.dtype.itemsize + #print np.reshape(d.view(np.uint8), (16, 12)) + t1 = ext.trans_byte_elem_SSE(d) + #print np.reshape(t1.view(np.uint8), (12, 16)) + t0 = trans_byte_elem(d) + #print np.reshape(t0.view(np.uint8), (12, 16)) + self.assertTrue(np.all(t0.view(np.uint8) == t1.view(np.uint8))) + + def deactivated_test_bitshuffle(self): + d = np.arange(128, dtype=np.uint16) + t1 = ext.bitshuffle(d) + #print t1 + t2 = ext.bitunshuffle(t1) + #print t2 + self.assertTrue(np.all(t2.view(np.uint8) == d.view(np.uint8))) 
+""" + + +class TestOddLengths(unittest.TestCase): + def setUp(self): + self.reps = 10 + self.nmax = 128 * 8 + # self.nmax = 4 * 8 # XXX + self.fun = ext.copy + self.check = lambda x: x + + def test_trans_bit_elem_SSE(self): + self.fun = ext.trans_bit_elem_SSE + self.check = trans_bit_elem + + def test_untrans_bit_elem_SSE(self): + self.fun = lambda x: ext.untrans_bit_elem_SSE(ext.trans_bit_elem(x)) + self.check = lambda x: x + + def test_trans_bit_elem_AVX(self): + self.fun = ext.trans_bit_elem_AVX + self.check = trans_bit_elem + + def test_untrans_bit_elem_AVX(self): + self.fun = lambda x: ext.untrans_bit_elem_SSE(ext.trans_bit_elem(x)) + self.check = lambda x: x + + def test_trans_bit_elem_scal(self): + self.fun = ext.trans_bit_elem_scal + self.check = trans_bit_elem + + def test_untrans_bit_elem_scal(self): + self.fun = lambda x: ext.untrans_bit_elem_scal(ext.trans_bit_elem(x)) + self.check = lambda x: x + + def test_trans_byte_elem_SSE(self): + self.fun = ext.trans_byte_elem_SSE + self.check = trans_byte_elem + + def tearDown(self): + try: + for dtype in TEST_DTYPES: + itemsize = np.dtype(dtype).itemsize + nbyte_max = self.nmax * itemsize + dbuf = random.randint(0, 255, nbyte_max).astype(np.uint8) + dbuf = dbuf.view(dtype) + for ii in range(self.reps): + n = random.randint(0, self.nmax // 8, 1)[0] * 8 + data = dbuf[:n] + out = self.fun(data).view(np.uint8) + ans = self.check(data).view(np.uint8) + self.assertTrue(np.all(out == ans)) + except RuntimeError as err: + if len(err.args) > 1 and (err.args[1] == -11) and not ext.using_SSE2(): + return + if len(err.args) > 1 and (err.args[1] == -12) and not ext.using_AVX2(): + return + else: + raise + + +class TestBitShuffleCircle(unittest.TestCase): + """Ensure that final filter is circularly consistant for any data type and + any length buffer.""" + + def test_circle(self): + nmax = 100000 + reps = 20 + for dtype in TEST_DTYPES: + itemsize = np.dtype(dtype).itemsize + nbyte_max = nmax * itemsize + dbuf = 
random.randint(0, 255, nbyte_max).astype(np.uint8) + dbuf = dbuf.view(dtype) + for ii in range(reps): + n = random.randint(0, nmax, 1)[0] + data = dbuf[:n] + shuff = ext.bitshuffle(data) + out = ext.bitunshuffle(shuff) + self.assertTrue(out.dtype is data.dtype) + self.assertTrue(np.all(data.view(np.uint8) == out.view(np.uint8))) + + def test_circle_with_compression(self): + nmax = 100000 + reps = 20 + for dtype in TEST_DTYPES: + itemsize = np.dtype(dtype).itemsize + nbyte_max = nmax * itemsize + dbuf = random.randint(0, 255, nbyte_max).astype(np.uint8) + dbuf = dbuf.view(dtype) + for ii in range(reps): + n = random.randint(0, nmax, 1)[0] + data = dbuf[:n] + shuff = ext.compress_lz4(data) + out = ext.decompress_lz4(shuff, data.shape, data.dtype) + self.assertTrue(out.dtype is data.dtype) + self.assertTrue(np.all(data.view(np.uint8) == out.view(np.uint8))) + + @unittest.skipUnless(__zstd__, "ZSTD support not included") + def test_circle_with_zstd_compression(self): + nmax = 100000 + reps = 20 + for dtype in TEST_DTYPES: + itemsize = np.dtype(dtype).itemsize + nbyte_max = nmax * itemsize + dbuf = random.randint(0, 255, nbyte_max).astype(np.uint8) + dbuf = dbuf.view(dtype) + for ii in range(reps): + n = random.randint(0, nmax, 1)[0] + data = dbuf[:n] + shuff = ext.compress_zstd(data) + out = ext.decompress_zstd(shuff, data.shape, data.dtype) + self.assertTrue(out.dtype is data.dtype) + self.assertTrue(np.all(data.view(np.uint8) == out.view(np.uint8))) + + +# Python implementations for checking results. 
+ + +def trans_byte_elem(arr): + dtype = arr.dtype + itemsize = dtype.itemsize + in_buf = arr.flat[:].view(np.uint8) + nelem = in_buf.size // itemsize + in_buf.shape = (nelem, itemsize) + + out_buf = np.empty((itemsize, nelem), dtype=np.uint8) + for ii in range(nelem): + for jj in range(itemsize): + out_buf[jj, ii] = in_buf[ii, jj] + return out_buf.flat[:].view(dtype) + + +def trans_bit_byte(arr): + n = arr.size + dtype = arr.dtype + itemsize = dtype.itemsize + bits = np.unpackbits(arr.view(np.uint8)) + bits.shape = (n * itemsize, 8) + # We have to reverse the order of the bits both for unpacking and packing, + # since we want to call the least significant bit the first bit. + bits = bits[:, ::-1] + bits_shuff = (bits.T).copy() + bits_shuff.shape = (n * itemsize, 8) + bits_shuff = bits_shuff[:, ::-1] + arr_bt = np.packbits(bits_shuff.flat[:]) + return arr_bt.view(dtype) + + +def trans_bit_elem(arr): + n = arr.size + dtype = arr.dtype + itemsize = dtype.itemsize + bits = np.unpackbits(arr.view(np.uint8)) + bits.shape = (n * itemsize, 8) + # We have to reverse the order of the bits both for unpacking and packing, + # since we want to call the least significant bit the first bit. 
+ bits = bits[:, ::-1].copy() + bits.shape = (n, itemsize * 8) + bits_shuff = (bits.T).copy() + bits_shuff.shape = (n * itemsize, 8) + bits_shuff = bits_shuff[:, ::-1] + arr_bt = np.packbits(bits_shuff.flat[:]) + return arr_bt.view(dtype) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_h5filter.py b/tests/test_h5filter.py new file mode 100644 index 00000000..2dbb2c3f --- /dev/null +++ b/tests/test_h5filter.py @@ -0,0 +1,138 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + +import unittest +import os +import glob + +import numpy as np +import h5py +import pytest +from h5py import h5z + +from bitshuffle import h5, __zstd__ + + +os.environ["HDF5_PLUGIN_PATH"] = "" + + +class TestFilter(unittest.TestCase): + def test_filter(self): + shape = (32 * 1024 + 783,) + chunks = (4 * 1024 + 23,) + dtype = np.int64 + data = np.arange(shape[0]) + fname = "tmp_test_filters.h5" + f = h5py.File(fname, "w") + h5.create_dataset( + f, + b"range", + shape, + dtype, + chunks, + filter_pipeline=(32008, 32000), + filter_flags=(h5z.FLAG_MANDATORY, h5z.FLAG_MANDATORY), + filter_opts=None, + ) + f["range"][:] = data + + f.close() + + f = h5py.File(fname, "r") + d = f["range"][:] + self.assertTrue(np.all(d == data)) + f.close() + + def test_with_block_size(self): + shape = (128 * 1024 + 783,) + chunks = (4 * 1024 + 23,) + dtype = np.int64 + data = np.arange(shape[0]) + fname = "tmp_test_filters.h5" + f = h5py.File(fname, "w") + h5.create_dataset( + f, + b"range", + shape, + dtype, + chunks, + filter_pipeline=(32008, 32000), + filter_flags=(h5z.FLAG_MANDATORY, h5z.FLAG_MANDATORY), + filter_opts=((680,), ()), + ) + f["range"][:] = data + + f.close() + # os.system('h5dump -H -p tmp_test_filters.h5') + + f = h5py.File(fname, "r") + d = f["range"][:] + self.assertTrue(np.all(d == data)) + f.close() + + def test_with_lz4_compression(self): + shape = (128 * 1024 + 783,) + chunks = (4 * 1024 + 23,) + dtype = np.int64 + data = 
np.arange(shape[0]) + fname = "tmp_test_filters.h5" + f = h5py.File(fname, "w") + h5.create_dataset( + f, + b"range", + shape, + dtype, + chunks, + filter_pipeline=(32008,), + filter_flags=(h5z.FLAG_MANDATORY,), + filter_opts=((0, h5.H5_COMPRESS_LZ4),), + ) + f["range"][:] = data + + f.close() + # os.system('h5dump -H -p tmp_test_filters.h5') + + f = h5py.File(fname, "r") + d = f["range"][:] + self.assertTrue(np.all(d == data)) + f.close() + + @pytest.mark.skipif( + __zstd__ is False, + reason="Bitshuffle has not been built with ZSTD support.", + ) + def test_with_zstd_compression(self): + shape = (128 * 1024 + 783,) + chunks = (4 * 1024 + 23,) + compression_lvl = 10 + dtype = np.int64 + data = np.arange(shape[0]) + fname = "tmp_test_filters.h5" + f = h5py.File(fname, "w") + h5.create_dataset( + f, + b"range", + shape, + dtype, + chunks, + filter_pipeline=(32008,), + filter_flags=(h5z.FLAG_MANDATORY,), + filter_opts=((0, h5.H5_COMPRESS_ZSTD, compression_lvl),), + ) + f["range"][:] = data + + f.close() + # os.system('h5dump -H -p tmp_test_filters.h5') + + f = h5py.File(fname, "r") + d = f["range"][:] + self.assertTrue(np.all(d == data)) + f.close() + + def tearDown(self): + files = glob.glob("tmp_test_*") + for f in files: + os.remove(f) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_h5plugin.py b/tests/test_h5plugin.py new file mode 100644 index 00000000..001fa9da --- /dev/null +++ b/tests/test_h5plugin.py @@ -0,0 +1,66 @@ +from __future__ import absolute_import, division, print_function, unicode_literals +import unittest +import os +import glob + +import numpy as np +import h5py +import pytest +from subprocess import Popen, PIPE, STDOUT + +import bitshuffle + + +plugin_dir = os.path.join(os.path.dirname(bitshuffle.__file__), "plugin") +os.environ["HDF5_PLUGIN_PATH"] = plugin_dir + + +H5VERSION = h5py.h5.get_libversion() +if H5VERSION[0] < 1 or ( + H5VERSION[0] == 1 + and (H5VERSION[1] < 8 or (H5VERSION[1] == 8 and H5VERSION[2] < 11)) +): 
+ H51811P = False +else: + H51811P = True + + +class TestFilterPlugins(unittest.TestCase): + @pytest.mark.skipif( + "CIBUILDWHEEL" in os.environ, + reason="Can't build dynamic HDF5 plugin into bitshuffle wheel.", + ) + def test_plugins(self): + if not H51811P: + return + shape = (32 * 1024,) + chunks = (4 * 1024,) + dtype = np.int64 + data = np.arange(shape[0]) + fname = "tmp_test_filters.h5" + f = h5py.File(fname, "w") + dset = f.create_dataset( + "range", shape=shape, dtype=dtype, chunks=chunks, compression=32008 + ) + dset[:] = data + f.close() + + # Make sure the filters are working outside of h5py by calling h5dump + h5dump = Popen(["h5dump", fname], stdout=PIPE, stderr=STDOUT) + stdout, nothing = h5dump.communicate() + err = h5dump.returncode + self.assertEqual(err, 0) + + f = h5py.File(fname, "r") + d = f["range"][:] + self.assertTrue(np.all(d == data)) + f.close() + + def tearDown(self): + files = glob.glob("tmp_test_*") + for f in files: + os.remove(f) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_regression.py b/tests/test_regression.py new file mode 100644 index 00000000..bb9febc4 --- /dev/null +++ b/tests/test_regression.py @@ -0,0 +1,46 @@ +""" +Test that data encoded with earlier versions can still be decoded correctly. 
+ +""" + +from __future__ import absolute_import, division, print_function + +import pathlib +import unittest + +import numpy as np +import h5py +from bitshuffle import __zstd__ + +from packaging import version + +TEST_DATA_DIR = pathlib.Path(__file__).parent / "data" + +OUT_FILE_TEMPLATE = "regression_%s.h5" + +VERSIONS = ["0.1.3", "0.4.0"] + + +class TestAll(unittest.TestCase): + def test_regression(self): + for rev in VERSIONS: + file_name = TEST_DATA_DIR / (OUT_FILE_TEMPLATE % rev) + f = h5py.File(file_name, "r") + g_orig = f["original"] + g_comp = f["compressed"] + + for dset_name in g_comp.keys(): + self.assertTrue(np.all(g_comp[dset_name][:] == g_orig[dset_name][:])) + + # Only run ZSTD comparison on versions >= 0.4.0 and if ZSTD support + # has been built into bitshuffle + if version.parse(rev) >= version.parse("0.4.0") and __zstd__: + g_comp_zstd = f["compressed_zstd"] + for dset_name in g_comp_zstd.keys(): + self.assertTrue( + np.all(g_comp_zstd[dset_name][:] == g_orig[dset_name][:]) + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/zstd b/zstd new file mode 160000 index 00000000..18d02cbf --- /dev/null +++ b/zstd @@ -0,0 +1 @@ +Subproject commit 18d02cbf2e0654de08093094f1a77cfd231f11d7 From 7a4843f7d4fc097fbd4082fbe92ff7c152dcb020 Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Tue, 8 Nov 2022 11:45:44 +0100 Subject: [PATCH 6/7] Patch bitshuffle to build on Windows Ref: https://github.com/kiyo-masui/bitshuffle/pull/122 --- src/bitshuffle/src/bitshuffle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bitshuffle/src/bitshuffle.c b/src/bitshuffle/src/bitshuffle.c index a8ef0b5c..ba5cde3a 100644 --- a/src/bitshuffle/src/bitshuffle.c +++ b/src/bitshuffle/src/bitshuffle.c @@ -182,7 +182,7 @@ int64_t bshuf_decompress_zstd_block(ioc_chain *C_ptr, tmp_buf = malloc(size * elem_size); if (tmp_buf == NULL) return -1; - nbytes = ZSTD_decompress(tmp_buf, size * elem_size, in + 4, nbytes_from_header); + nbytes = 
ZSTD_decompress(tmp_buf, size * elem_size, (void *)((char *) in + 4), nbytes_from_header); CHECK_ERR_FREE_LZ(nbytes, tmp_buf); if (nbytes != size * elem_size) { free(tmp_buf); From f84f88e7e56d394605a51769d6971ff33c6c3f06 Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Tue, 8 Nov 2022 11:50:22 +0100 Subject: [PATCH 7/7] Add reference of patch --- doc/information.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/information.rst b/doc/information.rst index 20c13248..8dcd19b1 100644 --- a/doc/information.rst +++ b/doc/information.rst @@ -50,7 +50,7 @@ HDF5 filters and compression libraries HDF5 compression filters and compression libraries sources were obtained from: * LZ4 plugin (commit d48f960) and lz4 (v1.9.3): https://github.com/nexusformat/HDF5-External-Filter-Plugins and https://github.com/Blosc/c-blosc/tree/v1.21.1/internal-complibs/lz4-1.9.3 -* bitshuffle plugin (0.4.2) and zstd (v1.5.0): https://github.com/kiyo-masui/bitshuffle and https://github.com/Blosc/c-blosc/tree/v1.21.1/internal-complibs/zstd-1.5.0 +* bitshuffle plugin (0.4.2 + patch `PR #122 `_) and zstd (v1.5.0): https://github.com/kiyo-masui/bitshuffle and https://github.com/Blosc/c-blosc/tree/v1.21.1/internal-complibs/zstd-1.5.0 * bzip2 plugin (from PyTables v3.7.0) and bzip2 (v1.0.8): https://github.com/PyTables/PyTables/, https://sourceware.org/git/bzip2.git * hdf5-blosc plugin (v1.0.0), c-blosc (v1.21.1) and snappy (v1.1.9): https://github.com/Blosc/hdf5-blosc, https://github.com/Blosc/c-blosc and https://github.com/google/snappy * FCIDECOMP plugin (v1.0.2) and CharLS (branch 1.x-master SHA1 ID: 25160a42fb62e71e4b0ce081f5cb3f8bb73938b5):