From 4298f5e32849a6c8aba3b64f903b2749d0455180 Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Wed, 18 May 2022 10:47:34 +0200 Subject: [PATCH 1/7] Build bitshuffle with zstd from blosc --- setup.py | 84 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 44 insertions(+), 40 deletions(-) diff --git a/setup.py b/setup.py index fe2f2815..1a3356af 100644 --- a/setup.py +++ b/setup.py @@ -539,37 +539,6 @@ def prefix(directory, files): """Mapping plugin name to library name they depend on""" -# bitshuffle (+lz4) plugin -# Plugins from https://github.com/kiyo-masui/bitshuffle -bithsuffle_dir = 'src/bitshuffle' - -# Set compile args for both MSVC and others, list is stripped at build time -extra_compile_args = ['-O3', '-ffast-math', '-std=c99', '-fopenmp'] -extra_compile_args += ['/Ox', '/fp:fast', '/openmp'] -if platform.machine() == "ppc64le": - # Required on ppc64le - sse2_options = {'extra_compile_args': ['-DUSESSE2'] } -else: - sse2_options = {} -extra_link_args = ['-fopenmp', '/openmp'] - -bithsuffle_plugin = HDF5PluginExtension( - "hdf5plugin.plugins.libh5bshuf", - sources=prefix(bithsuffle_dir, - ["src/bshuf_h5plugin.c", "src/bshuf_h5filter.c", - "src/bitshuffle.c", "src/bitshuffle_core.c", - "src/iochain.c", "lz4/lz4.c"]), - depends=prefix(bithsuffle_dir, - ["src/bitshuffle.h", "src/bitshuffle_core.h", - "src/iochain.h", 'src/bshuf_h5filter.h', - "lz4/lz4.h"]), - include_dirs=prefix(bithsuffle_dir, ['src/', 'lz4/']), - extra_compile_args=extra_compile_args, - extra_link_args=extra_link_args, - sse2=sse2_options, - ) - - # blosc plugin # Plugin from https://github.com/Blosc/hdf5-blosc # c-blosc from https://github.com/Blosc/c-blosc @@ -633,10 +602,14 @@ def prefix(directory, files): define_macros.append(('HAVE_ZLIB', 1)) # zstd -sources += glob(blosc_dir +'internal-complibs/zstd*/*/*.c') -depends += glob(blosc_dir +'internal-complibs/zstd*/*/*.h') -include_dirs += glob(blosc_dir + 'internal-complibs/zstd*') -include_dirs += glob(blosc_dir 
+ 'internal-complibs/zstd*/common') +zstd_sources = glob(blosc_dir +'internal-complibs/zstd*/*/*.c') +zstd_depends = glob(blosc_dir +'internal-complibs/zstd*/*/*.h') +zstd_include_dirs = glob(blosc_dir + 'internal-complibs/zstd*') +zstd_include_dirs += glob(blosc_dir + 'internal-complibs/zstd*/common') + +sources += zstd_sources +depends += zstd_depends +include_dirs += zstd_include_dirs define_macros.append(('HAVE_ZSTD', 1)) extra_compile_args = ['-std=gnu99'] # Needed to build manylinux1 wheels @@ -664,19 +637,50 @@ def prefix(directory, files): # HDF5Plugin-Zstandard zstandard_dir = os.path.join("src", "HDF5Plugin-Zstandard") -zstandard_include_dirs = glob(blosc_dir + 'internal-complibs/zstd*') -zstandard_include_dirs += glob(blosc_dir + 'internal-complibs/zstd*/common') zstandard_sources = [os.path.join(zstandard_dir, 'zstd_h5plugin.c')] -zstandard_sources += glob(blosc_dir +'internal-complibs/zstd*/*/*.c') +zstandard_sources += zstd_sources zstandard_depends = [os.path.join(zstandard_dir, 'zstd_h5plugin.h')] -zstandard_depends += glob(blosc_dir +'internal-complibs/zstd*/*/*.h') +zstandard_depends += zstd_depends zstandard_plugin = HDF5PluginExtension( "hdf5plugin.plugins.libh5zstd", sources=zstandard_sources, depends=zstandard_depends, - include_dirs=zstandard_include_dirs, + include_dirs=zstd_include_dirs, ) +# bitshuffle (+lz4 or zstd) plugin +# Plugins from https://github.com/kiyo-masui/bitshuffle +bithsuffle_dir = 'src/bitshuffle' + +# Set compile args for both MSVC and others, list is stripped at build time +extra_compile_args = ['-O3', '-ffast-math', '-std=c99', '-fopenmp'] +extra_compile_args += ['/Ox', '/fp:fast', '/openmp'] +if platform.machine() == "ppc64le": + # Required on ppc64le + sse2_options = {'extra_compile_args': ['-DUSESSE2'] } +else: + sse2_options = {} +extra_link_args = ['-fopenmp', '/openmp'] +define_macros = [("ZSTD_SUPPORT", 1)] + +bithsuffle_plugin = HDF5PluginExtension( + "hdf5plugin.plugins.libh5bshuf", + 
sources=prefix(bithsuffle_dir, + ["src/bshuf_h5plugin.c", "src/bshuf_h5filter.c", + "src/bitshuffle.c", "src/bitshuffle_core.c", + "src/iochain.c", "lz4/lz4.c"]) + zstd_sources, + depends=prefix(bithsuffle_dir, + ["src/bitshuffle.h", "src/bitshuffle_core.h", + "src/iochain.h", 'src/bshuf_h5filter.h', + "lz4/lz4.h"]) + zstd_depends, + include_dirs=prefix(bithsuffle_dir, ['src/', 'lz4/']) + zstd_include_dirs, + define_macros=define_macros, + extra_compile_args=extra_compile_args, + extra_link_args=extra_link_args, + sse2=sse2_options, + ) + + # lz4 plugin # Source from https://github.com/nexusformat/HDF5-External-Filter-Plugins From a1c258abc42c4b48ce1662afbfb7044d5b9b3925 Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Wed, 18 May 2022 10:47:59 +0200 Subject: [PATCH 2/7] update doc --- doc/contribute.rst | 10 ++++++++-- doc/information.rst | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/doc/contribute.rst b/doc/contribute.rst index 0abb1538..12850c2a 100644 --- a/doc/contribute.rst +++ b/doc/contribute.rst @@ -89,12 +89,18 @@ The meaning of those integers is filter dependent and is described below. bitshuffle .......... -compression_opts: (**block_size**, **lz4 compression**) +compression_opts: (**block_size**, **compression**, **level**) - **block size**: Number of elements (not bytes) per block. It MUST be a mulitple of 8. Default: 0 for a block size of about 8 kB. -- **lz4 compression**: 0: disabled (default), 2: enabled. +- **compression**: + + * 0: No compression + * 2: LZ4 + * 3: Zstd + +- **level**: Compression level, only used with Zstd compression. By default the filter uses bitshuffle, but does NOT compress with LZ4. 
diff --git a/doc/information.rst b/doc/information.rst index 1c761868..20c13248 100644 --- a/doc/information.rst +++ b/doc/information.rst @@ -50,7 +50,7 @@ HDF5 filters and compression libraries HDF5 compression filters and compression libraries sources were obtained from: * LZ4 plugin (commit d48f960) and lz4 (v1.9.3): https://github.com/nexusformat/HDF5-External-Filter-Plugins and https://github.com/Blosc/c-blosc/tree/v1.21.1/internal-complibs/lz4-1.9.3 -* bitshuffle plugin (0.3.5): https://github.com/kiyo-masui/bitshuffle +* bitshuffle plugin (0.4.2) and zstd (v1.5.0): https://github.com/kiyo-masui/bitshuffle and https://github.com/Blosc/c-blosc/tree/v1.21.1/internal-complibs/zstd-1.5.0 * bzip2 plugin (from PyTables v3.7.0) and bzip2 (v1.0.8): https://github.com/PyTables/PyTables/, https://sourceware.org/git/bzip2.git * hdf5-blosc plugin (v1.0.0), c-blosc (v1.21.1) and snappy (v1.1.9): https://github.com/Blosc/hdf5-blosc, https://github.com/Blosc/c-blosc and https://github.com/google/snappy * FCIDECOMP plugin (v1.0.2) and CharLS (branch 1.x-master SHA1 ID: 25160a42fb62e71e4b0ce081f5cb3f8bb73938b5): From 2628fefc66872507d90edaca536fcaa451c31f76 Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Wed, 18 May 2022 10:48:25 +0200 Subject: [PATCH 3/7] update python wrapper and tests --- src/hdf5plugin/__init__.py | 44 ++++++++++++++++++++++++++++++++------ src/hdf5plugin/test.py | 21 +++++++++++++++++- 2 files changed, 57 insertions(+), 8 deletions(-) diff --git a/src/hdf5plugin/__init__.py b/src/hdf5plugin/__init__.py index a731a1fa..6f8f7938 100644 --- a/src/hdf5plugin/__init__.py +++ b/src/hdf5plugin/__init__.py @@ -141,18 +141,48 @@ class Bitshuffle(_FilterRefClass): The number of elements per block. It needs to be divisible by eight (default is 0, about 8kB per block) Default: 0 (for about 8kB per block). - :param bool lz4: - Whether to use lz4 compression or not as part of the filter. 
-        Default: True
+    :param str cname:
+        `lz4` (default), `none`, `zstd`
+    :param int clevel: Compression level, used only for `zstd` compression.
+        Can be negative, and must be below or equal to 22 (maximum compression).
+        Default: 3.
     """
     filter_id = BSHUF_ID
 
-    def __init__(self, nelems=0, lz4=True):
+    __COMPRESSIONS = {
+        'none': 0,
+        'lz4': 2,
+        'zstd': 3,
+    }
+
+    def __init__(self, nelems=0, cname=None, clevel=3, lz4=None):
         nelems = int(nelems)
         assert nelems % 8 == 0
-
-        lz4_enabled = 2 if lz4 else 0
-        self.filter_options = (nelems, lz4_enabled)
+        assert clevel <= 22
+
+        if lz4 is not None:
+            if cname is not None:
+                raise ValueError("Providing both cname and lz4 arguments is not supported")
+            _logger.warning(
+                "Deprecation: hdf5plugin.Bitshuffle's lz4 argument is deprecated, "
+                "use cname='lz4' or 'none' instead.")
+            cname = 'lz4' if lz4 else 'none'
+
+        if cname in (True, False):
+            _logger.warning(
+                "Deprecation: hdf5plugin.Bitshuffle's boolean argument is deprecated, "
+                "use cname='lz4' or 'none' instead.")
+            cname = 'lz4' if cname else 'none'
+
+        if cname is None:
+            cname = 'lz4'
+        if cname not in self.__COMPRESSIONS:
+            raise ValueError("Unsupported compression: %s" % cname)
+
+        if cname == 'zstd':
+            self.filter_options = (nelems, self.__COMPRESSIONS[cname], clevel)
+        else:
+            self.filter_options = (nelems, self.__COMPRESSIONS[cname])
 
 
 class Blosc(_FilterRefClass):
diff --git a/src/hdf5plugin/test.py b/src/hdf5plugin/test.py
index 25047b67..277aa7e1 100644
--- a/src/hdf5plugin/test.py
+++ b/src/hdf5plugin/test.py
@@ -107,7 +107,7 @@ def _test(self,
         return filters[0]
 
     @unittest.skipUnless(should_test("bshuf"), "Bitshuffle filter not available")
-    def testBitshuffle(self):
+    def testDeprecatedBitshuffle(self):
         """Write/read test with bitshuffle filter plugin"""
         self._test('bshuf')  # Default options
 
@@ -119,6 +119,25 @@ def testBitshuffle(self):
                 filter_ = self._test('bshuf', dtype, compressed=lz4, nelems=nelems, lz4=lz4)
                 self.assertEqual(filter_[2][3:], (nelems, 2 
if lz4 else 0)) + @unittest.skipUnless(should_test("bshuf"), "Bitshuffle filter not available") + def testBitshuffle(self): + """Write/read test with bitshuffle filter plugin""" + self._test('bshuf') # Default options + + compression_ids = { + 'none': 0, + 'lz4': 2, + 'zstd': 3 + } + + # Specify options + for cname in ('none', 'lz4', 'zstd'): + for dtype in (numpy.int8, numpy.int16, numpy.int32, numpy.int64): + for nelems in (1024, 2048): + with self.subTest(cname=cname, dtype=dtype, nelems=nelems): + filter_ = self._test('bshuf', dtype, compressed=cname!='none', nelems=nelems, cname=cname) + self.assertEqual(filter_[2][3:5], (nelems, compression_ids[cname])) + @unittest.skipUnless(should_test("blosc"), "Blosc filter not available") def testBlosc(self): """Write/read test with blosc filter plugin""" From 33018d6626dd90a027cf41b583c0d35f7ed8c391 Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Wed, 18 May 2022 11:30:08 +0200 Subject: [PATCH 4/7] remove src/bitshuffle --- src/bitshuffle/.gitignore | 77 - src/bitshuffle/.travis.yml | 33 - src/bitshuffle/LICENSE | 21 - src/bitshuffle/MANIFEST.in | 10 - src/bitshuffle/README.rst | 240 --- src/bitshuffle/bitshuffle/__init__.py | 21 - src/bitshuffle/bitshuffle/ext.pyx | 449 ---- src/bitshuffle/bitshuffle/h5.pyx | 205 -- src/bitshuffle/bitshuffle/tests/__init__.py | 0 .../bitshuffle/tests/data/regression_0.1.3.h5 | Bin 114447 -> 0 bytes .../bitshuffle/tests/make_regression_tdata.py | 42 - src/bitshuffle/bitshuffle/tests/test_ext.py | 588 ------ .../bitshuffle/tests/test_h5filter.py | 91 - .../bitshuffle/tests/test_h5plugin.py | 83 - .../bitshuffle/tests/test_regression.py | 40 - src/bitshuffle/conda-recipe/bld.bat | 3 - src/bitshuffle/conda-recipe/build.sh | 2 - src/bitshuffle/conda-recipe/meta.yaml | 27 - src/bitshuffle/conda-recipe/setup.py.patch | 13 - src/bitshuffle/lz4/LICENSE | 24 - src/bitshuffle/lz4/README.md | 21 - src/bitshuffle/lz4/lz4.c | 1516 -------------- src/bitshuffle/lz4/lz4.h | 360 ---- 
src/bitshuffle/lzf/LICENSE.txt | 34 - src/bitshuffle/lzf/README.txt | 84 - src/bitshuffle/lzf/README_bitshuffle.txt | 3 - src/bitshuffle/lzf/example.c | 106 - src/bitshuffle/lzf/lzf/lzf.h | 100 - src/bitshuffle/lzf/lzf/lzfP.h | 166 -- src/bitshuffle/lzf/lzf/lzf_c.c | 296 --- src/bitshuffle/lzf/lzf/lzf_d.c | 154 -- src/bitshuffle/lzf/lzf_filter.c | 261 --- src/bitshuffle/lzf/lzf_filter.h | 38 - src/bitshuffle/requirements.txt | 5 - src/bitshuffle/setup.cfg.example | 10 - src/bitshuffle/setup.py | 323 --- src/bitshuffle/src/bitshuffle.c | 165 -- src/bitshuffle/src/bitshuffle.h | 123 -- src/bitshuffle/src/bitshuffle_core.c | 1862 ----------------- src/bitshuffle/src/bitshuffle_core.h | 157 -- src/bitshuffle/src/bitshuffle_internals.h | 75 - src/bitshuffle/src/bshuf_h5filter.c | 218 -- src/bitshuffle/src/bshuf_h5filter.h | 59 - src/bitshuffle/src/bshuf_h5plugin.c | 19 - src/bitshuffle/src/iochain.c | 90 - src/bitshuffle/src/iochain.h | 94 - src/bitshuffle/src/lzf_h5plugin.c | 42 - 47 files changed, 8350 deletions(-) delete mode 100644 src/bitshuffle/.gitignore delete mode 100644 src/bitshuffle/.travis.yml delete mode 100644 src/bitshuffle/LICENSE delete mode 100644 src/bitshuffle/MANIFEST.in delete mode 100644 src/bitshuffle/README.rst delete mode 100644 src/bitshuffle/bitshuffle/__init__.py delete mode 100644 src/bitshuffle/bitshuffle/ext.pyx delete mode 100644 src/bitshuffle/bitshuffle/h5.pyx delete mode 100644 src/bitshuffle/bitshuffle/tests/__init__.py delete mode 100644 src/bitshuffle/bitshuffle/tests/data/regression_0.1.3.h5 delete mode 100644 src/bitshuffle/bitshuffle/tests/make_regression_tdata.py delete mode 100644 src/bitshuffle/bitshuffle/tests/test_ext.py delete mode 100644 src/bitshuffle/bitshuffle/tests/test_h5filter.py delete mode 100644 src/bitshuffle/bitshuffle/tests/test_h5plugin.py delete mode 100644 src/bitshuffle/bitshuffle/tests/test_regression.py delete mode 100644 src/bitshuffle/conda-recipe/bld.bat delete mode 100644 
src/bitshuffle/conda-recipe/build.sh delete mode 100644 src/bitshuffle/conda-recipe/meta.yaml delete mode 100644 src/bitshuffle/conda-recipe/setup.py.patch delete mode 100644 src/bitshuffle/lz4/LICENSE delete mode 100644 src/bitshuffle/lz4/README.md delete mode 100644 src/bitshuffle/lz4/lz4.c delete mode 100644 src/bitshuffle/lz4/lz4.h delete mode 100644 src/bitshuffle/lzf/LICENSE.txt delete mode 100644 src/bitshuffle/lzf/README.txt delete mode 100644 src/bitshuffle/lzf/README_bitshuffle.txt delete mode 100644 src/bitshuffle/lzf/example.c delete mode 100644 src/bitshuffle/lzf/lzf/lzf.h delete mode 100644 src/bitshuffle/lzf/lzf/lzfP.h delete mode 100644 src/bitshuffle/lzf/lzf/lzf_c.c delete mode 100644 src/bitshuffle/lzf/lzf/lzf_d.c delete mode 100644 src/bitshuffle/lzf/lzf_filter.c delete mode 100644 src/bitshuffle/lzf/lzf_filter.h delete mode 100644 src/bitshuffle/requirements.txt delete mode 100644 src/bitshuffle/setup.cfg.example delete mode 100644 src/bitshuffle/setup.py delete mode 100644 src/bitshuffle/src/bitshuffle.c delete mode 100644 src/bitshuffle/src/bitshuffle.h delete mode 100644 src/bitshuffle/src/bitshuffle_core.c delete mode 100644 src/bitshuffle/src/bitshuffle_core.h delete mode 100644 src/bitshuffle/src/bitshuffle_internals.h delete mode 100644 src/bitshuffle/src/bshuf_h5filter.c delete mode 100644 src/bitshuffle/src/bshuf_h5filter.h delete mode 100644 src/bitshuffle/src/bshuf_h5plugin.c delete mode 100644 src/bitshuffle/src/iochain.c delete mode 100644 src/bitshuffle/src/iochain.h delete mode 100644 src/bitshuffle/src/lzf_h5plugin.c diff --git a/src/bitshuffle/.gitignore b/src/bitshuffle/.gitignore deleted file mode 100644 index d8d6cf49..00000000 --- a/src/bitshuffle/.gitignore +++ /dev/null @@ -1,77 +0,0 @@ -## C - -# Object files -*.o -*.ko -*.obj -*.elf - -# Libraries -*.lib -*.a - -# Shared objects (inc. 
Windows DLLs) -*.dll -*.so -*.so.* -*.dylib - -# Executables -*.exe -*.out -*.app -*.i*86 -*.x86_64 -*.hex - - -## Python -*.py[cod] - -# C extensions -*.so - -# Packages -*.egg -*.egg-info -dist -build -eggs -parts -bin -var -sdist -develop-eggs -.installed.cfg -lib -lib64 -__pycache__ - -# Installer logs -pip-log.txt - -# Unit test / coverage reports -.coverage -.tox -nosetests.xml - -# Translations -*.mo - -# Mr Developer -.mr.developer.cfg -.project -.pydevproject - -# Documentation builds -doc/_build -doc/generated - -## Editor files and backups. -*.swp -*.swo - -# Generated files -bitshuffle/ext.c -bitshuffle/h5.c - diff --git a/src/bitshuffle/.travis.yml b/src/bitshuffle/.travis.yml deleted file mode 100644 index 7b5b4994..00000000 --- a/src/bitshuffle/.travis.yml +++ /dev/null @@ -1,33 +0,0 @@ -language: python -os: linux -# To test filter plugins, need hdf5 1.8.11+, present in Trusty but not Precise. -dist: trusty -# Required to get Trusty. -#sudo: true -python: - - "2.7" - - "3.4" - - "3.5" - - "3.6" -addons: - apt: - packages: - - libhdf5-serial-dev - - hdf5-tools -install: - - "pip install -U pip virtualenv" - # Ensures the system hdf5 headers/libs will be used whatever its version - - "export HDF5_DIR=/usr/lib" - - "pip install -r requirements.txt" - # Installing the plugin to arbitrary directory to check the install script. - - "python setup.py install --h5plugin --h5plugin-dir ~/hdf5/lib" - # Ensure it's installable and usable in virtualenv - - "virtualenv ~/venv" - - "travis_wait 30 ~/venv/bin/pip -v install --no-binary=h5py ." - - "~/venv/bin/pip -v install nose" -# Can't be somewhere that has a 'bitshuffle' directory as nose will use that -# copy instead of installed package. 
-script: - - "cd ~" - - "nosetests -v bitshuffle" # Test the system install - - "venv/bin/nosetests -v bitshuffle" # Test the virtualenv install diff --git a/src/bitshuffle/LICENSE b/src/bitshuffle/LICENSE deleted file mode 100644 index 1365ed69..00000000 --- a/src/bitshuffle/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -Bitshuffle - Filter for improving compression of typed binary data. - -Copyright (c) 2014 Kiyoshi Masui (kiyo@physics.ubc.ca) - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
diff --git a/src/bitshuffle/MANIFEST.in b/src/bitshuffle/MANIFEST.in deleted file mode 100644 index 00746c64..00000000 --- a/src/bitshuffle/MANIFEST.in +++ /dev/null @@ -1,10 +0,0 @@ -recursive-include src *.h *.c -recursive-include bitshuffle *.pyx -recursive-include lz4 *.h *.c -recursive-include lzf *.h *.c -include setup.cfg.example -include LICENSE -include README.rst -include requirements.txt -exclude setup.cfg - diff --git a/src/bitshuffle/README.rst b/src/bitshuffle/README.rst deleted file mode 100644 index 343b4c62..00000000 --- a/src/bitshuffle/README.rst +++ /dev/null @@ -1,240 +0,0 @@ -========== -Bitshuffle -========== - -Filter for improving compression of typed binary data. - -Bitshuffle is an algorithm that rearranges typed, binary data for improving -compression, as well as a python/C package that implements this algorithm -within the Numpy framework. - -The library can be used along side HDF5 to compress and decompress datasets and -is integrated through the `dynamically loaded filters`_ framework. Bitshuffle -is HDF5 filter number ``32008``. - -Algorithmically, Bitshuffle is closely related to HDF5's `Shuffle filter`_ -except it operates at the bit level instead of the byte level. Arranging a -typed data array in to a matrix with the elements as the rows and the bits -within the elements as the columns, Bitshuffle "transposes" the matrix, -such that all the least-significant-bits are in a row, etc. This transpose -is performed within blocks of data roughly 8kB long [1]_. - -This does not in itself compress data, only rearranges it for more efficient -compression. To perform the actual compression you will need a compression -library. Bitshuffle has been designed to be well matched Marc Lehmann's -LZF_ as well as LZ4_. 
Note that because Bitshuffle modifies the data at the bit -level, sophisticated entropy reducing compression libraries such as GZIP and -BZIP are unlikely to achieve significantly better compression than simpler and -faster duplicate-string-elimination algorithms such as LZF and LZ4. Bitshuffle -thus includes routines (and HDF5 filter options) to apply LZ4 compression to -each block after shuffling [2]_. - -The Bitshuffle algorithm relies on neighbouring elements of a dataset being -highly correlated to improve data compression. Any correlations that span at -least 24 elements of the dataset may be exploited to improve compression. - -Bitshuffle was designed with performance in mind. On most machines the -time required for Bitshuffle+LZ4 is insignificant compared to the time required -to read or write the compressed data to disk. Because it is able to exploit the -SSE and AVX instruction sets present on modern Intel and AMD processors, on -these machines compression is only marginally slower than an out-of-cache -memory copy. On modern x86 processors you can expect Bitshuffle to have a -throughput of roughly 1 byte per clock cycle, and on the Haswell generation of -Intel processors (2013) and later, you can expect up to 2 bytes per clock -cycle. In addition, Bitshuffle is parallelized using OpenMP. - -As a bonus, Bitshuffle ships with a dynamically loaded version of -`h5py`'s LZF compression filter, such that the filter can be transparently -used outside of python and in command line utilities such as ``h5dump``. - -.. [1] Chosen to fit comfortably within L1 cache as well as be well matched - window of the LZF compression library. - -.. [2] Over applying bitshuffle to the full dataset then applying LZ4 - compression, this has the tremendous advantage that the block is - already in the L1 cache. - -.. _`dynamically loaded filters`: http://www.hdfgroup.org/HDF5/doc/Advanced/DynamicallyLoadedFilters/HDF5DynamicallyLoadedFilters.pdf - -.. 
_`Shuffle filter`: http://www.hdfgroup.org/HDF5/doc_resource/H5Shuffle_Perf.pdf - -.. _LZF: http://oldhome.schmorp.de/marc/liblzf.html - -.. _LZ4: https://code.google.com/p/lz4/ - - -Applications ------------- - -Bitshuffle might be right for your application if: - -- You need to compress typed binary data. -- Your data is arranged such that adjacent elements over the fastest varying - index of your dataset are similar (highly correlated). -- A special case of the previous point is if you are only exercising a subset - of the bits in your data-type, as is often true of integer data. -- You need both high compression ratios and high performance. - - -Comparing Bitshuffle to other compression algorithms and HDF5 filters: - -- Bitshuffle is less general than many other compression algorithms. - To achieve good compression ratios, consecutive elements of your data must - be highly correlated. -- For the right datasets, Bitshuffle is one of the few compression - algorithms that promises both high throughput and high compression ratios. -- Bitshuffle should have roughly the same throughput as Shuffle, but - may obtain higher compression ratios. -- The MAFISC_ filter actually includes something similar to Bitshuffle as one of - its prefilters, However, MAFICS's emphasis is on obtaining high compression - ratios at all costs, sacrificing throughput. - -.. _MAFISC: http://wr.informatik.uni-hamburg.de/research/projects/icomex/mafisc - - -Installation for Python ------------------------ - -Installation requires python 2.7+ or 3.3+, HDF5 1.8.4 or later, HDF5 for python -(h5py), Numpy and Cython. Bitshuffle must be linked against the same version of -HDF5 as h5py, which in practice means h5py must be built from source_ rather -than pre-built wheels [3]_. To use the dynamically loaded HDF5 filter requires -HDF5 1.8.11 or later. 
- -To install:: - - python setup.py install [--h5plugin [--h5plugin-dir=spam]] - -To get finer control of installation options, including whether to compile -with OpenMP multi-threading, copy the ``setup.cfg.example`` to ``setup.cfg`` -and edit the values therein. - -If using the dynamically loaded HDF5 filter (which gives you access to the -Bitshuffle and LZF filters outside of python), set the environment variable -``HDF5_PLUGIN_PATH`` to the value of ``--h5plugin-dir`` or use HDF5's default -search location of ``/usr/local/hdf5/lib/plugin``. - -If you get an error about missing source files when building the extensions, -try upgrading setuptools. There is a weird bug where setuptools prior to 0.7 -doesn't work properly with Cython in some cases. - -.. _source: http://docs.h5py.org/en/latest/build.html#source-installation - -.. [3] Typically you will be able to install Bitshuffle, but there will be - errors when creating and reading datasets. - - -Usage from Python ------------------ - -The `bitshuffle` module contains routines for shuffling and unshuffling -Numpy arrays. - -If installed with the dynamically loaded filter plugins, Bitshuffle can be used -in conjunction with HDF5 both inside and outside of python, in the same way as -any other filter; simply by specifying the filter number ``32008``. Otherwise -the filter will be available only within python and only after importing -`bitshuffle.h5`. Reading Bitshuffle encoded datasets will be transparent. -The filter can be added to new datasets either through the `h5py` low level -interface or through the convenience functions provided in -`bitshuffle.h5`. See the docstrings and unit tests for examples. For `h5py` -version 2.5.0 and later Bitshuffle can added to new datasets through the -high level interface, as in the example below. 
- - -Example h5py ------------- -:: - - import h5py - import numpy - import bitshuffle.h5 - - print(h5py.__version__) # >= '2.5.0' - - f = h5py.File(filename, "w") - - # block_size = 0 let Bitshuffle choose its value - block_size = 0 - - dataset = f.create_dataset( - "data", - (100, 100, 100), - compression=bitshuffle.h5.H5FILTER, - compression_opts=(block_size, bitshuffle.h5.H5_COMPRESS_LZ4), - dtype='float32', - ) - - # create some random data - array = numpy.random.rand(100, 100, 100) - array = array.astype('float32') - - dataset[:] = array - - f.close() - - -Usage from C ------------- - -If you wish to use Bitshuffle in your C program and would prefer not to use the -HDF5 dynamically loaded filter, the C library in the ``src/`` directory is -self-contained and complete. - - -Usage from Java ---------------- - -You can use Bitshuffle even in Java and the routines for shuffling and unshuffling -are ported into `snappy-java`_. To use the routines, you need to add the following -dependency to your pom.xml:: - - - org.xerial.snappy - snappy-java - 1.1.3-M1 - - -First, import org.xerial.snapy.BitShuffle in your Java code:: - - import org.xerial.snappy.BitShuffle; - -Then, you use them like this:: - - int[] data = new int[] {1, 3, 34, 43, 34}; - byte[] shuffledData = BitShuffle.bitShuffle(data); - int[] result = BitShuffle.bitUnShuffleIntArray(shuffledData); - -.. _`snappy-java`: https://github.com/xerial/snappy-java - - -Anaconda --------- - -The conda package can be build via:: - - conda build conda-recipe - - -For Best Results ----------------- - -Here are a few tips to help you get the most out of Bitshuffle: - -- For multi-dimensional datasets, order your data such that the fastest varying - dimension is the one over which your data is most correlated (have - values that change the least), or fake this using chunks. -- To achieve the highest throughput, use a data type that is 64 *bytes* or - smaller. 
If you have a very large compound data type, consider adding a - dimension to your datasets instead. -- To make full use of the SSE2 instruction set, use a data type whose size - is a multiple of 2 bytes. For the AVX2 instruction set, use a data type whose - size is a multiple of 4 bytes. - - -Citing Bitshuffle ------------------ - -Bitshuffle was initially described in -http://dx.doi.org/10.1016/j.ascom.2015.07.002, pre-print available at -http://arxiv.org/abs/1503.00638. diff --git a/src/bitshuffle/bitshuffle/__init__.py b/src/bitshuffle/bitshuffle/__init__.py deleted file mode 100644 index 06d53b37..00000000 --- a/src/bitshuffle/bitshuffle/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -""" -Filter for improving compression of typed binary data. - -Functions -========= - - using_NEON - using_SSE2 - using_AVX2 - bitshuffle - bitunshuffle - compress_lz4 - decompress_lz4 - -""" - -from __future__ import absolute_import - - -from bitshuffle.ext import (__version__, bitshuffle, bitunshuffle, using_NEON, using_SSE2, - using_AVX2, compress_lz4, decompress_lz4) diff --git a/src/bitshuffle/bitshuffle/ext.pyx b/src/bitshuffle/bitshuffle/ext.pyx deleted file mode 100644 index 6c344d80..00000000 --- a/src/bitshuffle/bitshuffle/ext.pyx +++ /dev/null @@ -1,449 +0,0 @@ -""" -Wrappers for public and private bitshuffle routines - -""" - -from __future__ import absolute_import, division, print_function, unicode_literals - -import numpy as np - -cimport numpy as np -cimport cython - - -np.import_array() - - -# Repeat each calculation this many times. For timing. 
-cdef int REPEATC = 1 -#cdef int REPEATC = 32 - -REPEAT = REPEATC - -cdef extern from b"bitshuffle.h": - int bshuf_using_NEON() - int bshuf_using_SSE2() - int bshuf_using_AVX2() - int bshuf_bitshuffle(void *A, void *B, int size, int elem_size, - int block_size) - int bshuf_bitunshuffle(void *A, void *B, int size, int elem_size, - int block_size) - int bshuf_compress_lz4_bound(int size, int elem_size, int block_size) - int bshuf_compress_lz4(void *A, void *B, int size, int elem_size, - int block_size) - int bshuf_decompress_lz4(void *A, void *B, int size, int elem_size, - int block_size) - int BSHUF_VERSION_MAJOR - int BSHUF_VERSION_MINOR - int BSHUF_VERSION_POINT - - -__version__ = str("%d.%d.%d").format(BSHUF_VERSION_MAJOR, BSHUF_VERSION_MINOR, - BSHUF_VERSION_POINT) - - -# Prototypes from bitshuffle.c -cdef extern int bshuf_copy(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_byte_elem_scal(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_byte_elem_SSE(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_byte_elem_NEON(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_bit_byte_scal(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_bit_byte_SSE(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_bit_byte_NEON(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_bit_byte_AVX(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_bitrow_eight(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_bit_elem_AVX(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_bit_elem_SSE(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_bit_elem_NEON(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_bit_elem_scal(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_byte_bitrow_SSE(void *A, void *B, int size, int 
elem_size) -cdef extern int bshuf_trans_byte_bitrow_NEON(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_byte_bitrow_AVX(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_byte_bitrow_scal(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_shuffle_bit_eightelem_scal(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_shuffle_bit_eightelem_SSE(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_shuffle_bit_eightelem_NEON(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_shuffle_bit_eightelem_AVX(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_untrans_bit_elem_SSE(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_untrans_bit_elem_NEON(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_untrans_bit_elem_AVX(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_untrans_bit_elem_scal(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_trans_bit_elem(void *A, void *B, int size, int elem_size) -cdef extern int bshuf_untrans_bit_elem(void *A, void *B, int size, int elem_size) - - -ctypedef int (*Cfptr) (void *A, void *B, int size, int elem_size) - - -def using_NEON(): - """Whether compiled using Arm NEON instructions.""" - if bshuf_using_NEON(): - return True - else: - return False - - -def using_SSE2(): - """Whether compiled using SSE2 instructions.""" - if bshuf_using_SSE2(): - return True - else: - return False - - -def using_AVX2(): - """Whether compiled using AVX2 instructions.""" - if bshuf_using_AVX2(): - return True - else: - return False - - -def _setup_arr(arr): - shape = tuple(arr.shape) - if not arr.flags['C_CONTIGUOUS']: - msg = "Input array must be C-contiguous." 
- raise ValueError(msg) - size = arr.size - dtype = arr.dtype - itemsize = dtype.itemsize - out = np.empty(shape, dtype=dtype) - return out, size, itemsize - - -@cython.boundscheck(False) -@cython.wraparound(False) -cdef _wrap_C_fun(Cfptr fun, np.ndarray arr): - """Wrap a C function with standard call signature.""" - - cdef int ii, size, itemsize, count=0 - cdef np.ndarray out - out, size, itemsize = _setup_arr(arr) - - cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat - arr_flat = arr.view(np.uint8).ravel() - cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat - out_flat = out.view(np.uint8).ravel() - cdef void* arr_ptr = &arr_flat[0] - cdef void* out_ptr = &out_flat[0] - - for ii in range(REPEATC): - count = fun(arr_ptr, out_ptr, size, itemsize) - if count < 0: - msg = "Failed. Error code %d." - excp = RuntimeError(msg % count, count) - raise excp - return out - - -def copy(np.ndarray arr not None): - """Copies the data. - - For testing and profiling purposes. - - """ - return _wrap_C_fun(&bshuf_copy, arr) - - -def trans_byte_elem_scal(np.ndarray arr not None): - """Transpose bytes within words but not bits. - - """ - return _wrap_C_fun(&bshuf_trans_byte_elem_scal, arr) - - -def trans_byte_elem_SSE(np.ndarray arr not None): - """Transpose bytes within array elements. 
- - """ - return _wrap_C_fun(&bshuf_trans_byte_elem_SSE, arr) - - -def trans_byte_elem_NEON(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_byte_elem_NEON, arr) - - -def trans_bit_byte_scal(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_bit_byte_scal, arr) - - -def trans_bit_byte_SSE(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_bit_byte_SSE, arr) - - -def trans_bit_byte_NEON(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_bit_byte_NEON, arr) - - -def trans_bit_byte_AVX(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_bit_byte_AVX, arr) - - -def trans_bitrow_eight(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_bitrow_eight, arr) - - -def trans_bit_elem_AVX(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_bit_elem_AVX, arr) - - -def trans_bit_elem_scal(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_bit_elem_scal, arr) - - -def trans_bit_elem_SSE(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_bit_elem_SSE, arr) - - -def trans_bit_elem_NEON(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_bit_elem_NEON, arr) - - -def trans_byte_bitrow_SSE(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_byte_bitrow_SSE, arr) - - -def trans_byte_bitrow_NEON(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_byte_bitrow_NEON, arr) - - -def trans_byte_bitrow_AVX(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_byte_bitrow_AVX, arr) - - -def trans_byte_bitrow_scal(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_byte_bitrow_scal, arr) - - -def shuffle_bit_eightelem_scal(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_scal, arr) - - -def shuffle_bit_eightelem_SSE(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_SSE, arr) - - -def shuffle_bit_eightelem_NEON(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_NEON, arr) - - -def 
shuffle_bit_eightelem_AVX(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_AVX, arr) - - -def untrans_bit_elem_SSE(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_untrans_bit_elem_SSE, arr) - - -def untrans_bit_elem_NEON(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_untrans_bit_elem_NEON, arr) - - -def untrans_bit_elem_AVX(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_untrans_bit_elem_AVX, arr) - - -def untrans_bit_elem_scal(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_untrans_bit_elem_scal, arr) - - -def trans_bit_elem(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_trans_bit_elem, arr) - - -def untrans_bit_elem(np.ndarray arr not None): - return _wrap_C_fun(&bshuf_untrans_bit_elem, arr) - - -@cython.boundscheck(False) -@cython.wraparound(False) -def bitshuffle(np.ndarray arr not None, int block_size=0): - """Bitshuffle an array. - - Output array is the same shape and data type as input array but underlying - buffer has been bitshuffled. - - Parameters - ---------- - arr : numpy array - Data to ne processed. - block_size : positive integer - Block size in number of elements. By default, block size is chosen - automatically. - - Returns - ------- - out : numpy array - Array with the same shape as input but underlying data has been - bitshuffled. - - """ - - cdef int ii, size, itemsize, count=0 - cdef np.ndarray out - out, size, itemsize = _setup_arr(arr) - - cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat - arr_flat = arr.view(np.uint8).ravel() - cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat - out_flat = out.view(np.uint8).ravel() - cdef void* arr_ptr = &arr_flat[0] - cdef void* out_ptr = &out_flat[0] - - for ii in range(REPEATC): - count = bshuf_bitshuffle(arr_ptr, out_ptr, size, itemsize, block_size) - if count < 0: - msg = "Failed. Error code %d." 
- excp = RuntimeError(msg % count, count) - raise excp - return out - - -@cython.boundscheck(False) -@cython.wraparound(False) -def bitunshuffle(np.ndarray arr not None, int block_size=0): - """Bitshuffle an array. - - Output array is the same shape and data type as input array but underlying - buffer has been un-bitshuffled. - - Parameters - ---------- - arr : numpy array - Data to ne processed. - block_size : positive integer - Block size in number of elements. Must match value used for shuffling. - - Returns - ------- - out : numpy array - Array with the same shape as input but underlying data has been - un-bitshuffled. - - """ - - cdef int ii, size, itemsize, count=0 - cdef np.ndarray out - out, size, itemsize = _setup_arr(arr) - - cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat - arr_flat = arr.view(np.uint8).ravel() - cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat - out_flat = out.view(np.uint8).ravel() - cdef void* arr_ptr = &arr_flat[0] - cdef void* out_ptr = &out_flat[0] - - for ii in range(REPEATC): - count = bshuf_bitunshuffle(arr_ptr, out_ptr, size, itemsize, block_size) - if count < 0: - msg = "Failed. Error code %d." - excp = RuntimeError(msg % count, count) - raise excp - return out - - -@cython.boundscheck(False) -@cython.wraparound(False) -def compress_lz4(np.ndarray arr not None, int block_size=0): - """Bitshuffle then compress an array using LZ4. - - Parameters - ---------- - arr : numpy array - Data to ne processed. - block_size : positive integer - Block size in number of elements. By default, block size is chosen - automatically. - - Returns - ------- - out : array with np.uint8 data type - Buffer holding compressed data. - - """ - - cdef int ii, size, itemsize, count=0 - shape = (arr.shape[i] for i in range(arr.ndim)) - if not arr.flags['C_CONTIGUOUS']: - msg = "Input array must be C-contiguous." 
- raise ValueError(msg) - size = arr.size - dtype = arr.dtype - itemsize = dtype.itemsize - - max_out_size = bshuf_compress_lz4_bound(size, itemsize, block_size) - - cdef np.ndarray out - out = np.empty(max_out_size, dtype=np.uint8) - - cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat - arr_flat = arr.view(np.uint8).ravel() - cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat - out_flat = out.view(np.uint8).ravel() - cdef void* arr_ptr = &arr_flat[0] - cdef void* out_ptr = &out_flat[0] - for ii in range(REPEATC): - count = bshuf_compress_lz4(arr_ptr, out_ptr, size, itemsize, block_size) - if count < 0: - msg = "Failed. Error code %d." - excp = RuntimeError(msg % count, count) - raise excp - return out[:count] - - -@cython.boundscheck(False) -@cython.wraparound(False) -def decompress_lz4(np.ndarray arr not None, shape, dtype, int block_size=0): - """Decompress a buffer using LZ4 then bitunshuffle it yielding an array. - - Parameters - ---------- - arr : numpy array - Input data to be decompressed. - shape : tuple of integers - Shape of the output (decompressed array). Must match the shape of the - original data array before compression. - dtype : numpy dtype - Datatype of the output array. Must match the data type of the original - data array before compression. - block_size : positive integer - Block size in number of elements. Must match value used for - compression. - - Returns - ------- - out : numpy array with shape *shape* and data type *dtype* - Decompressed data. - - """ - - cdef int ii, size, itemsize, count=0 - if not arr.flags['C_CONTIGUOUS']: - msg = "Input array must be C-contiguous." 
- raise ValueError(msg) - size = np.prod(shape) - itemsize = dtype.itemsize - - cdef np.ndarray out - out = np.empty(tuple(shape), dtype=dtype) - - cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat - arr_flat = arr.view(np.uint8).ravel() - cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat - out_flat = out.view(np.uint8).ravel() - cdef void* arr_ptr = &arr_flat[0] - cdef void* out_ptr = &out_flat[0] - for ii in range(REPEATC): - count = bshuf_decompress_lz4(arr_ptr, out_ptr, size, itemsize, - block_size) - if count < 0: - msg = "Failed. Error code %d." - excp = RuntimeError(msg % count, count) - raise excp - if count != arr.size: - msg = "Decompressed different number of bytes than input buffer size." - msg += "Input buffer %d, decompressed %d." % (arr.size, count) - raise RuntimeError(msg, count) - return out - - diff --git a/src/bitshuffle/bitshuffle/h5.pyx b/src/bitshuffle/bitshuffle/h5.pyx deleted file mode 100644 index cd7a0f05..00000000 --- a/src/bitshuffle/bitshuffle/h5.pyx +++ /dev/null @@ -1,205 +0,0 @@ -""" -HDF5 support for Bitshuffle. - -To read a dataset that uses the Bitshuffle filter using h5py, simply import -this module (unless you have installed the Bitshuffle dynamically loaded -filter, in which case importing this module is unnecessary). - -To create a new dataset that includes the Bitshuffle filter, use one of the -convenience functions provided. - - -Constants -========= - - H5FILTER : The Bitshuffle HDF5 filter integer identifier. - H5_COMPRESS_LZ4 : Filter option flag for LZ4 compression. 
- -Functions -========= - - create_dataset - create_bitshuffle_lzf_dataset - create_bitshuffle_compressed_dataset - -Examples -======== - - >>> import numpy as np - >>> import h5py - >>> import bitshuffle.h5 - - >>> shape = (123, 456) - >>> chunks = (10, 456) - >>> dtype = np.float64 - - >>> f = h5py.File("tmp_test.h5") - >>> bitshuffle.h5.create_bitshuffle_compressed_dataset( - f, "some_data", shape, dtype, chunks) - >>> f["some_data"][:] = 42 - -""" - -from __future__ import absolute_import, division, print_function, unicode_literals - -import numpy -import h5py -from h5py import h5d, h5s, h5t, h5p, filters - -cimport cython - - -cdef extern from b"bshuf_h5filter.h": - int bshuf_register_h5filter() - int BSHUF_H5FILTER - int BSHUF_H5_COMPRESS_LZ4 - -cdef int LZF_FILTER = 32000 - -H5FILTER = BSHUF_H5FILTER -H5_COMPRESS_LZ4 = BSHUF_H5_COMPRESS_LZ4 - - -def register_h5_filter(): - ret = bshuf_register_h5filter() - if ret < 0: - raise RuntimeError("Failed to register bitshuffle HDF5 filter.", ret) - - -register_h5_filter() - - -def create_dataset(parent, name, shape, dtype, chunks=None, maxshape=None, - fillvalue=None, track_times=None, - filter_pipeline=(), filter_flags=None, filter_opts=None): - """Create a dataset with an arbitrary filter pipeline. - - Return a new low-level dataset identifier. - - Much of this code is copied from h5py, but couldn't reuse much code due to - unstable API. - - """ - - if hasattr(filter_pipeline, "__getitem__"): - filter_pipeline = list(filter_pipeline) - else: - filter_pipeline = [filter_pipeline] - filter_flags = [filter_flags] - filter_opts = [filter_opts] - nfilters = len(filter_pipeline) - if filter_flags is None: - filter_flags = [None] * nfilters - if filter_opts is None: - filter_opts = [None] * nfilters - if not len(filter_flags) == nfilters or not len(filter_opts) == nfilters: - msg = "Supplied incompatible number of filters, flags, and options." 
- raise ValueError(msg) - - shape = tuple(shape) - - tmp_shape = maxshape if maxshape is not None else shape - # Validate chunk shape - chunks_larger = (numpy.array([ not i>=j - for i,j in zip(tmp_shape,chunks) if i is not None])).any() - if isinstance(chunks, tuple) and chunks_larger: - errmsg = ("Chunk shape must not be greater than data shape in any " - "dimension. {} is not compatible with {}".format(chunks, shape)) - raise ValueError(errmsg) - - if isinstance(dtype, h5py.Datatype): - # Named types are used as-is - tid = dtype.id - dtype = tid.dtype # Following code needs this - else: - # Validate dtype - dtype = numpy.dtype(dtype) - tid = h5t.py_create(dtype, logical=1) - - if shape == (): - if any((chunks, filter_pipeline)): - raise TypeError("Scalar datasets don't support chunk/filter options") - if maxshape and maxshape != (): - raise TypeError("Scalar datasets cannot be extended") - return h5p.create(h5p.DATASET_CREATE) - - def rq_tuple(tpl, name): - """Check if chunks/maxshape match dataset rank""" - if tpl in (None, True): - return - try: - tpl = tuple(tpl) - except TypeError: - raise TypeError('"%s" argument must be None or a sequence object' % name) - if len(tpl) != len(shape): - raise ValueError('"%s" must have same rank as dataset shape' % name) - - rq_tuple(chunks, 'chunks') - rq_tuple(maxshape, 'maxshape') - - if (chunks is True) or (chunks is None and filter_pipeline): - chunks = filters.guess_chunk(shape, maxshape, dtype.itemsize) - - if maxshape is True: - maxshape = (None,)*len(shape) - - dcpl = h5p.create(h5p.DATASET_CREATE) - if chunks is not None: - dcpl.set_chunk(chunks) - dcpl.set_fill_time(h5d.FILL_TIME_ALLOC) # prevent resize glitch - - if fillvalue is not None: - fillvalue = numpy.array(fillvalue) - dcpl.set_fill_value(fillvalue) - - if track_times in (True, False): - dcpl.set_obj_track_times(track_times) - elif track_times is not None: - raise TypeError("track_times must be either True or False") - - for ii in range(nfilters): - 
this_filter = filter_pipeline[ii] - this_flags = filter_flags[ii] - this_opts = filter_opts[ii] - if this_flags is None: - this_flags = 0 - if this_opts is None: - this_opts = () - dcpl.set_filter(this_filter, this_flags, this_opts) - - if maxshape is not None: - maxshape = tuple(m if m is not None else h5s.UNLIMITED - for m in maxshape) - sid = h5s.create_simple(shape, maxshape) - - dset_id = h5d.create(parent.id, name, tid, sid, dcpl=dcpl) - - return dset_id - - -def create_bitshuffle_lzf_dataset(parent, name, shape, dtype, chunks=None, - maxshape=None, fillvalue=None, - track_times=None): - """Create dataset with a filter pipeline including bitshuffle and LZF""" - - filter_pipeline = [H5FILTER, LZF_FILTER] - dset_id = create_dataset(parent, name, shape, dtype, chunks=chunks, - filter_pipeline=filter_pipeline, maxshape=maxshape, - fillvalue=fillvalue, track_times=track_times) - return dset_id - - -def create_bitshuffle_compressed_dataset(parent, name, shape, dtype, - chunks=None, maxshape=None, - fillvalue=None, track_times=None): - """Create dataset with bitshuffle+internal LZ4 compression.""" - - filter_pipeline = [H5FILTER,] - filter_opts = [(0, H5_COMPRESS_LZ4)] - dset_id = create_dataset(parent, name, shape, dtype, chunks=chunks, - filter_pipeline=filter_pipeline, - filter_opts=filter_opts, maxshape=maxshape, - fillvalue=fillvalue, track_times=track_times) - return dset_id - - diff --git a/src/bitshuffle/bitshuffle/tests/__init__.py b/src/bitshuffle/bitshuffle/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/bitshuffle/bitshuffle/tests/data/regression_0.1.3.h5 b/src/bitshuffle/bitshuffle/tests/data/regression_0.1.3.h5 deleted file mode 100644 index ee8373f7165c71ceb4f62d04bb988f41cfc5524d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 114447 zcmeFa1ymhbw)l+&cL)vv5`udmxVt+9cZc8(!8J&54-z1_I|O%kcL@%`HTj@{PG>Ti z{?@Gjdo$gS!>ZcH?>@hCs@S!wZe2eC9zGaI3`l^R01gfS0s#M8^nUm4F2L0R{+4y0 
zzAsn5i`R+Iaqt={9dfa9+G!*53o06@rH z`a%4)3EZz=^}7XFa5tPDm-`j@KQlgQF>xM103ZPHyY6xC*6sbL{^L5nKfe-z-8Q(d zCvlfHgZQ7(^S`~Z zpYzWI99B%Wy5q-rs!+*pyw1mfrXbCM07E!E+4*c5 z;}}K(kix@-d`k2T#K+QWF@6Nq2=zUTTd9wsnS4*8IFGAiD{nzo@Yy>PD-f$U5^6v^ z&kw)U&&^(ZxuC8=SbUt^pcDE*w41+&FS73~9Ae@WmKZxZ?^sG-!LHs#q~o~flStQs za`=5Axm^5Zddg`gqx~;wPUm&J?1LUrFC_yn)Ygy>abPzopRAV1cEp3QIyF zUe~eV4-@aYIq z^KCCo0H4`~?U6TZA9ZQjEG3vd9 zijq}7HK6*96oHi~oekjIZ7)YJ2=awJ1=_#|YAgG!w%bF$j^(d3ioun!z*{Pzs z*$K|7mSkoF0SrM!?xwXnCFfw84Ne(m)1H<^g{EEi0^!<{il_ROV4Du!IQz7rf=@zu zGdsmPIY-Mq1-^ERtVRH*xd5QJ;ya@Lt`B#&j6(5=`2lSGKv!$)hwg{-{LI zB3YDLniSGgL)8<45H_ZGZ578E;bF?8jRnQCWtJZaEIYNsaPo2MY&6Mr(QWLbW57zZ zQ|AwkAaJGzFrm>xY9R1Y<{Z@*vHUmRNK=@FM>IVN_6tOtR`8c(YqZ6CYP-$6Izg~F zq-9QjJS`gpla3ET&2<*3inYZhK*b?R~c;70q7!QpZHa__A%>M|*Zi zR=9rQ%*(5!K)hBTvl7Wv*;PC!8cm=xz^OU8wuW5&anV%y+kM-xmmqMry+{j6XQnf2 zGblRR$bTTSbFQK?P{QJ+k@9?}oe8|7tGeYxvlLKkS%kgaHFpZwIpHq_JKB#vU+MBm z25=MoDa>^*7<&*d?XR({;*=!iPEpAMlt=*j=Rl0gYzazk#*cQssKx%_3>CzYzKtWR z{9NNGrh?N{%vWK!EHQ4`oW`U4C`Th$&MpGO9VB zx2TPoW3jT(S_RBSja9}ei2J1guB^wIk`TobWP6k2qko=5mi(MF&cuNjK*_eM3iVE1 zEAt)HL{~bkRhvb|Viy4iwi!l+gVh%J95!}_WLY3fu*1w&&qaGj7^zyVoRFmRZ^h!uN zWefe|p>p0(E9{*2tIi~sRnp~ID5Wd0SWB(pHYI%BwIH_qZ<>3zz0{s>Ys{!`$zwP? 
z9*aFBoSC)eNvW&{)^CpqR(rh42X=Y_j9?Y%UzFn0ZyL~IUcoBU3N#K$yNY}|F%bQ< zL+rK}a$Fm1H`ZN5AE0Qn5#-`%;+zT*<^!Ou4@%1*T(zUY^PgSb=;l-~3kRR*y~o%I zm4hYhpTuov2NP8Gk~cKGzJ>=N*+XwItEFR65Y*+gJ#-`R?)X$BHP_wSlbqH6$q&hj z@y&QIzrT7s24u}OjtX}l2?zi9g=&TLa0*Ci4JUcGm!Wxz64L9 zf|Z0tP^h5himGRXMKiZs&XkbIrKy@ErWZQp)4jE|W`po|n~99w#_~;v>tl)G2SEk_nhsA^f?WHP#&`|)*WI?> zVwYsTfqVXSnAv{FWL0KJbf04wael{ufXD%8!qu4uao4UI=6Hx~$&x#j*Zv)-UcnbQ zU2TvMyJzk(PF8>)$+ObxEjVOy#Oj`&k`ts;Sb7so0k0`p)*`M6>f|L~IDUZWN3AJ^ zNh1ifJ^+SexJptX2PzzZr7ajaK$>a|U!OqDEq+8=xHjK9_-IsNZKCK1He9``ca`|k z>Vl*&OA?#?%-c%QYY|euU*nnCBbZD$BZW<-mm?<=)0jfq5Wpo z{BmI6cO`3oa{YsQS5KezmtFl`LIKf}X<67$I;ObjhnOQPXQtBC4>04`3T)TZJjSTExlbdK*S{qP) zul+}UH#2;*aa?ae*V|3_-HrtUiblE3y0N8xUkpFnvG-TVx0U~K|BvemRsZ*{{;}$B z>HBrXf&5R4f6v6di`l*SJ?o!F?$?#R);}%&%VgeMN8em?-}`~@<;}m%e-`SYe$2g@ z#GCN1>EG6Q*gp_>An-uofxrWS2LgXh0)OV;yz#8ugnRk_-@lm+{bS49)X#RI?`^?< z$^Osm@_P>V@|QgR^LF_f?k&?BRev|K)1Re-H z5O^T)w;=#nbUhT;>&<=7JX8t^xj976_LwWfZ-IaQmEmFUf%hI=p=2J9^-UaDzz{J7N&RgwFjAAf^Hw4?lY8-NxL3B`HBTR3#NG-2KBEgHP zn#jm3DboG4ki&!$D&tsSj4r*_Dk+8c?cB9@XB2<5otP-pgGLr%j5ow(KmB}`zw$DY zHzxb5+!JOEUL?AZ(2>zbK+_BWY99N+y?)(TC;CmSRKGlc@M39({quT=iPtsOtqtS~ zmAk&^7gfB`1Ku2&p&w7|#Ba6WjYe{lYN2(6t9|S{4axnZ zO@oXx(Alw8f~{ls2D@tXwWWPy6o&T@&EfU>(LvQJCx%%2-HxDf*@tZ+!JhqRWag~P z*zTgR@1jd5IFDLj=$uHx&UGGxU6igDKKcx2K?CMX_v(uutfN>L_IA>)YD(}6s5hC$ z9YdvP9_^?hxm>(x(k$=mnt`V;#O-@v?KdQC4h!tlapUD+eIXNJ%#J9a9G1&DIl^XH zt)AuPpV0a=Nl0X-B54hmUwLD?S?;HBH+@wvd<4H8Y6W;+k&&tK1wJlCO^f|aQ0Xbm z8kI!AoZe=`mI{7_?je#->y#Z@a=ZXzi+~!h@?vCA4$jlrf%$6B>IAA>({2ayu}y5``Lg43=j> zK;ay7%zed)>as@(?ww!X*z%&oHfx!z>C6lQ`Xc7d(0x`wP6jBm2YwpDK&t;Cy-_DV z0}n7n8V*&-<22*_l0_55t2BNPUpOLMH~FF0OT?fr-gV>VehqDK;lo(SQifrdP=1`o zZj}9~wph9KN$@_s@Us`ifeKh(7};K}gU~{zj0&WA!;w=bboYfc50p5~-d&Xec3-ar z*V|PI_ngN2BH{9VAqXoKkzP=YnLrb%G{G(`kv3VGjxhia`b&uI6aQL^7h( zhaF*kt3`h_NPQXo(zj=eSp5@3Qgvl;x~H}lX8wot#gi&XFdQhmQ?F4KA8}VPY(s?L zy+H$NU1+MJCH}(8@MsT)R)VGSWV^mdlGlj-qI?+3|1oBC$qLkD3{l5>N(a7=WX~v9 z|cVt;O{>Z<2u2ZusVxWH&$}n2gk;9&#Eogl 
zw(g%$`XMwt$dKXs1Y2gFmXJSfSY-6OEUuGMFfhu(K8qG){NUL0?h?VG*2RjVscjt9Op6P?{d*ia>Lw^9N?!wE_kTH8UmS;;ia4A5DnH+4TNrxV?qp;nC6{62Nca0HHejDu)O6#r%&}h4TH& z-6Jp*MBakPbu&12U0}rdt6O?(&`eB!siTDtUKep~cnN95Erb~Iu4V|}@iz6+UcdLq zyh?$Eu{{0)VwyoweIN;13)RQQ8PIMupn}A_4NSPn4nHY`h05)o7h!|>ohq^&k*3rz zo=l+TF_FDYfK^j-1Yfj6kEK1poP)YE?lfuxw88aSu*$~R!507PZp z-yYk0?Ru5`mxe#{C#~M;eJ_9MKl$nYWDxsp)9xQU4?p`e{#yQ|dk**VmpuOS{-kmK z+X=W)^>=s9&;0*Af70F$vwF8ku7x7!_xMfhp40CN{8;>udLZyX;DNvcfd>K)1pWpD z{>+}-xEtfW{H6W$lRYWKecN>S2g~DUd*-iYPu_F5m%rrkpSLH8q;4nRM%CZVxu5y} zd-mk%53_o=NUnu~;`jJX?4Hx_3jA37ka{5SK;VJE1Azwu4+Q=;1OSJxhvIs@UEiPd z#G)-wyVzMM^smU}LO^80!B_6oaTo0a);$S1d=FsAC*UTu_mu!RrsW%eP0Sb1XGV!` znFy~Hlt{wU3_g4<%HOIQDn+3IUVg&HXWW2(LQ8M22V8;Rj8@;a{H}b2Mc)MlR%=oB z0_Dx+%e+mGkwQzg*pA~0UwK#z7-r^CrCTldq&2?TL5@l(a8?mGohce=!Q)SqR67(b z;!g6pO_AX=eJXAS%X@}xR^Rnw(ciQ$Aeq+)tUWZ#!fWrfwNR0LuCNIRihLV3cAZan9sUv?)Wn5 z#>l#6;g5&|yFLOe1<5eW|Gv@}0#}FbH2|<+KVNEm(Mu?fjpaV|>5X|pP61F3pK*X= zl6fXGN&85dQN;B1x2TzoY~86Jm&N@mXuSYk3&l~#g7WrMflkXyd`=}UOGS9ILS)}nAbrJY3nv& zs9Ku5{4(Nk0mvKCWU&N1u znrzxJV8c^{&5LhU-A)mLIsCfqtsZ;9@iuZo2_L%=?sUm!m2Nt^3aR;U2vKPTA)+IQ zE4)OVYy5>(;VU4tfamEmER!sW(t^#S%p`f6^F|e_I3l35*6$&LCkS~Dic)*qee_9J zI}!zB0=7ame0@3N&J|42xEV9mt%6tR`5w8A;Ht?u7i#3D@_q~o4~nQfm~pBdzAQeR zjF5Tl&!A*EOLqi{du zc0w7iPgoZF(il|KsuhoE!5beih)l7?Ra+LRtA>G4KL|y(tPhM9AT_p;h>n#c@S%Yr zc$PL7*z6^~Q7B91G?rw}RKoBIy0nQ9p{Rt^f*l!ZA4b$a562(Eix&5*X>QS(Ojky9 zySHpvhZz!KAEj+FcAp`GpGD;X6xo_ArLtUQcS%F8^TPiMuS!?Fk0+&`oHBW@hjCkx>}ZVy%n)DLEgI|+EwPm`AY z@i4;l4P#A@!VYT-g9vM2mRPLk_Cmf*9E&BzO>zv4q66WJLz$3Q)nK7YNUx{B3(w}YWAD8jmx2~LP( zNMju~AXK)Z3|}O;aYVNxMV4~nd<(clUM;O3E{;ksnoJJCF|66Xk}uREArQTg!z?If z^IV@rtodhQ`&4(YJbb>k;Sf$c$&S6q$iDP%se5Xt?=)Eh_a{g=UOt(ZvU^Jrp{K7Z z-z!%dk2q^LVWJ^oy9PJywhTBAZS-Y)?sZ9(B&ctqX~G60;Oz4tZ{!BUUEsdi6#LG# z@#rTkMgIEUTzGA^nd2}${7ym6X9FH9&H>I;x1&ht+@zt+%U0`EpqXRQGYDtV9a4pt zPY@mtGbestgCK zkDX0{OZT{zzypUSwvnQMG zhI%i5X+QmRf3m{lwyEF`mdDTb%zu|XsjRi0haP&6Zp+@l;J;z?;d28xt4gqrI@6|eW*lf$S 
z!W86+UHp(c&Q>Bx6?e&VXbw^n1;R7W;K`tR1zR{{nysbFFr6zzY`I|N_gF|*W=O=| z&Yo19JOMoOi-VG)7COZ6KJ*akiYyyLqYE9_S9P@=O84Ge-K`eQEn4Ov``fmPG{HMy!A=B=uvl`9Ogly!MJ&iSl^Qvsa6j-*JASxH}^dvAe%l6Cs1)p)+I?`p8As4uh=(K(DRg`a67S%mjj>`0S z5EzsM^ib5IOQO_-sI=7svP~5l!B9Gw)}ty)$zVkM)8sD9O~kfCmy6; z?MhG#r_*>(SvY;A@QxKMfHaDV=Np4Tc&&_?L6P2w{ha8Os1ne}U&n(iyr=>0X*xtH znQgX$xUIS!!>sVAGt7=8gWlsNN7>SM-ItWmR%2;Jj&W~lg2y)G5N;Pgkl?kfzuvCB z9>|pfN7z=-qBb>k;)0bWm{0Y}0?b%0bc9QF4}$bxq&WM}k+u|dy<>Z0yV`=HmASG$ zM*i|W>T6wOd#WS3Z(s-O;S}b}K;5k6?c#!*yV92|VMSSRW`vEx#TR8x?CZ*3UU8(n>8a9&W+JKE)VvJhQ2gGt` zCY9B8n3f&X-15FK4l`SK({;7di?qNDlB=v$o@a5J_E;b>Z{)5Yq z=1mx!`P4Q9y-%+oO$2J^?2Aj zQhF(?shF7wIb{dOw@}8ZJ{Fs{dsUpYoT;QCP@jjxx+ana^S?slFioEtmq|cpfk>Pj zmxWSuMg&}H8YMbmeQ{yEZD<`CqNl};YDkm|%lSk;`~5;}QK|bQ`4Rdmh4z=Rws`E0 zuQ8M7pF@&}2smO6AS;Y*Kh^gO8K)1+a5A-s`y#Mi|81Pm#EL0?bmYU~E1cBYyrh)F zD~!G`Lm`U>C7t-EJ6<3azb~_thQZR%t0uAczGnG@PWGEmT3IxFjymgWT0TRC=EfNJ z4|diis}U_0^9LsP^ZOxy#yvm9AyfV=pbDWbP(^44h_0E;*oErk5uxTfkf{1DIDT!a zWbA2wq+ASk-~O<;0OH{YCrXVVUCB(%3vxVSj6ZxuGJQWG+r= z^k~oD+v-4{J?3zRz0A|6oGfv+jpoNDOBrR;k@vh{sA6(_VskJVy|#?5l8zb9XZBpg zCT8DuikjPY5Pb`*QOI;JKrJo~g&9QuqfYi*rY^QNvJl5C$=4PQMH$kJp1*gx}^AIdkiIxI_EB(;$%A{{5&bAtHwD* zrv~cwEn>nV9n(418_d_WJ#j40l)0SF!&>p#PsnLsI(IShvBa3oP`?04I*;XDcqC}x zNeKgJI5`{Y6RZgKLn;e+={L7i7<T!Yk2pDXpeb`mGV9AWC!|3WjftNjgO?anHsY!-d z?HeS*TwSqAousQlUF;_<6(1WmWKtGUCSC;ano&f#iMYq8D6Z^$;4Jh!SCy=Jf#`X~ zWh#w@Ic`2L*UQ+NjG2Hz6tm8m-f?7{v>^i z+m_ltSROyyGyh%wq+6Z;I{W+I*}At1(TM-G?nHOR6*s?7_%FiE(*`#IT;f*#YR_?3 z@X`2JHU1j;e|8;lzy9y#FOP%&{B;Dl*DdlJRsZelh`+|Heb@3Ij{_o-?`8iic7GhS z^Zm2Z?|Jvf0ou)5BmPC;xGSr;d9TA?IQ;7OL41EWi+>*=L*V!LP3-&e@nil&>Vd!m zfd>K)1Re-H5csdLv84hTe-)!|hy##P?f0E3mdx9OnIpD2=s zV#7ZOLvfi1_sVC9S~KKw-#Dh^7e{L7uC8ElZ)UbLx2k4@RScJ34q)oinw z@ZMKlCd3He8aYf;O-aK(zm|1mz`eB3Rh0Nd(3Hh?E?i=HM|r&YbrG-?ATuqBy>qbX zf?bD3wCgaLnSdfpqS|N4elGXvDq40ay53^9HIo#lvGY<@NnmJL?LyaywORqV7PtL; zv|(53R`DwPnW{YJ@kae;Zs;FoY6b%}1M14bj7q2bYo|NY?F1({0PxtM4^FU42!n;f z#;>$`=A@dGpYbOe1J`~F{Dg#{{p?-uIsk1Gt86&aZWs{%OGoy~%Bl2NtEzQ+H-wwV 
ztglD$^)1jeM@v(ogv$p?A#plk-*ZMvQ|UI$5sg;PG@nKHBUp+ohn!&74wJ&ly_^YA z#c`R(#L>D{)w$}IGsO9a&)>DEZ@I6-Sm6=u&X2Te;+bLbj>&xube1Zc z0*DDsgaaqX`G$XZG_30;Rm`5Dd&1*&)#0OTyWUJydGa~O{~X}r+aYRW&otJgfmx}! zk&z#0Yi_+z^d4_($?KXrW5%wyHP=Z}q=q*j4R}>zghq8Fz4oVc;n$q6P1S z7o^9*ZP(T@VB85O-pxOp$cz5tJ+Bu}KyWn*mjXzW{96pmWFu*%HyOnGYVg#d?Uy!F zhsI0?%Ic+Q4P%OqAHh1mCXB&!VbZl`6k!9eJ&?u?=cNBa?o5R9DP6qwLnbhp?$i_f z#`j=Ui_ugO`ca?05FESkPlLI!WS93nUn37uIZQ>K7xLoW*pTtoUmATOeyWMS!-}sMcv7C;N~U%C7w?PXZLn7XEQKPoM09 z3XCv_*64VDt!GuJ!$hR+9$`oFxq5vAsy1W;dUZPKk8&?EmyX$O>X6=`KJCtbtB}+A z{!O!NQZ#OS2?A_iFlG-ZQT~~ei?i!j$QVB&I_MOM8gmT^t(dEV<^~L!s^+WDY7t!= zAKn$rY?4PcCWnp*31-PZhwhNKe}Y?O4#inv2}$uh`xCg(h}FpaY4S>DhB&NTuie)x zFi-3-1aTucZ|*cbwDU2Ij%Qw)vvTUW9SONFDF~HvnmS^ioq#*TMhBqoSuMG-y;Fg6 zXeJkfYiV`oElQ_39Pg&7E0cyK@rK*pAlM`Vy8M7U^O@Mja71G^vQJ*yj_2?TY z3gfB)8GCI-HHnN1ti)kHG&}>jsDZPKiB;x?cd!S9ewOy45JRAN#B&&(3S2(Zv6&G*k zQN@co`#jmM-2y-2e$kH>@nbEKMfzE+kt61IjpckvnyN7x$$Y6PiJv~Kuw^+-=gJuF zBuUAYy&{DnJm>~B?%^EFEO#uU!ajsSX$-~CIIwRkkt4*Z2u4cm-ir4dcUS61^LqaX z*1=DGbX}d;%Qk!76EnF<=lV!_2oD7Q*9rWY|G556qkH*BzZuAN*{{FfQXhQR^miZD zP5QsxfBa{D#QM8I-OFF{_+RoP-pm2V^>**x`NQ{H6r!&4uS5Oyc2n)Q@H78=&+~na zALXz7NjN{u>i0Ij@4v@yV)b{)-xat^KZpkc4+I_vJP>#w@Ic`2M&QOcxOt%(Akf~I zNTwD)ndQus8dStIB-W||JYun6>b0-(Ij^pN$v~wO9D(gK>?JkeoA>W);J>}q`FYeGyuWDCId@|_h8 zD@a@*na!HynPxZaaEsBXX@l;HKsp*cL0PEKZ#xkJlzPEM3)jbhp7Oew<2jCZp*1yUL@dNHd?X%B zXO@4+3ANOxCJO?DLUM&Ek56vA=`THpU#hIvfX~5nBE{wNZ+0u=>R*j>yF&gGt-69#l(*1hI{KfyU(+E)?42?DzlZmAWu|| zNw_*3#VB4~$YyV^$$1}PIyqfD2u(A9>luYLWoVL7lN)f*$WbtU8 zlA0a#vqdtSP}0$qDP@12%nJ?Ro|&%GyfNWVnWBJLa0dEnp^yr=Z&q?&2&*nV=Twy$ zvx8eeuq%qEnV8q`E_Gh&QGFKNTe_!AOEZ}*faIkJBbk-OOpx!^x#QNN-zOtpGzC%d zd4i%)yYQ)n^Pw?xTF#?aIkU`Z_TXmHm@aWbOFYAQilD1suteHkCN8VSBNHTr;dLyS zwj-bkPGXw3>`9+-$>AR+YIwutUaYk(Cy%4FEE^V=%d3xeuZ^kMd zQK3qEBKP`wpdmsmYQ32uDxSNj*P?6Y9F{C1pcX-2?$r>fc4ftd>gJ<^boyuq;2n*} zaTbe5Etfj~<*;ZW2aXEuYN%}0(VLX1Qi2;c>sDOkQST-dz)B3b06k8twPYa;JK zOPdndfK7ICT7~MEoCI7nDW)v>W9>W)@sumR?Gq!d(J$(ciaLNZ*SoWV7Qh%M=NKe- 
zZ9bQ>NkvJpGvqhD+wj~kzzMMuY%zk%8)ydSrSA9vaVt~F>V

tZ&&Q{$g352WDmFNJ%H}o#+RYCNW5*#S%h$_OvN3N@0`Wm63(fh&1pNPJbD= z!f~fjccrH+az(|S{=9aIxhg3yt>KOXOnMWkwk_#gAj=vIw;A&oXN_vR+6oPvFkl?$ z%~}wiAViCk#K-#uBvxcqf9b&_COQ^F*Avw@h(J{D37nTCUJdZl0T*cS(pPZ6h1QuMWm(MYm%46crf+s~J&6D? zpmW&l0sv5PoHGb6U9&Dg7;W`z1vMQeL z@*<40NS0#-lo@uJOM312ZB6T81j{?2JLWbXzR<#yzuW)B{~P1^Gk;Ro9fo`P zOaIAF&m*Satq8IoJP$wnGyc1tNBlE?(mjWJ`AZ)Ed4JN!%-gxUQT2Cs&d>b+J%5tK z53~CHSQ#(;9>0m*bNXF@AB!JS4+I_vJP>#w@Ic^!z~6=dFxhv1Qi!|rqI*bM1qIIJ zB2Oh`neV4{jNyuxovLzgs4$nXAFZQU~=+avC zdBE|Dah^@#6NRA^u0G3v3j`6YTmu9an?L*(8xWYlw%U*U84YWrE^b)Q84P%qFL<__ z6A!CVCf$Y!1=theW6Wn&eHtHL6YGKU4})Byfx98D1*)K!(pR^hl=QjzFH$jW1)XV9 zlX49WV!fnGeR*{>(*uqLAHq0DI}$L+6p{f76O`J$K6BTtjtxFXHw}35lHhCSBZ>Yuqs2{^Ps zq>0q`5fmN|vQMy~<$0PfKUvwYc#^NlCK^6AztAtOlW~9$%)g7Mboq_K!_66o4O*%v zR)#CqqZcuy1Bnt{mjh?kSZGHX7zv`xp==!oImS`tea0qxZWVG6se!5>p$22)kvA$X z88-J3JELAUU^nWXkVR$kqcYp@MxIm3&K)d7=3>!j(F6+gO{&*V7Tx$3Rb*ry^+0|m z-%wB^S#X}0@Rf*IEzijaxo{(O?pMQj#W#)UKx;p9{FL;uyaBIictAe5BxB#?1QiDa zzp4Tc(4n3|LAv()YPM5XM6=LUSg@ir%0x$`cUSCBs+*#Pax>;vli$*LU>GHAq_zra z&H{QMKebeAf~87Xm4&5D6aNsbos#T!ne%QhY6)Y$;3WE+MD_-xgZnGPLa}Eb15w%C z%-l;)4G9*tXLch_hzL`1%ss%wm2w&6-Sp4xU#F&OZ+v;GmoUXqn=%=RX)Cq4$5Iux z9cWd1cp>!KssdFR!A$|1K}17Bv121l8slV<9<(uMHeT0-Mv(hi$*jxu0gk_i;Kn}6 z7Yz5joq@J@;A)JN+ec1{#uLwQ1t!J8P2KU*@sC&4Yuh1fyRvi#kKvnhGBh{`oAW_6 zmQSWDUWwL0W+uUN*Hv0lc64i3!(fj>4M6O4YzoMe;5uT_iA=fS9$rbvaF;lhXFI*O zs%T~m&%$FP#L|N1D11#p%AO>bH;{bx$+20$Zj~2<*78(rUnJzHQJ7}+l>xn>ypWNg zmc!+zFkJ6C%6TO_+@f7P{E9VubE*%=ty7Ft=xm-TqC|KrTulbb3FyjctZRfZY2mYr zqoA+gY3uQUapin_++Sf&O1}#EyivGe6r%o!Rt8vkaIq{+##L`ak8XMwTXm-dVvTLx z$jEs`Gtw+d;RNc!AQ2XzH>x&^U$|P5*%6$I3m|yBTSOz~Q4PKyfuBzg2TT?(=Rz4#T0RxJJZHgzm0e9DL_Js5+7FbM!xW#kSP#2B<@A&wmo z8Dc%CuQrxCvdl?<&Pa5S}G+vVZIOvcJw8qhzcENI5s1(pLAVMD+9I^32jE^Q61m0 z`^X)iNTOp1Wi!r!L!*raSG;qeBXiEK#HyZ+IIj|+CH!FE7GSK|Spr%PNV6(mjVWfo zc;(Ij3gZ03Fr$D-XbNpcuyuj z#*!uAcH(|UY%2y+m~#CFm}x;#f6RGjjrm*!5(Ma_#mMT{m4Ho|wW<~j!c;?H16OU2 z$~9v6amwWW$F)l(q9##!IYNVn#nNqz53eRTRm{;QrhETm1_Rid$A8|Rbb9lK^6PM;>hJEHpZWiL 
z{v`b$X7#tv`E5tk{5^gXyXW+~0zVc%q#g)75O^T)K;VJE1A)H{0bri*{-oX~OLVv% zq=t|P!eiQ!6|dM@Bp^8y_Gp{rRnOME8!mYK4!xW@rS-B4O*7^|%JeKJ-M- z(^bGd~sMq zu%B(TPu~WMX((LS)Doy7a6l;}=`9eb``JJO+ptXXBjte&n`8VtZgVPiqPoveEsZKm zrG5U)5z0JVP`MS~API%Z-ZwbMb}-lJh`2`&^g_UgQWEbQMB!R@qOR{mj0?U2N$$gy zgww{+c`}cn0o-6flHyZN6sN2D(sGj@jy{t&&KJ>n+_y)E60+qjyl5QGttVw@gV!z& zYy|O$aCcK&wzy1?6CS+1ZMe@~XE{(=ZG54<-YZ37IZJu=OngfLPWM}&%K5cNH95>aA{aHDug8*nXO3fjT|cvD{v zprTe((mpjGOnh7<{sGImaj1jHo1jU2TYE`tt(BD05Pe&1cXpRXGz_Kvo$%B3Au#g5 z=w{sLm4xX2X^vF4F76>}z{WCaSp}pQc;T~PqysX=JeZR(p}s(p^&iFuC3jz1S?X~q zB`M~N+e(0ZAfI{Pb&iyLtea;+7@Ly%T8VisN`qlgatP0Y?JE^iW1!ur-Yo?PpZEo^ z1O1Poo;fpKQ^UzDubsyYR%&fJNnDsNEoj><-f`6p*3cex2L@P_kXx{l3wHpyAz`_5w84kEmzt58DkH-HdKRW*&t zIER}&D0r?BMhUS}qURl-{@vI5R6Im89u zd%SX;W3>7vS4+_e`i88BuI`I~5>$iBr{O|0b9V)R9UEY&S$B&6{`1aP3id z0w4p(KugHjm@WVg(D5pmyu57WwgdAhfs6QT$8JKrv-doy*$*3m`XuqpCnHPAiEwCL zKn=Ti>ua9?jn@wP!!`2X=0y20kOu;PD*}J!PwKoI?Y;b?{{-~9?APCSBf9yG$m>w^ zgDc@@f5uP@xZ!q5EwwLj_4*2l0zyDq5kK6aQfdL5IDSdi#Px4;`g}XA}n;&)j7eW25Y`pScRJty@fBm&U z@4v2RcUP?6|F7%a(`7-v`TokUL&#l`GRnWM_v;>hEcvtNMDN$rz5M06`p+L{TY$Gn zZdCoZkFy`?{J%?OyF}o1$ugjDohKX3ytlJQOQ0q(U?uMLYz-+Od`A0nF79ZFTp#4M zq^Awmm5iqya2Ykk5h1RM-@E60zM>ZOugT4PVN-)v@*iDxGWswkaOxMPO}h=BPwohq za2w%)$%#W#&5h&i(=Z(X?PBL>bRNfm0@F~;IG7i6*rhpxcvVZ9xVM~QIaCzK#cyaR zBSV6?Tkm9%4+{k$jN#1qh|CX@qVJ-Zu*&%?X4a=&^7wbr3h(gGYp3L&34L^X7O#$~ zLfqLCkJ4ONxN+EW7!DQ)0$wj#N%Y=3u@6LUg$A8kb|js7QQse055w&%p>_orogTr( z-XnyZwv7N@*Q_>gwCMf)^dR=Wa4%Gak1Z}IsnETNG3w`b=TBNv{i=MV&*6_XX&Z{3 zD&g=4Y+Y(R50RETy&{l{8;RtmnqQj01UGM{NcB+N;aV?XPt4W~x+J$Lsl^y;JatlD zNY;Yu!`}W>ueZK5AE~hKL+ACjx66a#OwAu9Vdq7mwKidC&hlIfePki3{!8sokG&9Ejhd8ac(SPpU1?QlL{sv>y85jWe>M*%hSFMt>+^ARP- zt7BI+L~F}U;c-_i!LIxn1+%+45E>!@*9MHik%6iX$4BvsW2g!AD>E3p3pHw=3z#j; zqD*%Y4IYtuyyV%tER$wdk=+1HavE>I+~$*D-%3Gt^%fo7q?dyJu)q<`$%zNuplFd+ zRsT{{H>F*!?j2Wu#snG7m$2i5^>QRnkXM+}fIU0dSf6Q~@?V6EYUc(aYj-a5Qu)Ru35`MugPp zwESDmgwN(a!C?)hI_a)^a1+ps&Yu0CVpNZe!;rk_*Jx1|>7Qpp!p4js#}zj*6Qs$$ 
zCkkk69vSa~GbZ{x4lbT@kqQ_X7xtc%Gf6Mnvhb}ah7;diS(uNNMgBmQ+U3E}LTc*UuC}~hPt0`8R zR!3lLFhaD$I@;u)+iyKvb@+6+43s2IAlu(r7@r^q3?PpVKb4ebm%cL}`0+XLm?wbi z3vZsooJW}>XoI;|NZwmjK*O9GInbO}TL-<|7PL&s5h)j^nJRW1Oe-Ldd1Lu-_;QN3 z&Q3$j8l}Hfuw2dPzn2Dfi)oxj6VY`Nk>8z0d#w@Ic^iLja`a`u?!%_2#kn&BBB_ z=THbp;w%|qto0dAu)(kdVeRIt_&okYw!!zuUI}^n)z}&-I^LU1B}K?S7pR`?bRxPL zjaZLmy7R0f&^T}N*&sACl|h-eyv`W^Ek^&rfHUxdLYgh=2}foQM$6mw z@nPXo*+JN)cJ#)#kL3W=0p_Rk4mnmqUSum5N*$sg+m$(+1U!RIZZd1t9M58xAQDz= z2jGB4EyQv({j!)vtxjO@iOUFp&arw*Z7qMChqx`U^)Osf{39zjjcU@>?DR>UV`*w3 zbK#;Ff|xi-NbbIF0UU&QJZ*R~jIs*-1Za-Awz(xlI#F#1Q~9Li(r%EK2JeZ^xB>L1 zR0FW0$WO|KgtZJLdF41{BtxJYEiOaOD&K1SN}hD|1o5mx2-a^r_X6m_Cz0rqE;6y& z_BViu%NEMT#^Y0~Hnvld72ffqxtj)qv>oN9QYc2tF969zt9#LUm}UoJ6cvjSb`=U1 z!*9P8r6N|>$OT(Ry_cDWjNhf#ezXXn*&uQV4=x!K3h!!)pncc_|73Z7(sp2WLs_Nl zH8BMP*9W~MJG5L}fYI@_RAQ|UL{3CfU)oo&G$4YHtu)k<&V5$R-!J4CKeC)^(|vlf zOvNjEh9+{h$Rjx5Bg$#OL9WvLLL()#4np)9TUHrcuSeWBeOnj$E{(p>A**3`>M*yY zX)S;eSRtYh04M zkAj@oU&GV0BYy3=#thCWw-RkgOCto(@Jz_8 zT2BNtZidqq35aa6Zoh#cW~J60%Qi6-xlf_j}) z0{~YyvGxm8#ZTic&|3C(sgdMH?^&1XCjaB+HEsio_H-MuBY zgfb}&%IDQ3w-3I)#DtuglGUd-YRE2gY7_bVn5S>?!-yMW4Uw^2y{1&qO=LXn#MqyJ zQiC-SGsyF@;j93vW*J9)<6|y(TZ7288`4RYNqHNQvWwB%ryA+Aas)&qW^duS6Z-J5f6fDM z%3QrOJgf&v28@jlQXVjKdGZ5sCQT%Ws*%3c+`;W9gm3$(=-oVApYPBM)wQnDb! 
zhN!1v-f8|0+sOaiT^9B%qkeitX4Fhs6V?!?=$@E!*A1DsO|h zzGjIVcU8CVlQs71T{<*SQdnMZ_ z`oqSDwxL{Z-U6Og=B|q$PSxFjh31pgLd&4kc73Z|^IjUEBpnUQUn-z5F4Gm1ix z!BH{wWe6Qpt(hKYGQLy`4`Dgxkg2-Q;A>GKYV_K|=`#+SJr?W_mDBeYn`>0k0->d zUvY6=dIm*&e&nw{&OI1z9YC3eMaJsI#Ab4=exyOBDf7e_6v?y6nw$ObW$oTU@4FE) z%i|wimj#%ph;rf>0a!YztJy4{5DY}ou8w;ktiaR~`h0ok(0BZ>>YRjF2m}_W#nn?X zi~|B}&D|+$6Y~WH??<&=x29@dc9OktZN!!gy6)MpWsSgwDYb~;|J)_1PHFTpoPfd= zpKBLQeg|+XSX3#94+nfW_PtQFx$8V&P6zmSkg11oI)yD1Uj6zS=W?r{lfQAm-el`G z!d!*@rFhU+7sb~65w;v?5j{5D zX0PH}*pky4=RDdep6aTS5s=lby^qb=KP>|=kFs18!K|kp6|%nEE70rg`X*;K(jnC1 zegRzHq+~Ez1H}pShT)p&s^1Vbb_Aje<*x)G(to-Q`yV}7i>LqC|J&ZL8{fYLp?~J` z``vE>zX|*%@SDJI0{?ylKtBIL5MoU(u|Hm2pfDrMVUM#nN#3rE9i{Q~UZids64t5* zsaz^1;H)>YC!K`(Ml*V2%qH16b&l7xwX7>JW6dSq~d-Xp{XqjCA`$X8LQBa4r20$_xK7`Jo|i69cc z$>534+bPS*6@mnfvp$iY>=s>h%S)n{!&VE9=^qJg<3CNzYEN4VUQSn1hnPa1=g>2^ z)$1vQAcAaewYw6i#BtDg{W@`bW~~F%*4Cm<`upNo<|g! z$@4G#u$vs+Cmu1&R>pEOg3J>$e)2D&SSghKz8FZ7r36o^#7!pIdGF-GI%BMd&={;L zznD~4EzUnVY{%~WB8QcNtfB&`o!igMbbu%b?d;UGh;dO1gePBx6F>8(5>ZdiS*+&Y zf*mYn6f8Yb@MKA*uIZkU`y-K?PoXdW=BJ!Yztr}Zb>VmG{jxl=*6|gSiY-VDnGy96 z#Sxgi5Fxy-bUsVWUS-<$R#Jlj|C-*H9qVT$bEj-GHG=#y$iOA>&jf@mr5?uh{VhO3 z2)zpBIu!!iilRA_7zIhY&Da&XQV?GYqjenKoedm1N-%tk5HQ`-z%;Pfm^+2Y&X>c^ zac|Tkh(bFPJ<|M%5sR@?ouxO@+@~%4wX+wTwn#OInz}dTZ>O+ftBWv4)#~YvG0nm; zsBn5M2z9g?GShrcgiq=!aLb~j$h7*L=FJE2(vL()puQF@aL*eE=4mu%-uN9+Mq=Xk z+*kcxNKr)_izs#p0c=o8az6-SmeRIfhnI{do!W!a_K%(=K;2j)^guSYrl%0Z7OZqY z&9Gp8c==23_->)?R;A-8?aqS-(8|4NB)IP0jf+2v-0*MS9`{QOQ71EfHHR=?8u{5M zr+L>xq#di^pUOca6x07_H9+k7byO*?3Bp)0;zXTrfYLT*`yprB1>9Q60xxIya-l#F zWFu_eTqZL|abn`y2H1?AEjs93nRI+pDtzm%s`$n^ZV=KG;nx3i0%YY*E*|AH_6gM= z_fQg}%1kjmbC6NU^yM=mT0Z~r=X{;cRWnzO@7Bwz@uI(!UxHaw8Q~dYHvxjWYMTdz zK#+ybRxqd|y|1=#L)6LhzRAFr0EheOwOK9}SgnbUe_v(}-zPvr@RTvdSWhKhojl-{ zo?raukMcSa-e*^ce(-XA+3(Sroz>mUm7S=?&AIqJ-p)SNinVBQVKQVFa2_mW9B?-= z%9Rz!NE1D%A!AAP6<4Z4-9>K4ZZWr~Ru4WxRUc)XukW|BRBvMZxdp_p{;BlUY7 z5cwMvX$4;2ipe*;Fxm`eAF5c7GFb3wJpwNP94a_tHy>maLzx_nzf9_%tkw8>jj(XGg1GO9WV|$qVyb 
z1#aeJz?R3OgseSbmD?AFn1x?rFhh;m>2*W^2))HFU3~tgdv>fjWGUqQOaZ&6E2anKW}!SQio36UU=TMa1aY3aVF+@COfm-PZMIlu^(-%<0sdPG7<%n&`^I`%G#>-_^+pgelW1 zhh%~%zqPt#+=^pGRn%j%f>HXeOPngI{_9p3_*_MFXf|!oY@P_Fq-!$|wituu6c1M# z$h!t*D5{px4J^{v#2kkt%OgW-_Qe(QF~kSwB##r+I3H9!Uh-9Yz}+z?nbtXwad|G{ zGF0X5^l{ms)#bo1Uq@D}uBfd=Ya|hyE+{MAnbDN>a2!JvY|sjRSsKv<&{v+L3c0Cm zy%^JNm`4`qquoA4anZ^T^?Fb#0t~4_Ql`QJdL?M{G!PPjR65{B_;I0GrkNg3(DxoB}g+SekDjlTe_at^9@Z}D9YI1@^H5Sq6dsrs{Eu{1$1T9?slz;U96@mtzVzj;i84BYDLOzK{w{PxeUEGB-2x&|B-2-_{dtr z?qH`bC9w{nffJy4={SarjIRI5MsFTHun)S~$~r&)SXZI)3L|7vv~-*6T^r}Kbobyg zn;8Amyvm*2XmC{Ehm1d)eojW1a^3J6t^8fPh%E6$K^yfXd$HJ|?fQ7CUKTT+F)^x_ zN%_-;i=oy|TGBU?@3Z!UFvpjiC=*7F9jGQ2ra|-K2Kl}$4FL5VEB;5(bV1qJ%a0gH zXTjCgg6?H)MJ`F)sO^q7rvMMWg0_Iqui9+rVtEn7eJ7ypqnjHeqfOiSSpRFRJV4;T z?il}yl}|7G*ZK7SOg`!O*ZF?M%B%m;WBOOyuUPrs`G1uQ0N_`wyfo^+j)DIbD{qbd zuk-!uU;B^Lzmn{D|3b3Ef%@m){S`z1TeABProZ=o6ZlQwH-X;-eiQijBmj!`50V{F zP(HvEavhjqBRSE#FV)Xs@mKNd+uQkw2&KY|JUnQ^#l@*<7-lMr#)2r9J`(GnBiT>$ z2EvR9+ge;~I9=sSkppZso>FNbQ^9#lud+ARNtxXsSFH+!V58 zW@jd6VoPO?6nhJHvk@ZY9}n96q>vP_Fh1^p{$sVLqaBEQa@aVBLLi!* zpL{l~a2+sX{MuIT2Uf#r!^-!StMSX%nfV(MZJYjbIScL;#CoVrr2{@}S%yAj8tn#( z;j`NZfb_t1I$9hvAj`ak2uOWKe=O4eKSJq_`O#gym?|KKDBy&5QTfYuYGWjd;kS8s z807Jt<85R+83mK!tvqm{bQ#L!curRYooRaQ(o!k%M28tUKU({-Eb7{Yi2MC$!`h%# zq2iIV2~H=1zvoUfyneb#6`y#B5r*EF* zR;zu<021TgqvucFRejO3g%>Y{NKWOZ1NWUV&_2AW&LWtcxcMSUf$?nHP|L~qdR7=t z8}~qNPDqQeXhp&krzXee#d$1Kk6_oCvJfh*75`r+LZY~Ecyq_@NVSPzmfQ;KKgx!017YH#3y2=~$nCfi+(Lvi~FMO}^5 zJFDsJ2Gvs^iOzv6*G(COh2*AnQ5ZS1PHo!LNVysPb<-_rXlo68YbgIuY~uiShz?|boUOD%oiSj z#;!5_rH?a4IGJ48*6$ivf}i6#B|^9r#H1VQ0lT{ux_$Jtg`w--+hX&hmQP%()nkZ_ zumb%Z%a69`^lI0jevJHm(E9IrG5CE}|34z|-_7Z9_a}>A_8*_E|Jy^Ce?8|54%Xk5 zg?R9P#QA^woDKioTm!#2{IY+^q*}vrR|Gj^7%KiBV z{om;y>wo&kFZ+)?p#Dhv>+4IB-rtKC{6Bs7Z(sF)d;9U{>Hc41?jrQNJ0}MiRpom} z=w|gN#;ckmB3tV3Wn^10FqeENoR~RI58-GiHhY&7S^Yb$^!FcMAL#c(SWG2~C|su* zH6{831g`+qOXITGzhZ{V#xL`M27ayY;g<3WJb?Z_^Z;+Qj)anG!RIq?|6$D(35Yzt zm{aTDt1D<;I)s%w8KA-}Wx5F&g`yZ4#BD%}q(5L$4qdl|C&QR|+acR?a-l6$@4@f5 
z@5vizbngdDyiLUpp_->)#`CW4&CxK=i}EVF6{=NIK(lui4fxVl*UNHda9hBi`o#?p z!u;|%6n2TtC}Mg;1(AdUtGE#8(O;B`gf~-JS2tBKbyJb4m}GjEjJE}2s-qURMoLUd zd2ozEn;HEvr(3`Ck}F`aD{*!dtK4WA>>i;Q0`_UE{#FDUU); zIvAtV(Gx9b-xGeHXp?8U=VkmFz5 z8)9x2j9?ZvOdjK+M1utYOedR2%>5M;vUESPX0_0{AvMPxcD_BQ9}st4%QH#UbNA7@ zR^4bE3#zxP)sRP*B4r30To6SfyWQs%dEl0^jYI9@SAV58(;&r|1T|3qY>#+24`-R9 zeWXmrsh#0jy>2|C?DJhlrrU(afb&4x=P*U>eK%i5ZA7F&NjOmuuZ7Z@8GJmwH%eV4 zP!2c;RDHmt^A+?lfnQb(P}Xi|9p-v)OaQeM!59CMb)$=Te4gReTkX{!zxFviB?72r z>B?{#|FpG>Beiq>$Ex4*^>iVR+#3_o7#}lg%j%uN(6gdbQ5!-%ePq{V0yy|W8TSXL z~k5u_q%zfbv;(eq% zCr5yy8vp8e)U-)UvZYAMV}T{4PN_po)~@!xIQVLLEwr6$j#<%(3*hM2-hiSrvM=nj zKn+K^oTh8JBvu(&QaG_FW%g+O$uVo3|4!jz65-ho zKQ*!b%9+L|TsuK(a~;tSO(VITOXQQs+BY|v8j{v`A%#vJ-+h2Bl-?CQ1ol({_ztk| zv8=L1NDX|S0zG84J2&IizlM9Y#qXrXRDx5GrUy9{N6Zk=M9y&Ncqt$x2RwiY9-$0! z5u2I4(I#)+2A74*&cYM}xRgu}7O1&j_)ERpVaK-^*3Iw5BXUM$J8+U1u5lAv!QGNi zssPMVOkbObIqc~SZn{)3N>!1-$T4ksLbm<=ncb&3wM6n0u3?_-3TWIla+^xOdcC$c z7f+Jv)nqlr80N|0#`R_vCbTjI6NP&@ZUB*VZ{lE`+c+Ra4lBxV^=TTv!X8|v=kC9w zSmx}qeBN4-d(S=7DD$67Za=MW43NEM}`0$sfe)@(-4QilZK$@e+&0$(*&Cj?^rvG83e~06YYcR zh*8I69e8<*63Rc80raxx@Z?3ex=ZUyw91MOO+M|eSAlqsbDX?FM;SWHF^xRkA9oPD zA5=h%vT>%-hvUdZYL>d@4%r0v$D5ZIe3j2e<-68+RI`M;I7*`0D`Z!_E$T1V0VT6R>w**+-gxt0x2GrY8C9d5A*atF<@vto#>UdfBn%2~iRgX0NK4`Y#+8F>_5KgiK+*a-rXC94!D z>UZv#VBfKA%JGK?*C8Vl2xI9PD%o@*KXv8ryYx@Cpf?AHjR+THF>7N#XEtsZv_mg8 zeUpVv()K?^lEkLSi;K8p~=T)XkMqC;aZp6?}1?3~HV ziN}mYeU$^%u^eOcfHyM8q7(tA?ct)Z)@JY?g(8;_38`%#mfOWePopsK0DLEPmSXSV znP5BYl7AiuXk{tN(sq3^%<`lU$HhW5fI@q7ue}D<(=MfI9J6 zZB3GaI;vS~e8R69tf&mDSLGyUSCHDOB>_cxu4I`QW1bQekaj?uuT8M=nmd#kErGsB z#hgeK|9`q;@K^ByZU5Bk{*O{gQ~kZy_3Oy5aqvHN!hR+Gw!aDdCh(iUZvwvw{3h^k zL;(Dce#+kzXCUC4pLZCNkQ@{eAcg3a`u;>J9JUn=>sh>Cmy2o2UlOqj%t9Nx_X{MQ zfO6-!nt(P0FunZ}yrV@>7?DWP6hvgbTTv0;tFg%rJaSichewf@1!tn!7=PU35SGAl z-vF)xlVC7V_L_8(vu9}>_(7@}6~hmNvvOfN`AA+VS$>{2F%Ig?hjFGodL z9AA81!wd2pA`hWFXH!kH^0NlVu>a=#bl@XCIcZha;?bA}ZqG&$pcrX{2b%bTB7V}sfd%10$(%9c}@xb6GUXf8a_t07m 
z5Z8-Md>o+luU8o~X`yQuY|n`zq|QqG##u!FWyrU{>ve?|)DFkC$NqV*t*yKN!fGo$ z9ZtrXASt^o5E_knNF{`(X>)r@k}k`a1}m^qP~~|U5kvpm_}LdvT3k+}!L-_O`WN$B{jHt76&4GNn?bkf z>u14HO;xWY`Zt>tRzs)Lr3m1-n|>O;-H_T<4DfRYWrulpT6i{_Zg-HyG^H1h3FthO zZt$Q6nWXe)YLLzbLlPdHbxGlOM)yxH>gis^a zZ(=+Fe5#wHSF!%I$=#N*6PgmGjL9y~&*asCjwHf_nU&25_?m6?jZ_8=rJ90X$}=uU z6E?jK_NzV2$?@C5blJJ7YIJ!R^qtP&Ui;gi!)Eg|gU;orQlAB`%hMR>(Xi(>cP`0X z;nU_cyffkp19Akdfz2M%8f7v*iD%0hWsG7wr^o>Wk*V&e{==|!1M1g+Tv0I^Uuehe z8-I=)?lGwH@=GqYI%Ao&+5`x=-q$tMwom4P;H4Xf5SkuqhPkT46~2JqWv>;z2Ca-< zkeAYf#U#Kbph}i8L-{(ZHAa5ag34&U#>0)hx5>Rfsz)kYU9ww4R0Lwh#eF%=&>s>K zL4g?;v&d7d)Mtzj-BwNq6Q%lbM*;=8dCINy+zF#Aa6raVMo+c z_sJ3{rE|m1t&(!=uwzMKOw2~aN3Twxx}l%v=k^1wtB<6iSJPkGWTR~5y6B9P81||y zYD9dOhxbo6h?_>9b62Yil}F|2?@f6_3q}s&trs@#DGPPPvR0}!W9UEiM?kyt1Ay$# zQp#r8Qi#RSG`D_wUo=!Bi!;N}eRANTW~hVi(SIC|)s?#4&Rdw%iPDWNN9|ip20wWg zkFK7i8wQ*YPx4Mu5evfi*Ocg>RS(;of|kYRP>LRhnCQ*RE?*iUea*yZAsUJyI-b-OW;N@jF`F6fB> zr}BPz3d7?&Y~2rP$tl*(C;;Yuxb=yyat&!^rMoKXg15y>_$Q-V|3I&OS2kjB#KA)W zuqCNb1k1z9vXl^q6Ccq83JrMt!T0%@V$eNTQ34NcRQ_NGE*owuv@?sL!p*0${fhnL zctat=WdNX}??Hh)ZQCu_+TUJ?#>c7#KXWV9*d3WX!=JG`gJM1gK3R*;MJ-dOu^>ys zmkKc@ur;TKs0=23Kjw>8BcFybZpPKINnxmV7qbHPu1vJrSobg)??l6yR-!z_`^bkL zy;kRB=uXOBqC~UTG|W?R6x0U^gc!iaD6_g6A|U+g!Gscy#S{{RpDTizMA|iboY&xE z&005d_S`}0lU;Hpx}=BrG$yxPvdqWh7HjoOj2^!qWyIs2DVZcmTe^$<+`ep9y4(*VFtfbVYIk?R-FfVC|)hDQ6& z5R)eA4cEXmyIqjyW>NBql984{10oqqylN`%pC$4_-D8j0Vz|OWm2ztApBA_Dft6Y(xSfSh&8O z%iyKSYG zxN+=I!4D;s#bzc4?1_UeMZuwfX5*`{B%%$j5Xzu(B!PJ081o*sdr4?&^0QNmw{U<1i=~v`JBf z01^zJf-l3g00-DNCy3Wsi%3mky3X048Ly ze?i*%wdyP|6=5nf?|k6oikhty2JP)T9TY~duh&?0^Q}7wMkBg&eqUGpOOjU!zF=oHg8F;%sB^y&23>2c77pdFP z*Qp1}cO7JH&Ju&5n&ftTG90Hn2|HX!+nCc>x6-*FiK=2Oahn)jO0?@DCtZ|L0`?s* zBSR|lolyI5eV3OG@AQgey@Ucs{dRL`%s%?U^CGK_HSd>+KO#S@GmeoQQi#h#}?YBI`>e?kj{?tEOea)eVdySutKcb#(E+K z0P>V#cEjv0VsNZ7hw2lo6W-u=W47}JHeUDP?+BYVc87EMCjn!)2Up;YZLu&U_cyVj zder-`L@7%nj9k!rG9V`(w531*bD;nuxdgbAm|osdOovNj5W0MFln&|(AafJK27D`f zr#&>VYg#MTgxA}tR|Ttb@f;jFFD|_H@DZ9tjR#YbL}1`gY09#cGoR 
zS_D1w$r@)Wxc82*9gx)nYKqEcn6SdO!4M8_@RylVJ{#z3XzayiuvLCU#}Lijx~#?| z_44f1BdlQH9&3s@&O**Lz95ZdBuojB>Kx7%Xv6al0TQ@D5`D#@=}N1VMj;$EQ6u9$ zVkY4tSnh>lZv84TjWmc$`G^#4jek->W)3PhS?Hk}s>PzKvQEkG2uBOWl6Ja}f)r;6 z7VPWUmM@<`oi>fbP=lH@7-0tkbB8AtpzGpg=TE-Np(xV_Z@a^I7_*okhgM0&XsI4fK*;N>tC^x}gj< z>U}A}a7umsa*n2J74v?MbqIAgH7N~+Tp4?F>e;5McT z&EjX6Lak;u6?2zJU4@q_42^alIsq^`QA??i(X$(%;Z9Fz)$kok7MY#-Js&>1g{pW@ z7HbBDQ6P_77ME{xi795(HJu$^w1gCy(w)-PyW?8CPj=FBn`)4>*pHh)0W}$cCjIL5R!ld5`Zp*;r z{aP55JXkH)-a>O=;G)ize1$MNbdZCWd#mxDUd2$8uwb9U01qeSGcr8fIe`>MzVq?( zF7G5GFK!xPkbHFjMVCDn@_ij^&i50867hj-^srPwj`sjiH?gQ!k-5FsChl-nhW(H+ z7*i!XPBNqFfbQ(Xv)$RG5qs<4(cDrrQSkDZ%N)_^d;U_>o7(C#MIOrQ+(D{Zq{+WA zu1&AHH--np)dZ{j!s&(olt>*K5ctMV*LZ7zv!|`I4|+}Y$9-2t zyHcmzn40CLHr$7CIl-G5R2BIVw?(qjq2Q)`ZT_Z7BDA=`5-PADOtD*T&hj6W5Jfi& zf?@M46m)u-gGHS5pC-ys_HVvO@57GT$OhxVmH2_QTP_s$(SEf3zzw6s9Q%!V2G6** z6qO|(5tARsy*uw-*N);V>7?#d6yia0tp#b6Ur6woh>eb7OsKeJ|4P?J{|jB4MDU+= z?O*Xbzjf_@BiZ8jsr)AJo4{`ZzX|*%@Lv)D`y(v#f9l$}de9O_di-ZwQ9uKon}GyA zZLUy*kLK!-i_kLY-c`3CS|>W80(!h?WhXp}Qz1r5B)5QVTZlfDVf2H{e5?yNKoy!3 zQ#kHdz#a@HMF;x^ ziZnKLb_%2nYaqS>u8pHw0fJH9U!sl0M!W`FRiin>ed*0hK0AFx@rh$mT7zhn(JbFB zknMh|eVO*HRLY=F@uVX2QVC#TB=xAymz8K>TzB?p7&I2d{rI3>xgO$|^hC1`X?`pf*Y6hbGb;@{7N)fmZ zU%|njEGolWUeG*h-ELZ_0Dj0CR>f`X?74^dd}$v`-t}6xn@ym97QsVdKx_vBYRqT^ zCLH-dqsf z{h(bWemjKlI;TR!k)D*B_#?RgG2Voy{cYKL{N-5KO1tX7Xa*e13VXD@3y_!3au zK6BCI+v&+jhuJ1YpnnN4(){C9=5xD~+XMuEVy0W4Yq-^7 z!=^)U1m`xOWk)VJfJUBWpw^|BBFL=}LQ>5FzQF;02aCV}9bKvRGI1ax6yZ^?VtDot zTOvSsrWi&9QaS?VjaeV@pzml6i5HS_VlsC9YuOi)-y=T_*NK>I53zor3J12Giv|LGFgIJPUv$5;pg~4`UNh>=_z;3N2;7D= zpacV+IfX_roP8*k`#p^Zv znZYUSSxGFw=Z}8xhVbJxMk_4bshAk@yn<@XXrw49YlDdksJEjvwvY!oOh@qN7Jkup zHWA

    g0WhL%H3f`OQZ=xIi}HPdDJ~yTRABRnJ>2&#j>rut(lx05TUlCzA0t z2IQk>gVtXJZHJ<+`1C{(UMzYT08F=$Vg-!2>fQ_F#w4vKvcOzpxQKq14|~`uStXaA z)ccY{hvHQ(iApY)e2P4Vycyq6TsnIWwu$zdH_C$hjO!tn-4|!Q&EJUI)a;)gQx@em z%udB@y-$JP>FTjGhPx$%ME%JHWf$k_&jWw;w(EyRV~7u_7d%l=bS<9INaoS#gw&*8 ziSW%i{N>^Gv|7lhFD#^;GMwL-bTUlB-Y6~}b1Ex9pN*CiJ!OzJk0{|dR0rZE zndhfn>)ON7X?s9&4%)7^yvc<9NmsDp4wOz3kzt#i2&#{}YXgxXwbH#@$| z+^E-BZ|{2qqgAGT=_uK@0uAD6B-MCP&)633Wf{lN5yy{wr9CCFK77cvV$EWrl^%Jk zO8P(fJzEj!@MDBXjEkvnVn95;&JxCa}2(1h|&I=-U?P^ycNB|Vu=Edh&%!#mP zB7cs}D&)Nr+u3jG=KchNs8(S0TR}d6dxP%5YdhBjXKR6PD}gbcXs}%Sc%p$Vf%3yq zhLE53s}=~jH~4=$j8@#f+{Zjij!%Rh@vYyp6AbZ2HSGlzwCqHI3$;uf>p57;homF# z(MBi>s}aF{4N<9jCYR>!ozWP~ZmSQDNC1wNRtl$VFx*4}t`I5aa2}3S;P1J9Nccx{1vlNcjg6qeB~Sd z^aK@i5LD*y)*?UwY{)FC{6)w-70r4XIl1t zV|cVQj}PM=ExB=&O?VgnBg;JBSPziPUL`0jFYDL#gvA*`CNG?<3h+qxYrSr4gHNdI zxUTK@xD;YzHa0grJH^?m`8GTOm2Ce=Ct+kyoq4F9xM2{u6K~LDpIf3(E-Kn;wpxb}jF>Da8Mjz}$ z8C3Gm{UbOf=E`Wo{pZkd;Vgb56p0ZrDTyB-*BV)dvIlMML@NGYuXq!C0< z_p-JE+$8PAZ@76R1gD>2YosY%tU004He9Yr2IU$Du#-rBay z6$gW&*!uc}5i>+L8d7wt^^-~}kohu)$!FV_D_-c^eR~3KCRn#Xo_uGW5=UxcCO`pj z2-w4*apo3Sa7NAjK78P4T+C&TUF2~MK|=x^xm;K8;bBg>&AUV;+`GIp?eV&>F(IB4 zuRl7<@#hisa2=6|#B!&ge`vfwvbrQv^=ny~GW%CPWgyKRzwmkPW=#=b{5w+qQ1iL8;+N&F-pvZ>ndgP*yP*Dk4oTLJmY*r|XuGkKSwVL8Dn5MAlMY?o*?af1OxGv{- zjwT4amL;2@r!D5@>!P&u%DQgzHKJU&SrTdY{aGIXYY|a%H^{ibYwhoX)U^ zaDYByO|sH{kVnlb{far>Z^v31XZ_#9oB0zAicTf9d31y&zj~WoDr+ncHrl*x$^%+_5h$NF!@G5ad z&)r%zOlH_!*mNms$BD4gp|5=DP7t?>pSeVyC_M0FUAciuRmb)CZ42}ilSCIpw4~M=&{O2pAUcDfg@}zFv{RW?Ok7e97X*PhJ`36uO@|fiN-n(E2-kk|#GLZD z76#=9#Bc`cHGFAvj7R1tV3^%e2Y$OA=R8K+{TNblN`fKXGLP_7;EmlaGW_zGt5ybf z?k!=owmp=`@MlL1 z1qjbD$zezlPDkBuln*#+A&9Dz?)G?i%N4|86>>W^7SfUa0?z$;m`H<=uRt2Tf9HGsOTvWh)i%hWdwQ>SE zEpt&`KD(8Vu^KI-ywYYbG|EBPj8M~KvnaH}dUVT`g5%gRN2VOvpfMHPZ4wr?W0d*g zJ&&s}*_5);U<1I~X$bhBhcJyG-IJW-WI|Wsg?!^>vSuK_2Y(c_$M%JZeBvOcMPFmE zzLmu`rn*>50}p3)qUq~9*(`mc9i0wNSaj{3`Mt%{*a8K&qv1{bG{5~KX`;QsawR=6 zl6EwkQ`<)Ww0LBMx8vYUA<9&2{|AiHw+@)9dA@I$O5;ykHftQZKga;_CB^22*)wCb 
zo=JpPQxXy?9E;fR%~C5mweO~9!bN3@Nf+|-a(ZX`0WST(r?rh~h~AILwx=JN8PK*~ z6S`cuWyT=^Um-k_G7p~Vp0k?{08betDm6lcY9?bU>tF8qzR=6S-jZm)1-!Vw3MZWj zRRHH*bg;(|XX_1p|0@~0>@Q^ObG3h#v41^g_*=&QFUR}85B?_bo4{`ZzX|*%@NYx_ z-2WeB>`j}@QJIQ-D(nJ9Z2Vit3K3c9y?S3<{P55nrAqBlob!gWu+tyA92nMOoc&aH z>vO))&JYX`2zZ&aBb`|B&Pcs2Ho^E8GQgc@RtH^DQCpW*_#uSA4Q44s^#R{5fpMv7 zXqS3+2aIVp?5&cVYg2w`V0q5vSKdmDzgeRJ?VCO6cAUSXBv_Vwn6R(_Jzq1nC=K1D zV?-p3+le&|FbviedA9XCo+{9I=|9k>r3vHi;OCy^wA{COKsTbhq{%-HMo6Qg_}R@& z8HiPD7Tem-`u6EYj5IjLsxU}sWdydf)ug@!s2tB@@Ag9e1P6x=KE|r3?kU%eI_BNT z7Y*{)KEy!w(!eM-%1F!=_-T*j&Y};Dex>pHz{nMw+8D%<7M(~S;*dFen4*K=*lS%g zDm_jFP!@{VMNw>eq3!yZEi4<=0j1HKH`P0Ofz{+`5nt7g3~7+EgmHLz24Wt(P#zP^ z3Z%3^7!>twpAneDFq$7{b|dOe{Z0Plp7dK2tJ>Do)((|&Sw_Id-{1V%oB!`aB z=er*<2IY+GjLu9>R)I{FE;kNpX7jdP^Th+kUuj=agE0N8vtLLAy@9QlNvZy+ zU+Pd-(mCDk%rec%+z~hsXk;qTLsPHg+9<8IS|G>pPD`x=t<*>|S^P2I_QBW1E7e%9 zh6(v;1;e2F>5gz%=F}RuZWyfEY2)$YmmXPdfLgz4w&K~|b_;vG9NcB=8W|M0xzav! z{qMIrxAs*VyZjJE_Qs7KDRn(7qz6^Sn$rE1EZ^PFrFLKA#pcTy-O_2ox#5F4t$Vk5_jt z?!`vDF%;rlaKJF_N94uyDOfnOzKQ`S?2v~dri=abA-r6js>&9QGJ)i#+^S2v8d!fAT*-uyl|0<9HN3t=6wDOSK$j%j z;Mn@IEH74YVwLno{5s6~B%~&4DxDK_GUbF18ileo9xJlXjZS8*rvj}*2vYV>H}Iua zo~x{iP&gYgDV|e{=B)MA?fHvv1nW0vjfvo8Fk6j(ugwZDBABKIy{a=gim92 z6|MHsxAbEMzX^14pSB|l3J*9}=)oq7$t0{17SZ25frSDq{`d-Lc&YdjUUbP`uSXQSI9^3h&C6DCEJJb(hP`5G+{wgqqh(!r&BGwj?oGZR zJ(~Zu71^XmdzS?{gU)Y%bwzL|VMZ_FG82v*o+X2E)nq3>0_;wc3O$^)K@VK@tV?5) zw`j^6QdF$s0J9Fd|ck@IC2%Ir>h({H1tcIV$J!@>@ly z0W2NUcXl>1wcT?iz2yzY{*Ae5UQKaHp~ZaN2hzK@_mH2AmC8_@84pn9DZPM7F0pKs zvmB`2E_0*^wN8s={fT5Jz#`TGrh}JUrlYt4*%71VbT&`f(>|>yBTj#YXDb6;2miq; zV&QFka^p+0@-;}B?w-PJWXcrscxh|nU9K7Qw@a6qq%l*0ukQAbJ(1aDfFPqmO5r0} zy#^I$6l-61!-`k?@xk0>Y-G1SD&K?w>sKexvTiVZ?2LzMR62b&3DdGZX@KJ)`OgIj&2-;Jp6 z^uC(k_KfIj%f=NZUhh$REU;GXO~0@|3G;n}3T0H9UZ~UY(fMK>gnIX~%;QRudHkzs zD$e$ZGg&Yr9M<{&)81PE$&oGDf(0sOW@cs;Gcz+YGcz+YgNm6!DrRP8W~rD_U;SEo zHgDf_&zkMGKRde$vow$N>k;X8B5tPVaSy-s!lJQiE2BPMaLygvW>SkC-Vyhu7zu8( zPSTt+89qs)dwVJPQo#uz{iipt^{uh{q$6jTImhAp@ZeT#CuMn&Q>}P`pkfYjoV%25 
zN>Yts;s{W4Of%;)1g_|EdxbD{Ve8dV)1pnE=amJ9CUf5z8i|d-^4eFbj}iH{r>7#Z z?yo6@#T+c(4Y4t~qT}0q4NIupEBK+UO)eaPO}kLV0ZT5`;Axhj*52hGM1cE%V=?EK zb6Yv2TuHsHE~>jyxh)wrb_L{+l@T#QArz8y%A{?wL&ig=mJTIYGzIm{CL(6XfjHW4 zV&Br~pL+)ANHxc(c>8Fp)CZv{+)U+hvf!ZGvXSQ^+yKd$6bpBSTNKcZ0hF6LPlMoe z^T;A}`(FiTjZHFcI-9zhHx>7Ti!@fijg#yaLhLaEJBMT@Jxic9rJr*Y80S=~9+}T` zEA?`j_V&+oM#x(xy7X1qnUDkXqz}19(3dp=EbBd^Cv6RY1+IrpoHbVWo8!A}f{zZM zwePvU0;mHUbCab`EdTvh*h zNR*GiKiX>-s0y^sH76qF8u0j?!qwKv7*v3rm|Y!BkK0~)1|ax`f4-_v@at^*>RB<7 zT}bE24pSk0ua0rsQ;TdX{*_Jez4b@QspzW)COEg=78N2h3ay>gvQRqvSX36nX!x)? zBMa&_MiXSzhBpw_feJvKZM9EY&22oAp`Xl0Jk;})4xFN6lZ)0Ia4iY7zV51ooHa|0 z>as`9U=_2pwNRvQi7hFDh=Z#A`mj!!U3wBHoi#0#u~g4lwzGRdzKfD96F8q%m(GaqrPLejJ zDkUJTX$yXB;KJD;jp)4x-9z6*KU7nZ*o29;N#T2j3!|%gD9nYz=wiKI=vk!o>I0h^ zf8!wSjI`YgTjzM+a%=@eO3WIgV><~u!7f-3;5|Nfph_H0HUWC`SfyOY2Y`~>IHa$; zMBiArc^Vrkd3QdQ${WkDxlG28O`p=H5a0}0>)k8kho!xX^ijRLN(k!Sej0NF42^Q# z-(t{)WF7x1r8*4rx#L(mJm$+Y+o2-BTcn;p!s<`ftcXL3SEdhsD;iNgxW$!NMr8eGn? zLRAF$8%YvD))vY;8rmnQKwK!|A{)c24Lz0;x6NgJ?Q-1<&g1PfLuz>1X(>q>&-oRy ztpRBuKYj*xO2dphl_ixF*%>BnQqlc*J7wpEgbj?XgEwU|YrZQGn96RSce?JQ_a!50 z8E-grnm2$IL`CApeGPp}{EXGc>snnyBtsToT(EL``8QRVaq8<63Y<~2iG6Pk7DUrW zS>nasaQnGkD8ls-!17>SsN%Vfh_9}bk;jy4I__9aMT2oZ7LU=BBX2Etn3W9}iDfr} zDpnEz4X2rt&F558PFncDU6H0WghcxYY9~U)?A;VKNn73qeLxgxSQTf)M6fnC2stgi z+}g9;ytGL$R6C;R-a(NM5wK(~r#O}+#+kW?nF|X-`ANt5O1ODP{`#IU=2j^hbS2wD zhW6)68U%z|0hykiKK#rBM4Su!^od;%U#po^3r`zIct(sx95(%kK;_fYkC*j&r)be| zdL7MYcv&hwiLIn>oCP5^RIA+Nbf|?o==hxwWNF&-NiT$PH3E@{22YtV25)Ued7$@} z+4IM`T*f5dHkKQ5l}=sNHk_kx`@pqwh)otaEz@j?8SL=zH;%ZC31KYEO+C{a6JI{e z-fVwneeHxrDW9I#I?Q~L?_td|lqhx5D^P&Evw<&vBYz9<*Flf+DJW?AtR*60fX&B& zd_#YM1+ebppXCdvjK1)c)*-lfGSFu1P;i*eLTRj(dU)NY1{vMW56A{C$mM%_soJu|6rX2+X|Y%$TyuNjxy=~AhFospCG?8kW~=kSggPvS z^BvaRXqa`KYRIIzicPN^n#F z+$`(Y_v{j$2~>|#a-Td}qm7k{m|BnUa;o6IDaxSPj7_Ei=+0>uWer=SmU}f**-gF{ zVnB^g#Ib65N8o|-bA1X)2JLotiX5Y}HQ$j-a<`#NrY$qsP$70!_SWyDK%TWT8zo45 zJ5JvH$j1F*u*dY`n;;a^bsLRNZ_1}``5CfeBQ-SUK<=<8JU(K#y+u(!Z=){1GPCic 
zR5K#1DiwX4+kl_yBtmY>TbbNjM(!%X`iI0Rkfe1gUWHA&*`8@qrXjOheUY};5TbW& zkgWUd`{!doyphht@lfp4k9EcMChpi^!DC%A9av)n#71WR%^1?Jl}mkm=WC}GpJeR0 znesAM7InOn~7 z=_Ouw1qbeAxBp0#B~ut6n0~E{vj;H7&ZZaCJ>+LYO^v#f+WFT1CAvg>j8q||R$jk1 zs?sRIv;W??ioc<(eYsnX`d#QqHEr7Z81x)aIx)mmzFSCNDC+g5bP7dB_G|%$FwfHA zp%kcq2)G@Hk`3*pafC@Gn9=JzX@ujO3>%$r0+hyo)ync$OTJ(7FI)5dA8J+jM>{~i zAFn?O^#0iXL#tCV^FPxbf4KUG**~^_Zw2`Gtxo?c*HZl#$rDxkf6cZ0!s(w5{Bq>C z{EfhG1b!p%8-d>l{6^saV+eo;{fJ2Xe!PM%zkhecYQZ~oi4S#vb%j{Ko}15bz=y&F z;%n_f^S9~fnKlM0wekY#X7QDiGA&JryN9aZWo7*Q3Ds<*06x_5t5q8B8}1XzBwDuW z;6r3C($wDMHq{I~ow-U#h&U1lIi5WiPY-ftUPlHf8dd*EmyQxzS&ul>(Fc<39AKyo zL@4cq09L@=SPwP&a@E)>Hd|;`*hUQR-0S3`4374#D7LEtuu`;|qA)HlF;nU$)3V-O zYHP6GKKgdfNa#MHeAw5b_RdzH^7T5YB8*ZOucEaB+#ZC?jjoJ*C{IC|>C|lq zgn>SDK2i$#AWgM!uH7Loa|<(VhYxTEml{{)V&&9K)m%eh^>l_EXWzOtTbSn`Vv+*1 z>?<(tpx)(P`nAp6kEDjGvh(>Y&8nAZcigH%GJWX5Z(IB8oC!K-$U~GLnPH6%j>&bh z&((7S-A<}P!QfxtTss!~g{G;c!E;EkQQZ*~>FYi3G*AcOBm8 z5^9~Ev1f2VD1v46XsV&BN!5Z}l=BJ=9>s5mimSH6MK`0308D^3GkEl!vw2D}O#j|# zM}P~-Wnipyo(5T>m|rX^2;9-Dvsc5!jH`c74-G=Z0-Ncw8%F%Op1yn+Z8{@^EyP|% z%Z=1kn9-TdER4aImXo!N-On2+ei$};g@3ldmOc0*)(P*eFNmcKSjzMMV5sHm{FUv6 zDf!$eyS8N14nm76vgB7U@gxv~&ei>%g=eDJ2xm%~qrsN1DUzzkn7X})J(rhKhQIDZ zOw0Xqx*kvM2SQpLu}0Te@6Q+9XVK#+!|aD=G)C6vSCIX~+VCzPra@Ma1E2Wr8Qqi7 z-%>+vE-RM$Qkgu;CdpYL$Bb3dO!@F8EPzhB*v$ba)n&^kEe%nqFo%3p04TUoyT5~` zeT2srhUrSR0V@aW(kh`I)y+`%9ZrA#Jj=U4kK45VgI}67(pYp^b{CKl7tYmp@^WhJ zKwpeaZ-#6sonUIZw>&}PkAl1eG)r4`oPJTyL(GgCC6eIOx*XB{(&VTc^vc+MEu!h* z0VfF7wYX=+gPI2Q4U^W=Hg>}pyw?iaP;m588mGY~&gf!;#NsPaq^fY9aw>6sOj9i;5&-?EcstDbIe4}Q-1=8U zfS&P$DTrQZ#(F3>o)ey9=XPsoXON5|OIKMKm|T<2$LGGRXd3DUf6ciQ*SinkY1m7V zmy|jy))$-igKt|ro$ox*Asd4=u4#|`UYaw&tZNou@{fTqqgvHo;DH@>(7q928!+!; zLMgl8eqT_mM4W}I=e5w(lO#4=TfjE7(}NDm z_q@hbrFn`qo-*cD6LY8I!_c#N5OXO#7|&Y^OWrA=4WJ^XmZ zRP2tWZ5!4`{g2s1L2lhGz&{xOb9f!e&^|&VhCB1s0a087ZE9NwIi_9gf_oJ`8Ob)E z4+0wr;;0HSX9w;)PN|LRlC5d)yu;?g{eT*7Y~hBhpLnO3wqECHa5{Yvxj)VCOFKIrOHCpNVj@>SpX)VcyEhH13Dp4}J>_!D 
z=BTMneuJkaLqwK3iX?UUX^BfRJJ0W;xl%m!<0=!&{ebn5y)_*ja@65WxN}OVh8C}k zXm4;dlN|?LCrL{MW+!@st7M&q>%Hx%lPm1QwfTeIEwGg{nOu@ajIKIKI}<3=@E}_q zcuc}OMR-M>@m;gHw~Cv)05NSjm5#rvbS&tI+Wz$-oc=;llf8J2d2Sc)~ z&`srO2iJ}6hlT1>y$s9mU$V(175avzErK8`ieY(mU1#z4-@R>|`akLz)r|Gk8i*6I zAyHvR3x*@sn5^DB#lMxtP!L2+z6MiAOP;r)Sm}v;F36JW$(p-3OD4B`K>m2T zo9|ihW^SM>)6GfWV^tVpvZ80ODd~i3Sckn;o6&zr0B^Y?bS8zr>ws5kgUme`>hm=e$ ztzg}}mp=znc zAMOLS_Hn14)yiyS=n=d}Ts~yboA5OZE0$m3U6Pb@gtR29u3DEsh5$lmDGe=ZSQGy}&&|#}qBE)G9U@Q{7$&;D! z4DFJ*=M$2&7RUx<71m9`t}c+ zv^GjtbD=%)WrvsiAf-#Ml6E3-D4Jrd(*HTz0}AjDzTzio0SNFX8v+RUr(*7BT=q-; zCEN3F3w3}5{CNhRf8j~~oqY-We`VyS&!DUShhtuf47vtC<&U3H0DZ<^dd8o6M%{nt znSSY+e(IV2)HDCmGyl{x|Ed4@OaJkw{^Or|wqJU-pL(`G^$hg?82I52{T~DWkUu{7 z=Vb$^{lnS+0o$JodV>PU{q&VB`R|=J`#DTL{`Yo%KZmgqeFga2tajQexv|B(2vSlVCn^_Tq1dHdhb(h5fW3GpAU{*xsA_wR|nXK8=I{(Ij3 zLgIg~{J+jyv6R0*|F3z=Hu3j%em{q)k^X+3<^8nl@cw(dU-R~B=HJ`>nzze%KTrJs zQxP@}^GTt>wYGpu!}i*c4{SeLJ@b>>s3(j^Qi2S@rnGc_8kX}Ord78YcH%c=KKO zUyUG5aoY^WwV0#$$Yr+Gec2Flmuh5bgcBkkZn*>~NZ6%xiR|WZGiw>kEa|KdQ67gY>3TAn3)n5dW8kMzV z-M2tvjdElzjz|Plt(zEjX3h+<5}aKIGn2FUs;6acAHzcu!d3BXJ!-EdIaSurC@dp~zfKDLrq9cAaDmU(eGYnK6rX7PV4D1i7hhK_q` z&FQwj{#99p+$_v#7^>92Q3Ghjma^WS@a$sFR>_Ba`(1(ey{W9p10XxccD_LY&C{~U zJOFt_zsi8@i|YGL;DTV|sed~uY}PtFI7&Tf3u zo8AF5#1##qSGfX(RBUKSG-(`Y!i{9~o}{o*iRhiS&dD|&Uqvr&SuvQ4nGcHD9S3Eq zVyrsYuDK%br;JqKwo4(vA=6nP7W^3$+~X4NZWra8oP4ctJ=2szQ7%L&t*4t!_S0t= z>?2wkEwvGdgmP@!AHwXW7+SY0N7X3H)rs#G2z!>~SQmYl^*}CK_@0*m7uO{(j)gzV zEP;?SNO=z@7Y}Q~RQ3;)?6;ObQRciH$`PX(r{5J4$t5JOU|8P?J$BWEmR1)TEX%k34=`=93=~eZ01V`ckbL%cT|vNMX!eHM*`iYL_5};GnKMQ6gsL};E#W4 zC1yN9Cmix618q%rotUM)XNiDGww9&oDX!2!IG?b-U+&I}ipCI_$e(Mm>5yTv)VVJ9 ziL7RL8SwlHeDL9mDJPeIe|&@D+HzvDcV!r7kymi|^laU}gVQ`g6$6)LB4UjO4L%69 zdCp-&6fND}{&ql`4_C`UQ@oQljFkI?!P2`cJBf_1p8#8&0WU35Piyt_R;)7q^z&9# z3%GZJ6Ud<=@S{|SJYOYmYEM<%@&LpU-}qUVZk|YiXJap8t3FB;pWp#Q1evE*m+oF% zCUD;iGn&L5&Dkqva|;B5L0h{STc#8*0iBa zSlGlc;xr%FOuxkhhIQhdclv;rrwf*CxV#8<=;O!74zKC+J-O^r2`xNR!C&tul(GM) 
zwm6Sm4Ws9+Or=OjOM9Ie(C%D~^tK0Q)auvK8)c?c8co-vUIN0y+N%a4cuYByAr+yc zjJxZK#;3Z2)C3~h$3|9S-uz;J@Qg=-bATqys3R7-kco?vs>c#G^pSTHudLM+Y;(HI zOx0@(I+^c^Xn0ZeLWBe0iT>DA37$R0;nU@C?9B5?{k>6^Z|DYi{n#{@3%wL^+Ptuc zp^Ko$SnSjlP}t4m^$>q_Gv{un2u6H9?-INU*5-ra4%N+kl+46=gNDL4_8JndblJHJ zbsYpRsfYVoDN%V$cVfa6437>Gg=k<`L#HqD=KF}Er(PI? zJEqu`DJB#wJy~Jpa&KAL=OG8I29(%!=R}y!swSE2m}$Kd+~Z?XlLf50AKSKRW>gP! z?+C9ab4BT1;JX-zRD^GXpin%zxl^@W{&#+DHGKMrgp8>ayRBv zdgdwv-z@5x%QkH}=hN}mlCm^gOByG9(p;YAsH~PW&Gm0T2L}K@Fl?wexnc{msN%RJ zt#=j>xs+3su+KFsK^PY$>pHQ-NNQ=co#-d-23k+eN{BHYdaEpyrpmM^o@a?!d*t{M z)Qnx1;aMYN-Da2iEeKDhzb=t5qyx+K<$SOM#|Z*O@IqYa+n3j!lHR~29mBV9sw;l{ za&d=`@Xt;04cPu@MQ-|svEb6UTsSA%!0I~!#v~Qc9^&t zBoXgvVHa7-Z(@Au4C%jAAv(2VLZ8cKuUdvIs@lsij+ zC^2mHcL-{PC93y>W;?(_Xd>@Wzojrzv9g(G_Ax~|aX<2d$@3le zut>7ex=Um8yHxdeqq7mLiVJt`R9QAy+eCW0) z_*b2g?9a$>4=THDxpYBozSwi>EjP_LmNG2^dlBh0}6^Ki;B;a{V& zU;j@28TS9@hyB+6M&LIBzY+M2z;6V8Bk(^S0dSuGSQ`<9in1*>r3V_}ozaU#N}@m9 zVtN*%%ptK;)^e%?FIdLoBnIAH;%KXkL2 zi}v(xp6yg`U&jg-DqrRzxwFcYEil-_)L;d(>Xwn;#Kujr$<*kQ>^ue>Jk6 z2mpwf$_{EeAyEVoCBYB~0S!D?ToOK5q;Sl^+jARfXa^>AC-?}UD((g)*vc&C8rc=2 zY2$I#A{bsYu;BNNMd-yCbY%UekVS5cVE^OurqY5&o-f>HLNdZTDx=huuJ9ik4HB=D z-SMhkGjOcy*;Sdtfy6czqHi7X{Uquqm9!-1U{iXR<{ul6)%hwj;96Vug6CIZ7GSG} z{OwS>50VK5zzUO5If;kAIUzgnP`nzPL0NjmN$9~?>muBk?yTVuM-H{8nyWnVyJg$3 zeB+4V2iIeLLA87gR|WYR*R4@38~u^8pXKDSboK6b0a4Tl#4_qSvlnJWA3E2@yj~gR%I~12t{;=aohq3@f4h+SV=HCDpdqF|hb*ejUdl{A>GW16K z%xs^>+4X#FrTo#w8lrtBw%R>P3X1sppyX|+xXUqcXF+H8E*+iK`A~7N1BScs{GrLd z{mMxvN|5P)qbfD|Y@0=qOX}Gx)0{5J3#u0<{!}+;ycaJy*3TN;rlNG;HqoXp2fb{N zYs${^5eA{fQD>55M6ZeE@t$0(7Q{8qpK?5-zFF?Pqu z0q2Rc?sD62!fLaY#G@B|AqFlhg_x*9-8=bfbL)NC2f^8P_8unRCI;#nQG}El4m_%i z!!Ql*jA&b-t0%wrkXT1|7l7RXH+(WxXW)IVE)vJe>Z0>+L2~;DN;K;CY=83WD9$*SXV%=Z8g|Ab!1T22@hdPqJP1epTQth{dhK<<7QC zLokuqRMN%dk%&G=sxj%|_>Rsz?N)2%F)ZQA$ziRpYOLQ1-4DxOEay20SP)=uktye9 z+30=o?EhV=hGCqd(9IvH)5QVZ$lXwH8!?4!asA^$et#F=TIG)9CAW>PsXoOPadDvm z_VXM>5@K5|$Eis)HVu`Bw=BHLYD8xmit(}+J!)EHto1{biL0w49lo#m9Rhv#0iLMp 
z`~fO+V^@@c6yZhke7q5iw&C(DNkbU>&^P63E&N)xA+^XfEPTj-uKkzr5P50v#ESlT zJU4+@r`PLJ7WGn9B~82f9Dg~5S4xKFd#x=O#^J!gEKRrdo)SCIgv`-Mh)|t_LLjM) zUc2g%1VG-M`@PG2`e2z-)LKzfIX;{c#_N>E$J0cLW_gA8^i^J@+{{bs29zT{D^lOx zB3|`Kf(!trE9*r!6xFPW3shF~A05#CtQ9LvOZ|j{!{Z@r&su)28ACPTJq=(_c zEk!3V_xl<~bZebV3J7O! zkE8?R_qape$;i|tz+=`|Q}6ZfD@Ue4lSS@%P91&{$Ad=4`jxdS=v{Hdigx@mU8gTZXtK>joW5io{d6JiH zoEOU4WFt=5uA!^}3jT183PiYy=`no?L}6yv1fEJs_eEC%DL#WYEML)2tm>yiY@9eM zOI!@Za90hhpDh>{=v^VfuEHmVle26dz(dgZTO zpQ$bO@Clx4k>R%{5+A$Vl*8aJq$8X9#dZ-5_qCXhd()O6uC$xD5vQ8a4JTot^FJ`u zj1)3IpcI>#XMevzfi;#O)ERIYL838I4Kjr#gp(g2Rt#ed{6NvZ%_8pX3U}+_DTpso ze2{~0wtc@7JstmzAgsyWWGO-zb%hWPUyB0hs>oXMluPWfmiI~f8Q3{e!!ZTd)zn2P zJ%s)OWFTt}Kijmt*uV!Z!el~`Gok`KFdXN3&OzOFYfFa2?k%{{GvY_rQ!Q`nr9Jd+*%RF@MI;uZw+)$VlcswFx6GcPwjq6>)0EBRamiJl0U+W{LA>% zcbi55rx!Igz~?fGqq9tR`l!2>5N&ejY(1-E1R^n!aE&TG-Wt%0IHHm$$w>EWct{wi zOwyxRQyD^1*(BE$HhW{mL&vmcRrrT}DY;TrLtQ_V$im2pyq$DIXkKQ68dpSns+dt* z20aDUk3O6ZLfI*gFaUNbWj*VzL`&H?FD+3Sdn}3Gu?QwIhr*(oyNRvZED%?pg6dQG zuxTRxo|gaw`1`7c_n$G;FZq|*(SNIISj+#D!T1L8AH%SJ&sK*0$Kn6>{r@Uk{0oO) z@-OlD@6Q$wRsMMa{&4l5w9&ug|9>xA{NH|Iemn3Rf!_%HM&LIB|636VgFTe%%&_m& z(9*QhSq%i=Kd+(g*}C4Hn}?albIzDSacInA3*!qb1a7!*ru0(uRhXsUJ+x(@?>tD_ z3I25Ei^|i_)_4${37A~lwBK;~W+B93L89a1S7#WPdeI*qvY;HkB9^b$9zgF&l|q65i(l=$S8h-0!gG_K zEyZJFofk2pzvA={zy>PdA2O(Bb~bOFXJSuew|%N0PQ<50e61usctg>a`&@P8F3zu! 
zv)T$pyQ9J8`I=hMJ{XGjbKV9x^3^Y-w&3bB0qoRO zG~S9kYB8|vk&)+WJ%+J+%hVlBoWL&S(q4L7dX<%j1Ot^rhs;cb(z{FY@a98laQ*rA zZRg#w_n(8_1WSaNObtm+0}=*fqJ5G=(h0V4B;4DLH}o4*j5L)eTJx{-*G28cZhdKB ziL4Vjg`;p~k@~B>y?K3|8`&5wY3}me&a`h9Bim%_4E2SZ>R4tjY}RZ_t_?Rh<_ZyV z!)Er=$6#6ya$%qC(=!F73o;6zwmnP_dXj5ZDH|mO*7p?Gca7W-R-7=rWCi0+iVgsn znCz4{*^*23E2LLeDMcJ|Y12qXK)rDR0steLaZHB6^E2GF9uh{$E4G;*hChbNwf85} z@JoVIF^9;7t!q-NeuKC7Hq?E^6jHSEg-Mpni6qu10g=+kln*C=hf}^8rTxl%4nw^6 z&=dTgh2UG;qOPjJ?=kvXf;^rMtrY!8VZ(EZAdFpo{E>rI`B8K4LXOpX>&*BfX8#^X z^Byl?Z$K>Y+)xGOo#HLWRrS>EV<+rT|P$0BeS`Gd$iF+D0&wi%yF zChph59uAxsz(+U0d*5lao)cb(!RcKjQpYAK2HEzT6rEOfm?0a-OlD6Rd{8*9H$W@$ z3$qMxZ3d8cPJFGan{@zf!KW#SU;(`dro8xUd>yHM_s$zpNkpY#pv@Udw3FNPwy?>a zr5c4VHrOUSCU2rWip;PJx@G?DYX@;l#5IQj#-gWKgrN-~J8xqQcgJ_U$Ip7fuxdSo z;F7E$hGos~!)_LH7noCXf>x63;msx6?l>&Grb}>G{Tedf= z;N*0qb-8UGWy{qPo-UT76jGXp%Ooa|^&&2-@(MSX*yxBqF57{P%(7LmuEP3mP;QVE zSlyf zKK9HIbG0t z_e4%#GLv#u_e{HtKyX7e!XP$@mgF(xqj+G$Et&+a0JB5lE)*e84fIct<#*X_q}W8`Q*i}y&&*emS&0S z*2h9iZPLc0sT7bhm^P`fk(xGLbcS0F0N`K>oGgseuS1lPo#96>nlI0DqfTQggH`iz zrAOSyD966vEn6olJys> zT71O(XtXDKqIGU})5KPiTR40IfDYT=vQNXhy_jtMtekhJMRSoBM|O&V-fL2n=^{f^ z+|uO7NWD`CGR8c(8#YimIfPWlULb|nWhp?VYUZpcDAE@AXfAt4slZ& z+F8h%`ouQmPgr|j^tQ_=wHX|?`~b!4c7zXVa3CVZpxrD5N%pGf`p8?C?fILi>?-n~ zeXcaOImCeJOvGM5<1UZG&C3Uh?AuaAj2zmpOg5b}Owoh*fRf-$w=b=g-O6*xDbPn5 zo)5oR_JJEeD!%|c!eSkV@=}t;s8*0?9UPFu6BYWZ1YP1?3}&_Rt!mXd)D!af+&n1} zb1SwF8ryE>(lnEdgyMpP=}_d`Cl*pB?d#)3+)OALPe0IUZ=`Z_YI&|qZA^iP^Gt6% zPdxuvPgyw_xuJ8qKA8H??tVS zf2c2B!@}@6w`d=r zn70*})r2f<_|ZRWY8+095u}JVxTx(l@r8-#P+rU6;cbO{RcNy)suw7IiG}O$hJo5H zXUwhG=J3B~FM1iDy^|y(V3r6Y{}_J$Y>Nkd%7S(aQgapnkR%h`8cOzo2~ayeH}K< ztKIF{C7>XpO95Rmis+^$$zdB>m!V)Fba^5v?oZm{RoV--&uPvbqo2t-hF9%{2J!3n zh26Y3#2%Idl_@H^H>oKv>2H8?>6fVL1V*MI=5@&(eamJ^Wpp2ga2vE?mOd+GLm%y5 zx;VS=VwEOYd?t7xxu+ob7>bh#bpXUb04|!>%74cjR9GT>tl?KMAuv=-Z#Kq*B2J?s zTIqta0e=~H)DDa7!FvO&I>glR@dTZ1xzX{y7WskM9 zrm>`2Jvfvkb#*&K^^V76#@sFx{I|v^^r(2sj73{BSP|*L$4*Y9q z`Ox^UH7LJ0kH0Yg*Mon*`;EYF1b!p%8-d>l{6^q^Fai*NNdt1oo 
z0)JeWs$nh9UgkrA=s|Gc#H|^U!m^)?Y&`@6eDN8!4ArF;v5vu(GV37JZ{)wewtR;L z*`~u16x$g8m?^KFhLg0cKsWz&sb{WbFh>Iom-;JISF+hb<$iyFm_>y?uE6)4VrjB& z)6D~CEseV8Pe0bU*(km{4Um_(yN}dOE~%F7uJ-XW;TJXKSsIukpc1dOgGC(E{G&s5 z!w{B|4UPtu`4Ez~SR)p)mSK8V~YtWlUTQ~R#-l~VWJe&DYp!JdX zEpw}vE9t%RPWvMQ2@!Ha9q-8}EDobcIRx3=W@nBEX-*1qr}osJi=ds?Q^cG zvKn7T;~rv!(!SjWXKCY1@G>&<$zDi6QtBHoilVg~$S25ZJw4EMNV%Ro?X<&BO&GGg z@O8WL-(-Lx-8K5HTvL?=%${`-aYIPM;|8jRe~;^8A|T5su>vbN>Jzs1=(UiiqA~U% z79oORdALM;EP#YA2)62+2COnzY(tjFeV=x@*gPtqMf-5t+**{xt^G)r%4Or&(CdBp zQAM1zt)uL_>N*#UPc`!A1xU2$`35W1>*@4EW#F(~_JF>#_G9;>{5K{2$Xe*#4+V1P z^-w{5@H{7r$F#u7rdD@~XaL)4P(aK9IYC}ZcXiS8R!bn(gVFU3TJ6U1$!37MfJD|c z$B{E5F}{>YzwX0bo}}ER#%xdivSdYEBM2Aip_^Biv;qjef{`%Ei%y*}h1}lODt~7-P*J2V$wPcB_T2yEdrCA)@n&S`DC*}wqQ0DhW z$_))8EtstuWC6_x_U1L}jQL+x6oGk57IQjRrL*yE%YQtTfut?ABtDo53U4f00FaH{ zX6lNTod&eCR{sqSGagzOC=Pxb{dvI@Sv%rjIlfR7r38vUHq<$IGh?Q1oXv=xPlm~8 zSPxH%ic87|6>5m@Lv+0G&7N0DGYPJI#b&ECm1-aeifYtYb(7Xh2}rG$y_CU@cVPY9 z`8lAq>r!#4Myr7Il?2OM(LAo7e*Yoa1mDfk0R>`qEINvBUEN$RE99DVK>@pYK)~7M z*Fx^MP1{&Mu0<5M=xJ!(Pv2V;HE7GuQE%<%-KgeX5pSi%pBaG{#|ITho3wL?WV%0? 
z9gp`>1tZ?9ZB{VWR=W(yD_x)*_Tn^mhi%uF(ZAXv>r$nEYBu@40FyOnblPp03NIff z_SUvf+zw)>{oyI!+DXLpV3`PcYF_lTA`@866wY=mPveiN@4GFRd*{sTg1q;eRZV7!91)WL={BU%lC0qT*FM1YdDK2ZM3KCQdhAFM%b<`2iWOI5qm3Qa~b!< zPW6UwcFY;EXM}gLjl<%Qz$SPecc2AlMnF(~nerPC?}7FqRZr?rP45TUXW-}*CkMQwD7vq~s0H0T`K}J2+S-(l`)Jy9J7P&s?s-!VtmFtcLw7pd{ofhM z_l%zSiCL!<1~*)WV+Fr#M}dwJ`T?Thr#0qbwhMZXk5h?TpGs_P4k^7+397ZLs_GlL zt9F>+7=Ioy^C`7o+Zc2y%p$wVO?SD-^OB35=b#cjUAiXiUTI$g>D{G7X&A4)*Z!z$Uz_$tptA44hMT$-E?x4Tlw0ljDg z|KZBp#Q4kU)I6+N&3IHWgKVX#UmjN$hT-?|vDjugpRaPAAw|U}hJcf0C;Utg<%~}m zWIygh<{y2th|s?*?~bRA>qJwwhtOfi=@jD?rMMx3Pw0XXTFAl_1rNz2e zUXiwFazq^>EUbJ@6b%yBde|Swu*`ljbXs4o^+UR9WP-KJ-WD~#8c`hR06td?A z%rD;rOI!QFV+iHD^Er$Wr#6@=X8(7Bvv1l-`tvhs;h&HYb@8a<55{Fx;i%xy!Oqz+ zTji0>{1S0K0bpU`5hs}Hda1qIQo4OEY=`^k#FUc{`E|cohQI0)7lhX8v>8d!fN=rp z2}_YL+kEC@1ng+{neegIVD@r`XsX_KcNGXtrz<~cKEG$bbHX@hKw5mbO1vuR4-*I= zweT;Qg;)jIhWo~;q{{A-i7?m~Auw7E*;`U#!C~GH5nYD!SrOq94FG%HMp5eD=DuT7 zOGW9cPe?;vTta5nM~HU{#2hiDGC8ftM6S5ffk$dlQ&P)0lZjk?xh@t=v_4E7ok?aO zfe`h$3IC>U&K`D)NLVBrd;Uf062<^sMiC{kU0*{`=BI#zZv%vc4tBMkY@9 J{}UeRe*xHW6cYdd diff --git a/src/bitshuffle/bitshuffle/tests/make_regression_tdata.py b/src/bitshuffle/bitshuffle/tests/make_regression_tdata.py deleted file mode 100644 index 07045383..00000000 --- a/src/bitshuffle/bitshuffle/tests/make_regression_tdata.py +++ /dev/null @@ -1,42 +0,0 @@ -""" -Script to create data used for regression testing. - -""" - -import numpy as np -from numpy import random -import h5py - -import bitshuffle -from bitshuffle import h5 - -BLOCK_SIZE = 64 # Smallish such that datasets have many blocks but are small. 
-FILTER_PIPELINE = [h5.H5FILTER,] -FILTER_OPTS = [(BLOCK_SIZE, h5.H5_COMPRESS_LZ4)] - -OUT_FILE = "bitshuffle/tests/data/regression_%s.h5" % bitshuffle.__version__ - -DTYPES = ['a1', 'a2', 'a3', 'a4', 'a6', 'a8', 'a10'] - - -f = h5py.File(OUT_FILE, 'w') -g_comp = f.create_group("compressed") -g_orig = f.create_group("origional") - -for dtype in DTYPES: - for rep in ['a', 'b', 'c']: - dset_name = "%s_%s" % (dtype, rep) - dtype = np.dtype(dtype) - n_elem = 3 * BLOCK_SIZE + random.randint(0, BLOCK_SIZE) - shape = (n_elem,) - chunks = shape - data = random.randint(0, 255, n_elem * dtype.itemsize) - data = data.astype(np.uint8).view(dtype) - - g_orig.create_dataset(dset_name, data=data) - - h5.create_dataset(g_comp, dset_name, shape, dtype, chunks=chunks, - filter_pipeline=FILTER_PIPELINE, filter_opts=FILTER_OPTS) - g_comp[dset_name][:] = data - -f.close() diff --git a/src/bitshuffle/bitshuffle/tests/test_ext.py b/src/bitshuffle/bitshuffle/tests/test_ext.py deleted file mode 100644 index 11be1ffd..00000000 --- a/src/bitshuffle/bitshuffle/tests/test_ext.py +++ /dev/null @@ -1,588 +0,0 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -import unittest -import time -import timeit - -import numpy as np -from numpy import random - -from bitshuffle import ext - - -# If we are doing timeings by what factor to increase workload. -# Remember to change `ext.REPEATC`. -TIME = 0 -#TIME = 8 # 8kB blocks same as final blocking. -BLOCK = 1024 - - -TEST_DTYPES = [np.uint8, np.uint16, np.int32, np.uint64, np.float32, - np.float64, np.complex128] -TEST_DTYPES += [b'a3', b'a5', b'a6', b'a7', b'a9', b'a11', b'a12', b'a24', - b'a48'] - - -class TestProfile(unittest.TestCase): - - def setUp(self): - n = 1024 # bytes. - if TIME: - n *= TIME - # Almost random bits, but now quite. All bits exercised (to fully test - # transpose) but still slightly compresible. 
- self.data = random.randint(0, 200, n).astype(np.uint8) - self.fun = ext.copy - self.check = None - self.check_data = None - self.case = "None" - - def tearDown(self): - """Performs all tests and timings.""" - if TIME: - reps = 10 - else: - reps = 1 - delta_ts = [] - try: - for ii in range(reps): - t0 = time.time() - out = self.fun(self.data) - delta_ts.append(time.time() - t0) - except RuntimeError as err: - if (len(err.args) > 1 and (err.args[1] == -11) - and not ext.using_SSE2()): - return - if (len(err.args) > 1 and (err.args[1] == -12) - and not ext.using_AVX2()): - return - else: - raise - delta_t = min(delta_ts) - size_i = self.data.size * self.data.dtype.itemsize - size_o = out.size * out.dtype.itemsize - size = max([size_i, size_o]) - speed = (ext.REPEAT * size / delta_t / 1024**3) # GB/s - if TIME: - print("%-20s: %5.2f s/GB, %5.2f GB/s" % (self.case, 1./speed, speed)) - if not self.check is None: - ans = self.check(self.data).view(np.uint8) - self.assertTrue(np.all(ans == out.view(np.uint8))) - if not self.check_data is None: - ans = self.check_data.view(np.uint8) - self.assertTrue(np.all(ans == out.view(np.uint8))) - - def test_00_copy(self): - self.case = "copy" - self.fun = ext.copy - self.check = lambda x: x - - def test_01a_trans_byte_elem_scal_16(self): - self.case = "byte T elem scal 16" - self.data = self.data.view(np.int16) - self.fun = ext.trans_byte_elem_scal - self.check = trans_byte_elem - - def test_01b_trans_byte_elem_scal_32(self): - self.case = "byte T elem scal 32" - self.data = self.data.view(np.int32) - self.fun = ext.trans_byte_elem_scal - self.check = trans_byte_elem - - def test_01c_trans_byte_elem_scal_64(self): - self.case = "byte T elem scal 64" - self.data = self.data.view(np.int64) - self.fun = ext.trans_byte_elem_scal - self.check = trans_byte_elem - - def test_01d_trans_byte_elem_16(self): - self.case = "byte T elem SSE 16" - self.data = self.data.view(np.int16) - self.fun = ext.trans_byte_elem_SSE - self.check = 
trans_byte_elem - - def test_01e_trans_byte_elem_32(self): - self.case = "byte T elem SSE 32" - self.data = self.data.view(np.float32) - self.fun = ext.trans_byte_elem_SSE - self.check = trans_byte_elem - - def test_01f_trans_byte_elem_64(self): - self.case = "byte T elem SSE 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_byte_elem_SSE - self.check = trans_byte_elem - - def test_01g_trans_byte_elem_128(self): - self.case = "byte T elem SSE 128" - self.data = self.data.view(np.complex128) - self.fun = ext.trans_byte_elem_SSE - self.check = trans_byte_elem - - def test_01h_trans_byte_elem_96(self): - self.case = "byte T elem SSE 96" - n = self.data.size // 128 * 96 - dt = np.dtype([(str('a'), np.int32), (str('b'), np.int32), - (str('c'), np.int32)]) - self.data = self.data[:n].view(dt) - self.fun = ext.trans_byte_elem_SSE - self.check = trans_byte_elem - - def test_01i_trans_byte_elem_80(self): - self.case = "byte T elem SSE 80" - n = self.data.size // 128 * 80 - dt = np.dtype([(str('a'), np.int16), (str('b'), np.int16), - (str('c'), np.int16), (str('d'), np.int16), - (str('e'), np.int16)]) - self.data = self.data[:n].view(dt) - self.fun = ext.trans_byte_elem_SSE - self.check = trans_byte_elem - - def test_03a_trans_bit_byte(self): - self.case = "bit T byte scal 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_bit_byte_scal - self.check = trans_bit_byte - - def test_03d_trans_bit_byte_SSE(self): - self.case = "bit T byte SSE 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_bit_byte_SSE - self.check = trans_bit_byte - - def test_03f_trans_bit_byte_AVX(self): - self.case = "bit T byte AVX 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_bit_byte_AVX - self.check = trans_bit_byte - - def test_03g_trans_bit_byte_AVX_32(self): - self.case = "bit T byte AVX 32" - self.data = self.data.view(np.float32) - self.fun = ext.trans_bit_byte_AVX - self.check = trans_bit_byte - - def 
test_04a_trans_bit_elem_AVX(self): - self.case = "bit T elem AVX 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_bit_elem_AVX - self.check = trans_bit_elem - - def test_04b_trans_bit_elem_AVX_128(self): - self.case = "bit T elem AVX 128" - self.data = self.data.view(np.complex128) - self.fun = ext.trans_bit_elem_AVX - self.check = trans_bit_elem - - def test_04c_trans_bit_elem_AVX_32(self): - self.case = "bit T elem AVX 32" - self.data = self.data.view(np.float32) - self.fun = ext.trans_bit_elem_AVX - self.check = trans_bit_elem - - def test_04d_trans_bit_elem_AVX_16(self): - self.case = "bit T elem AVX 16" - self.data = self.data.view(np.int16) - self.fun = ext.trans_bit_elem_AVX - self.check = trans_bit_elem - - def test_04e_trans_bit_elem_64(self): - self.case = "bit T elem scal 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_bit_elem_scal - self.check = trans_bit_elem - - def test_04f_trans_bit_elem_SSE_32(self): - self.case = "bit T elem SSE 32" - self.data = self.data.view(np.float32) - self.fun = ext.trans_bit_elem_SSE - self.check = trans_bit_elem - - def test_04g_trans_bit_elem_SSE_64(self): - self.case = "bit T elem SSE 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_bit_elem_SSE - self.check = trans_bit_elem - - def test_06a_untrans_bit_elem_16(self): - self.case = "bit U elem SSE 16" - pre_trans = self.data.view(np.int16) - self.data = trans_bit_elem(pre_trans) - self.fun = ext.untrans_bit_elem_SSE - self.check_data = pre_trans - - def test_06b_untrans_bit_elem_128(self): - self.case = "bit U elem SSE 128" - pre_trans = self.data.view(np.complex128) - self.data = trans_bit_elem(pre_trans) - self.fun = ext.untrans_bit_elem_SSE - self.check_data = pre_trans - - def test_06c_untrans_bit_elem_32(self): - self.case = "bit U elem SSE 32" - pre_trans = self.data.view(np.float32) - self.data = trans_bit_elem(pre_trans) - self.fun = ext.untrans_bit_elem_SSE - self.check_data = pre_trans - - def 
test_06d_untrans_bit_elem_32(self): - self.case = "bit U elem AVX 32" - pre_trans = self.data.view(np.float32) - self.data = trans_bit_elem(pre_trans) - self.fun = ext.untrans_bit_elem_AVX - self.check_data = pre_trans - - def test_06e_untrans_bit_elem_64(self): - self.case = "bit U elem SSE 64" - pre_trans = self.data.view(np.float64) - self.data = trans_bit_elem(pre_trans) - self.fun = ext.untrans_bit_elem_SSE - self.check_data = pre_trans - - def test_06f_untrans_bit_elem_64(self): - self.case = "bit U elem AVX 64" - pre_trans = self.data.view(np.float64) - self.data = trans_bit_elem(pre_trans) - self.fun = ext.untrans_bit_elem_AVX - self.check_data = pre_trans - - def test_06g_untrans_bit_elem_64(self): - self.case = "bit U elem scal 64" - pre_trans = self.data.view(np.float64) - self.data = trans_bit_elem(pre_trans) - self.fun = ext.untrans_bit_elem_scal - self.check_data = pre_trans - - def test_07a_trans_byte_bitrow_64(self): - self.case = "byte T row scal 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_byte_bitrow_scal - - def test_07b_trans_byte_bitrow_SSE_64(self): - self.case = "byte T row SSE 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_byte_bitrow_SSE - self.check = ext.trans_byte_bitrow_scal - - def test_07c_trans_byte_bitrow_AVX_64(self): - self.case = "byte T row AVX 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_byte_bitrow_AVX - self.check = ext.trans_byte_bitrow_scal - - def test_08a_shuffle_bit_eight_scal_64(self): - self.case = "bit S eight scal 64" - self.data = self.data.view(np.float64) - self.fun = ext.shuffle_bit_eightelem_scal - - def test_08b_shuffle_bit_eight_SSE_64(self): - self.case = "bit S eight SSE 64" - self.data = self.data.view(np.float64) - self.fun = ext.shuffle_bit_eightelem_SSE - self.check = ext.shuffle_bit_eightelem_scal - - def test_08c_shuffle_bit_eight_AVX_32(self): - self.case = "bit S eight AVX 32" - self.data = self.data.view(np.float32) - self.fun = 
ext.shuffle_bit_eightelem_AVX - self.check = ext.shuffle_bit_eightelem_scal - - def test_08d_shuffle_bit_eight_AVX_64(self): - self.case = "bit S eight AVX 64" - self.data = self.data.view(np.float64) - self.fun = ext.shuffle_bit_eightelem_AVX - self.check = ext.shuffle_bit_eightelem_scal - - def test_08e_shuffle_bit_eight_AVX_16(self): - self.case = "bit S eight AVX 16" - self.data = self.data.view(np.int16) - self.fun = ext.shuffle_bit_eightelem_AVX - self.check = ext.shuffle_bit_eightelem_scal - - def test_08f_shuffle_bit_eight_AVX_128(self): - self.case = "bit S eight AVX 128" - self.data = self.data.view(np.complex128) - self.fun = ext.shuffle_bit_eightelem_AVX - self.check = ext.shuffle_bit_eightelem_scal - - def test_09a_trans_bit_elem_scal_64(self): - self.case = "bit T elem scal 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_bit_elem_scal - self.check = trans_bit_elem - - def test_09b_trans_bit_elem_SSE_64(self): - self.case = "bit T elem SSE 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_bit_elem_SSE - self.check = trans_bit_elem - - def test_09c_trans_bit_elem_AVX_64(self): - self.case = "bit T elem AVX 64" - self.data = self.data.view(np.float64) - self.fun = ext.trans_bit_elem_AVX - self.check = trans_bit_elem - - def test_09d_untrans_bit_elem_scal_64(self): - self.case = "bit U elem scal 64" - pre_trans = self.data.view(np.float64) - self.data = trans_bit_elem(pre_trans) - self.fun = ext.untrans_bit_elem_scal - self.check_data = pre_trans - - def test_09e_untrans_bit_elem_SSE_64(self): - self.case = "bit U elem SSE 64" - pre_trans = self.data.view(np.float64) - self.data = trans_bit_elem(pre_trans) - self.fun = ext.untrans_bit_elem_SSE - self.check_data = pre_trans - - def test_09f_untrans_bit_elem_AVX_64(self): - self.case = "bit U elem AVX 64" - pre_trans = self.data.view(np.float64) - self.data = trans_bit_elem(pre_trans) - self.fun = ext.untrans_bit_elem_AVX - self.check_data = pre_trans - - def 
test_10a_bitshuffle_64(self): - self.case = "bitshuffle 64" - self.data = self.data.view(np.float64) - self.fun = lambda x: ext.bitshuffle(x, BLOCK) - - def test_10b_bitunshuffle_64(self): - self.case = "bitunshuffle 64" - pre_trans = self.data.view(np.float64) - self.data = ext.bitshuffle(pre_trans, BLOCK) - self.fun = lambda x: ext.bitunshuffle(x, BLOCK) - self.check_data = pre_trans - - def test_10c_compress_64(self): - self.case = "compress 64" - self.data = self.data.view(np.float64) - self.fun = lambda x:ext.compress_lz4(x, BLOCK) - - def test_10d_decompress_64(self): - self.case = "decompress 64" - pre_trans = self.data.view(np.float64) - self.data = ext.compress_lz4(pre_trans, BLOCK) - self.fun = lambda x: ext.decompress_lz4(x, pre_trans.shape, - pre_trans.dtype, BLOCK) - self.check_data = pre_trans - -""" -Commented out to prevent nose from finding them. -class TestDevCases(unittest.TestCase): - - def deactivated_test_trans_byte_bitrow_AVX(self): - d = np.arange(256, dtype=np.uint32) - #d = ext.trans_bit_elem(d) - t = ext.trans_byte_bitrow_AVX(d).view(np.uint8) - t1 = ext.trans_byte_bitrow_SSE(d).view(np.uint8) - t.shape = (32, 32) - t1.shape = (32, 32) - #print t[:20,:18] - self.assertTrue(np.all(t == t1)) - - def deactivated_test_untrans_bit_elem(self): - d = np.arange(32, dtype=np.uint16) - #d = random.randint(0, 2**7, 256).astype(np.uint16) - d1 = ext.trans_bit_elem(d) - #print d - t = ext.untrans_bit_elem_AVX(d1) - #t1 = ext.untrans_bit_byte_scal(d1) - #print np.reshape(d1.view(np.uint8), (16, 4)) - #print np.reshape(t1.view(np.uint8), (2, 32)) - #print np.reshape(t2.view(np.uint8), (32, 2)) - #print np.reshape(t.view(np.uint8), (32, 2)) - - def deactivated_test_trans_bit_byte(self): - d = np.arange(16, dtype=np.uint16) - t = ext.trans_bit_byte_scal(d) - #print t - t1 = trans_bit_byte(d) - #print t1 - self.assertTrue(np.all(t == t1)) - - def deactivated_test_trans_byte_bitrow_SSE(self): - d = np.arange(256, dtype = np.uint8) - t = 
ext.trans_byte_bitrow_scal(d) - #print np.reshape(t, (32, 8)) - t1 = ext.trans_byte_bitrow_SSE(d) - #print np.reshape(t1, (32, 8)) - self.assertTrue(np.all(t == t1)) - - def deactivated_test_trans_byte_elem_SSE(self): - d = np.empty(16, dtype=([('a', 'u4'), ('b', 'u4'), ('c', 'u4')])) - d['a'] = np.arange(16) * 1 - d['b'] = np.arange(16) * 2 - d['c'] = np.arange(16) * 3 - #print d.dtype.itemsize - #print np.reshape(d.view(np.uint8), (16, 12)) - t1 = ext.trans_byte_elem_SSE(d) - #print np.reshape(t1.view(np.uint8), (12, 16)) - t0 = trans_byte_elem(d) - #print np.reshape(t0.view(np.uint8), (12, 16)) - self.assertTrue(np.all(t0.view(np.uint8) == t1.view(np.uint8))) - - def deactivated_test_bitshuffle(self): - d = np.arange(128, dtype=np.uint16) - t1 = ext.bitshuffle(d) - #print t1 - t2 = ext.bitunshuffle(t1) - #print t2 - self.assertTrue(np.all(t2.view(np.uint8) == d.view(np.uint8))) -""" - - -class TestOddLengths(unittest.TestCase): - - def setUp(self): - self.reps = 10 - self.nmax = 128 * 8 - #self.nmax = 4 * 8 # XXX - self.fun = ext.copy - self.check = lambda x: x - - def test_trans_bit_elem_SSE(self): - self.fun = ext.trans_bit_elem_SSE - self.check = trans_bit_elem - - def test_untrans_bit_elem_SSE(self): - self.fun = lambda x: ext.untrans_bit_elem_SSE(ext.trans_bit_elem(x)) - self.check = lambda x: x - - def test_trans_bit_elem_AVX(self): - self.fun = ext.trans_bit_elem_AVX - self.check = trans_bit_elem - - def test_untrans_bit_elem_AVX(self): - self.fun = lambda x: ext.untrans_bit_elem_SSE(ext.trans_bit_elem(x)) - self.check = lambda x: x - - def test_trans_bit_elem_scal(self): - self.fun = ext.trans_bit_elem_scal - self.check = trans_bit_elem - - def test_untrans_bit_elem_scal(self): - self.fun = lambda x: ext.untrans_bit_elem_scal(ext.trans_bit_elem(x)) - self.check = lambda x: x - - def test_trans_byte_elem_SSE(self): - self.fun = ext.trans_byte_elem_SSE - self.check = trans_byte_elem - - def tearDown(self): - try: - for dtype in TEST_DTYPES: - itemsize = 
np.dtype(dtype).itemsize - nbyte_max = self.nmax * itemsize - dbuf = random.randint(0, 255, nbyte_max).astype(np.uint8) - dbuf = dbuf.view(dtype) - for ii in range(self.reps): - n = random.randint(0, self.nmax // 8, 1)[0] * 8 - data = dbuf[:n] - out = self.fun(data).view(np.uint8) - ans = self.check(data).view(np.uint8) - self.assertTrue(np.all(out == ans)) - except RuntimeError as err: - if (len(err.args) > 1 and (err.args[1] == -11) - and not ext.using_SSE2()): - return - if (len(err.args) > 1 and (err.args[1] == -12) - and not ext.using_AVX2()): - return - else: - raise - - -class TestBitShuffleCircle(unittest.TestCase): - """Ensure that final filter is circularly consistant for any data type and - any length buffer.""" - - def test_circle(self): - nmax = 100000 - reps = 20 - for dtype in TEST_DTYPES: - itemsize = np.dtype(dtype).itemsize - nbyte_max = nmax * itemsize - dbuf = random.randint(0, 255, nbyte_max).astype(np.uint8) - dbuf = dbuf.view(dtype) - for ii in range(reps): - n = random.randint(0, nmax, 1)[0] - data = dbuf[:n] - shuff = ext.bitshuffle(data) - out = ext.bitunshuffle(shuff) - self.assertTrue(out.dtype is data.dtype) - self.assertTrue(np.all(data.view(np.uint8) - == out.view(np.uint8))) - - def test_circle_with_compression(self): - nmax = 100000 - reps = 20 - for dtype in TEST_DTYPES: - itemsize = np.dtype(dtype).itemsize - nbyte_max = nmax * itemsize - dbuf = random.randint(0, 255, nbyte_max).astype(np.uint8) - dbuf = dbuf.view(dtype) - for ii in range(reps): - n = random.randint(0, nmax, 1)[0] - data = dbuf[:n] - shuff = ext.compress_lz4(data) - out = ext.decompress_lz4(shuff, data.shape, data.dtype) - self.assertTrue(out.dtype is data.dtype) - self.assertTrue(np.all(data.view(np.uint8) - == out.view(np.uint8))) - - -# Python implementations for checking results. 
- -def trans_byte_elem(arr): - dtype = arr.dtype - itemsize = dtype.itemsize - in_buf = arr.flat[:].view(np.uint8) - nelem = in_buf.size // itemsize - in_buf.shape = (nelem, itemsize) - - out_buf = np.empty((itemsize, nelem), dtype=np.uint8) - for ii in range(nelem): - for jj in range(itemsize): - out_buf[jj,ii] = in_buf[ii,jj] - return out_buf.flat[:].view(dtype) - - -def trans_bit_byte(arr): - n = arr.size - dtype = arr.dtype - itemsize = dtype.itemsize - bits = np.unpackbits(arr.view(np.uint8)) - bits.shape = (n * itemsize, 8) - # We have to reverse the order of the bits both for unpacking and packing, - # since we want to call the least significant bit the first bit. - bits = bits[:,::-1] - bits_shuff = (bits.T).copy() - bits_shuff.shape = (n * itemsize, 8) - bits_shuff = bits_shuff[:,::-1] - arr_bt = np.packbits(bits_shuff.flat[:]) - return arr_bt.view(dtype) - - -def trans_bit_elem(arr): - n = arr.size - dtype = arr.dtype - itemsize = dtype.itemsize - bits = np.unpackbits(arr.view(np.uint8)) - bits.shape = (n * itemsize, 8) - # We have to reverse the order of the bits both for unpacking and packing, - # since we want to call the least significant bit the first bit. 
- bits = bits[:,::-1].copy() - bits.shape = (n, itemsize * 8) - bits_shuff = (bits.T).copy() - bits_shuff.shape = (n * itemsize, 8) - bits_shuff = bits_shuff[:,::-1] - arr_bt = np.packbits(bits_shuff.flat[:]) - return arr_bt.view(dtype) - - - -if __name__ == "__main__": - unittest.main() diff --git a/src/bitshuffle/bitshuffle/tests/test_h5filter.py b/src/bitshuffle/bitshuffle/tests/test_h5filter.py deleted file mode 100644 index 6739b998..00000000 --- a/src/bitshuffle/bitshuffle/tests/test_h5filter.py +++ /dev/null @@ -1,91 +0,0 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -import unittest -import os -import glob - -import numpy as np -import h5py -from h5py import h5f, h5d, h5z, h5t, h5s, filters -from subprocess import Popen, PIPE, STDOUT - -from bitshuffle import h5 - - -os.environ["HDF5_PLUGIN_PATH"] = "" - - -class TestFilter(unittest.TestCase): - - def test_filter(self): - shape = (32 * 1024 + 783,) - chunks = (4 * 1024 + 23,) - dtype = np.int64 - data = np.arange(shape[0]) - fname = "tmp_test_filters.h5" - f = h5py.File(fname) - h5.create_dataset(f, b"range", shape, dtype, chunks, - filter_pipeline=(32008, 32000), - filter_flags=(h5z.FLAG_MANDATORY, h5z.FLAG_MANDATORY), - filter_opts=None) - f["range"][:] = data - - f.close() - - f = h5py.File(fname, 'r') - d = f['range'][:] - self.assertTrue(np.all(d == data)) - f.close() - - def test_with_block_size(self): - shape = (128 * 1024 + 783,) - chunks = (4 * 1024 + 23,) - dtype = np.int64 - data = np.arange(shape[0]) - fname = "tmp_test_filters.h5" - f = h5py.File(fname) - h5.create_dataset(f, b"range", shape, dtype, chunks, - filter_pipeline=(32008, 32000), - filter_flags=(h5z.FLAG_MANDATORY, h5z.FLAG_MANDATORY), - filter_opts=((680,), ()), - ) - f["range"][:] = data - - f.close() - #os.system('h5dump -H -p tmp_test_filters.h5') - - f = h5py.File(fname, 'r') - d = f['range'][:] - self.assertTrue(np.all(d == data)) - f.close() - - def test_with_compression(self): - 
shape = (128 * 1024 + 783,) - chunks = (4 * 1024 + 23,) - dtype = np.int64 - data = np.arange(shape[0]) - fname = "tmp_test_filters.h5" - f = h5py.File(fname) - h5.create_dataset(f, b"range", shape, dtype, chunks, - filter_pipeline=(32008,), - filter_flags=(h5z.FLAG_MANDATORY,), - filter_opts=((0, h5.H5_COMPRESS_LZ4),), - ) - f["range"][:] = data - - f.close() - #os.system('h5dump -H -p tmp_test_filters.h5') - - f = h5py.File(fname, 'r') - d = f['range'][:] - self.assertTrue(np.all(d == data)) - f.close() - - def tearDown(self): - files = glob.glob("tmp_test_*") - for f in files: - os.remove(f) - - -if __name__ == "__main__": - unittest.main() diff --git a/src/bitshuffle/bitshuffle/tests/test_h5plugin.py b/src/bitshuffle/bitshuffle/tests/test_h5plugin.py deleted file mode 100644 index 220d55da..00000000 --- a/src/bitshuffle/bitshuffle/tests/test_h5plugin.py +++ /dev/null @@ -1,83 +0,0 @@ -from __future__ import absolute_import, division, print_function, unicode_literals -import unittest -import os, os.path -import glob - -import numpy as np -import h5py -from h5py import h5f, h5d, h5z, h5t, h5s, filters -from subprocess import Popen, PIPE, STDOUT - -import bitshuffle - - -plugin_dir = os.path.join(os.path.dirname(bitshuffle.__file__), - 'plugin') -os.environ["HDF5_PLUGIN_PATH"] = plugin_dir - - -H5VERSION = h5py.h5.get_libversion() -if (H5VERSION[0] < 1 or (H5VERSION[0] == 1 - and (H5VERSION[1] < 8 or (H5VERSION[1] == 8 and H5VERSION[2] < 11)))): - H51811P = False -else: - H51811P = True - - -class TestFilterPlugins(unittest.TestCase): - - def test_plugins(self): - if not H51811P: - return - shape = (32 * 1024,) - chunks = (4 * 1024,) - dtype = np.int64 - data = np.arange(shape[0]) - fname = "tmp_test_filters.h5" - f = h5py.File(fname) - tid = h5t.py_create(dtype, logical=1) - sid = h5s.create_simple(shape, shape) - # Different API's for different h5py versions. 
- try: - dcpl = filters.generate_dcpl(shape, dtype, chunks, None, None, - None, None, None, None) - except TypeError: - dcpl = filters.generate_dcpl(shape, dtype, chunks, None, None, - None, None, None) - dcpl.set_filter(32008, h5z.FLAG_MANDATORY) - dcpl.set_filter(32000, h5z.FLAG_MANDATORY) - dset_id = h5d.create(f.id, b"range", tid, sid, dcpl=dcpl) - dset_id.write(h5s.ALL, h5s.ALL, data) - f.close() - - # Make sure the filters are working outside of h5py by calling h5dump - h5dump = Popen(['h5dump', fname], - stdout=PIPE, stderr=STDOUT) - stdout, nothing = h5dump.communicate() - err = h5dump.returncode - self.assertEqual(err, 0) - - - f = h5py.File(fname, 'r') - d = f['range'][:] - self.assertTrue(np.all(d == data)) - f.close() - - - #def test_h5py_hl(self): - # if not H51811P: - # return - # # Does not appear to be supported by h5py. - # fname = "tmp_test_h5py_hl.h5" - # f = h5py.File(fname) - # f.create_dataset("range", np.arange(1024, dtype=np.int64), - # compression=32008) - - def tearDown(self): - files = glob.glob("tmp_test_*") - for f in files: - os.remove(f) - - -if __name__ == "__main__": - unittest.main() diff --git a/src/bitshuffle/bitshuffle/tests/test_regression.py b/src/bitshuffle/bitshuffle/tests/test_regression.py deleted file mode 100644 index 2862cace..00000000 --- a/src/bitshuffle/bitshuffle/tests/test_regression.py +++ /dev/null @@ -1,40 +0,0 @@ -""" -Test that data encoded with earlier versions can still be decoded correctly. 
- -""" - -from __future__ import absolute_import, division, print_function - -import unittest -from os import path - -import numpy as np -import h5py - -import bitshuffle -from bitshuffle import h5 - - -TEST_DATA_DIR = path.dirname(bitshuffle.__file__) + "/tests/data" - -OUT_FILE_TEMPLATE = TEST_DATA_DIR + "/regression_%s.h5" - -VERSIONS = ["0.1.3",] - - -class TestAll(unittest.TestCase): - - def test_regression(self): - for version in VERSIONS: - file_name = OUT_FILE_TEMPLATE % version - f = h5py.File(file_name) - g_orig = f["origional"] - g_comp = f["compressed"] - - for dset_name in g_comp.keys(): - self.assertTrue(np.all(g_comp[dset_name][:] - == g_orig[dset_name][:])) - - -if __name__ == "__main__": - unittest.main() diff --git a/src/bitshuffle/conda-recipe/bld.bat b/src/bitshuffle/conda-recipe/bld.bat deleted file mode 100644 index ccbb10f9..00000000 --- a/src/bitshuffle/conda-recipe/bld.bat +++ /dev/null @@ -1,3 +0,0 @@ -SET CONDA_HOME=%PREFIX% -"%PYTHON%" setup.py install -if errorlevel 1 exit 1 diff --git a/src/bitshuffle/conda-recipe/build.sh b/src/bitshuffle/conda-recipe/build.sh deleted file mode 100644 index 34c3a689..00000000 --- a/src/bitshuffle/conda-recipe/build.sh +++ /dev/null @@ -1,2 +0,0 @@ -export CONDA_HOME=$PREFIX -$PYTHON setup.py install # Python command to install the script diff --git a/src/bitshuffle/conda-recipe/meta.yaml b/src/bitshuffle/conda-recipe/meta.yaml deleted file mode 100644 index ac227e2b..00000000 --- a/src/bitshuffle/conda-recipe/meta.yaml +++ /dev/null @@ -1,27 +0,0 @@ -package: - name: bitshuffle - version: 0.2.1 -source: - # git_url: https://github.com/kiyo-masui/bitshuffle.git - # git_rev: 0.2.1 - path: .. - patches: - - setup.py.patch - -requirements: - build: - - python - - setuptools - - cython - - numpy - - h5py - - hdf5 - run: - - python - - numpy - - h5py - - cython - -about: - home: https://github.com/kiyo-masui/bitshuffle/blob/master/setup.py - summary: "bitshuffle library." 
diff --git a/src/bitshuffle/conda-recipe/setup.py.patch b/src/bitshuffle/conda-recipe/setup.py.patch deleted file mode 100644 index 437a5ffa..00000000 --- a/src/bitshuffle/conda-recipe/setup.py.patch +++ /dev/null @@ -1,13 +0,0 @@ ---- setup.py 2016-01-19 16:56:12.954563000 +0100 -+++ xxx.py 2016-01-19 16:56:00.817087000 +0100 -@@ -40,8 +40,8 @@ - - # Copied from h5py. - # TODO, figure out what the canonacal way to do this should be. --INCLUDE_DIRS = [] --LIBRARY_DIRS = [] -+INCLUDE_DIRS = [os.environ['CONDA_HOME'] + '/include'] -+LIBRARY_DIRS = [os.environ['CONDA_HOME'] + '/lib'] - if sys.platform == 'darwin': - # putting here both macports and homebrew paths will generate - # "ld: warning: dir not found" at the linking phase diff --git a/src/bitshuffle/lz4/LICENSE b/src/bitshuffle/lz4/LICENSE deleted file mode 100644 index b566df30..00000000 --- a/src/bitshuffle/lz4/LICENSE +++ /dev/null @@ -1,24 +0,0 @@ -LZ4 Library -Copyright (c) 2011-2014, Yann Collet -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/src/bitshuffle/lz4/README.md b/src/bitshuffle/lz4/README.md deleted file mode 100644 index f6ebf5e1..00000000 --- a/src/bitshuffle/lz4/README.md +++ /dev/null @@ -1,21 +0,0 @@ -LZ4 - Library Files -================================ - -The __lib__ directory contains several files, but you don't necessarily need them all. - -To integrate fast LZ4 compression/decompression into your program, you basically just need "**lz4.c**" and "**lz4.h**". - -For more compression at the cost of compression speed (while preserving decompression speed), use **lz4hc** on top of regular lz4. `lz4hc` only provides compression functions. It also needs `lz4` to compile properly. - -If you want to produce files or data streams compatible with `lz4` command line utility, use **lz4frame**. This library encapsulates lz4-compressed blocks into the [official interoperable frame format]. In order to work properly, lz4frame needs lz4 and lz4hc, and also **xxhash**, which provides error detection algorithm. -(_Advanced stuff_ : It's possible to hide xxhash symbols into a local namespace. This is what `liblz4` does, to avoid symbol duplication in case a user program would link to several libraries containing xxhash symbols.) - -A more complex "lz4frame_static.h" is also provided, although its usage is not recommended. It contains definitions which are not guaranteed to remain stable within future versions. Use for static linking ***only***. 
- -The other files are not source code. There are : - - - LICENSE : contains the BSD license text - - Makefile : script to compile or install lz4 library (static or dynamic) - - liblz4.pc.in : for pkg-config (make install) - -[official interoperable frame format]: ../lz4_Frame_format.md diff --git a/src/bitshuffle/lz4/lz4.c b/src/bitshuffle/lz4/lz4.c deleted file mode 100644 index 08cf6b5c..00000000 --- a/src/bitshuffle/lz4/lz4.c +++ /dev/null @@ -1,1516 +0,0 @@ -/* - LZ4 - Fast LZ compression algorithm - Copyright (C) 2011-2015, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - You can contact the author at : - - LZ4 source repository : https://github.com/Cyan4973/lz4 - - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c -*/ - - -/************************************** -* Tuning parameters -**************************************/ -/* - * HEAPMODE : - * Select how default compression functions will allocate memory for their hash table, - * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()). - */ -#define HEAPMODE 0 - -/* - * ACCELERATION_DEFAULT : - * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0 - */ -#define ACCELERATION_DEFAULT 1 - - -/************************************** -* CPU Feature Detection -**************************************/ -/* - * LZ4_FORCE_SW_BITCOUNT - * Define this parameter if your target system or compiler does not support hardware bit count - */ -#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for Windows CE does not support Hardware bit count */ -# define LZ4_FORCE_SW_BITCOUNT -#endif - - -/************************************** -* Includes -**************************************/ -#include "lz4.h" - - -/************************************** -* Compiler Options -**************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# define FORCE_INLINE static __forceinline -# include -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 4293) /* disable: C4293: too large shift (32-bits) */ -#else -# if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ -# if defined(__GNUC__) || defined(__clang__) -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -# else -# define FORCE_INLINE static -# endif /* __STDC_VERSION__ */ -#endif /* _MSC_VER */ - -/* LZ4_GCC_VERSION is defined into lz4.h */ -#if (LZ4_GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) -# 
define expect(expr,value) (__builtin_expect ((expr),(value)) ) -#else -# define expect(expr,value) (expr) -#endif - -#define likely(expr) expect((expr) != 0, 1) -#define unlikely(expr) expect((expr) != 0, 0) - - -/************************************** -* Memory routines -**************************************/ -#include /* malloc, calloc, free */ -#define ALLOCATOR(n,s) calloc(n,s) -#define FREEMEM free -#include /* memset, memcpy */ -#define MEM_INIT memset - - -/************************************** -* Basic Types -**************************************/ -#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; -#else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; -#endif - - -/************************************** -* Reading and writing into memory -**************************************/ -#define STEPSIZE sizeof(size_t) - -static unsigned LZ4_64bits(void) { return sizeof(void*)==8; } - -static unsigned LZ4_isLittleEndian(void) -{ - const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ - return one.c[0]; -} - - -static U16 LZ4_read16(const void* memPtr) -{ - U16 val16; - memcpy(&val16, memPtr, 2); - return val16; -} - -static U16 LZ4_readLE16(const void* memPtr) -{ - if (LZ4_isLittleEndian()) - { - return LZ4_read16(memPtr); - } - else - { - const BYTE* p = (const BYTE*)memPtr; - return (U16)((U16)p[0] + (p[1]<<8)); - } -} - -static void LZ4_writeLE16(void* memPtr, U16 value) -{ - if (LZ4_isLittleEndian()) - { - memcpy(memPtr, &value, 2); - } - else - { - BYTE* p = (BYTE*)memPtr; - p[0] = (BYTE) value; - p[1] = (BYTE)(value>>8); - } -} - -static U32 LZ4_read32(const void* memPtr) -{ - U32 val32; - memcpy(&val32, memPtr, 4); - return val32; -} - -static U64 LZ4_read64(const 
void* memPtr) -{ - U64 val64; - memcpy(&val64, memPtr, 8); - return val64; -} - -static size_t LZ4_read_ARCH(const void* p) -{ - if (LZ4_64bits()) - return (size_t)LZ4_read64(p); - else - return (size_t)LZ4_read32(p); -} - - -static void LZ4_copy4(void* dstPtr, const void* srcPtr) { memcpy(dstPtr, srcPtr, 4); } - -static void LZ4_copy8(void* dstPtr, const void* srcPtr) { memcpy(dstPtr, srcPtr, 8); } - -/* customized version of memcpy, which may overwrite up to 7 bytes beyond dstEnd */ -static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd) -{ - BYTE* d = (BYTE*)dstPtr; - const BYTE* s = (const BYTE*)srcPtr; - BYTE* e = (BYTE*)dstEnd; - do { LZ4_copy8(d,s); d+=8; s+=8; } while (d>3); -# elif (defined(__clang__) || (LZ4_GCC_VERSION >= 304)) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctzll((U64)val) >> 3); -# else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; -# endif - } - else /* 32 bits */ - { -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanForward( &r, (U32)val ); - return (int)(r>>3); -# elif (defined(__clang__) || (LZ4_GCC_VERSION >= 304)) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctz((U32)val) >> 3); -# else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif - } - } - else /* Big Endian CPU */ - { - if (LZ4_64bits()) - { -# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse64( &r, val ); - return (unsigned)(r>>3); -# elif (defined(__clang__) || (LZ4_GCC_VERSION >= 304)) 
&& !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clzll((U64)val) >> 3); -# else - unsigned r; - if (!(val>>32)) { r=4; } else { r=0; val>>=32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -# endif - } - else /* 32 bits */ - { -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse( &r, (unsigned long)val ); - return (unsigned)(r>>3); -# elif (defined(__clang__) || (LZ4_GCC_VERSION >= 304)) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clz((U32)val) >> 3); -# else - unsigned r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; -# endif - } - } -} - -static unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) -{ - const BYTE* const pStart = pIn; - - while (likely(pIn compression run slower on incompressible data */ - - -/************************************** -* Local Structures and types -**************************************/ -typedef struct { - U32 hashTable[HASH_SIZE_U32]; - U32 currentOffset; - U32 initCheck; - const BYTE* dictionary; - BYTE* bufferStart; /* obsolete, used for slideInputBuffer */ - U32 dictSize; -} LZ4_stream_t_internal; - -typedef enum { notLimited = 0, limitedOutput = 1 } limitedOutput_directive; -typedef enum { byPtr, byU32, byU16 } tableType_t; - -typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive; -typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; - -typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; -typedef enum { full = 0, partial = 1 } earlyEnd_directive; - - -/************************************** -* Local Utils -**************************************/ -int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; } -int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } -int LZ4_sizeofState() { return LZ4_STREAMSIZE; } - - - -/******************************** -* Compression functions 
-********************************/ - -static U32 LZ4_hashSequence(U32 sequence, tableType_t const tableType) -{ - if (tableType == byU16) - return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); - else - return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); -} - -static const U64 prime5bytes = 889523592379ULL; -static U32 LZ4_hashSequence64(size_t sequence, tableType_t const tableType) -{ - const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG; - const U32 hashMask = (1<> (40 - hashLog)) & hashMask; -} - -static U32 LZ4_hashSequenceT(size_t sequence, tableType_t const tableType) -{ - if (LZ4_64bits()) - return LZ4_hashSequence64(sequence, tableType); - return LZ4_hashSequence((U32)sequence, tableType); -} - -static U32 LZ4_hashPosition(const void* p, tableType_t tableType) { return LZ4_hashSequenceT(LZ4_read_ARCH(p), tableType); } - -static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t const tableType, const BYTE* srcBase) -{ - switch (tableType) - { - case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; } - case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; } - case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; } - } -} - -static void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) -{ - U32 h = LZ4_hashPosition(p, tableType); - LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); -} - -static const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase) -{ - if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; } - if (tableType == byU32) { U32* hashTable = (U32*) tableBase; return hashTable[h] + srcBase; } - { U16* hashTable = (U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */ -} - -static const BYTE* 
LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) -{ - U32 h = LZ4_hashPosition(p, tableType); - return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); -} - -FORCE_INLINE int LZ4_compress_generic( - void* const ctx, - const char* const source, - char* const dest, - const int inputSize, - const int maxOutputSize, - const limitedOutput_directive outputLimited, - const tableType_t tableType, - const dict_directive dict, - const dictIssue_directive dictIssue, - const U32 acceleration) -{ - LZ4_stream_t_internal* const dictPtr = (LZ4_stream_t_internal*)ctx; - - const BYTE* ip = (const BYTE*) source; - const BYTE* base; - const BYTE* lowLimit; - const BYTE* const lowRefLimit = ip - dictPtr->dictSize; - const BYTE* const dictionary = dictPtr->dictionary; - const BYTE* const dictEnd = dictionary + dictPtr->dictSize; - const size_t dictDelta = dictEnd - (const BYTE*)source; - const BYTE* anchor = (const BYTE*) source; - const BYTE* const iend = ip + inputSize; - const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = iend - LASTLITERALS; - - BYTE* op = (BYTE*) dest; - BYTE* const olimit = op + maxOutputSize; - - U32 forwardH; - size_t refDelta=0; - - /* Init conditions */ - if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */ - switch(dict) - { - case noDict: - default: - base = (const BYTE*)source; - lowLimit = (const BYTE*)source; - break; - case withPrefix64k: - base = (const BYTE*)source - dictPtr->currentOffset; - lowLimit = (const BYTE*)source - dictPtr->dictSize; - break; - case usingExtDict: - base = (const BYTE*)source - dictPtr->currentOffset; - lowLimit = (const BYTE*)source; - break; - } - if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0; /* Size too large (not within 64K limit) */ - if (inputSize> LZ4_skipTrigger); - - if (unlikely(forwardIp > mflimit)) goto _last_literals; - - match = LZ4_getPositionOnHash(h, ctx, 
tableType, base); - if (dict==usingExtDict) - { - if (match<(const BYTE*)source) - { - refDelta = dictDelta; - lowLimit = dictionary; - } - else - { - refDelta = 0; - lowLimit = (const BYTE*)source; - } - } - forwardH = LZ4_hashPosition(forwardIp, tableType); - LZ4_putPositionOnHash(ip, h, ctx, tableType, base); - - } while ( ((dictIssue==dictSmall) ? (match < lowRefLimit) : 0) - || ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip)) - || (LZ4_read32(match+refDelta) != LZ4_read32(ip)) ); - } - - /* Catch up */ - while ((ip>anchor) && (match+refDelta > lowLimit) && (unlikely(ip[-1]==match[refDelta-1]))) { ip--; match--; } - - { - /* Encode Literal length */ - unsigned litLength = (unsigned)(ip - anchor); - token = op++; - if ((outputLimited) && (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit))) - return 0; /* Check output limit */ - if (litLength>=RUN_MASK) - { - int len = (int)litLength-RUN_MASK; - *token=(RUN_MASK<= 255 ; len-=255) *op++ = 255; - *op++ = (BYTE)len; - } - else *token = (BYTE)(litLength< matchlimit) limit = matchlimit; - matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, limit); - ip += MINMATCH + matchLength; - if (ip==limit) - { - unsigned more = LZ4_count(ip, (const BYTE*)source, matchlimit); - matchLength += more; - ip += more; - } - } - else - { - matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit); - ip += MINMATCH + matchLength; - } - - if ((outputLimited) && (unlikely(op + (1 + LASTLITERALS) + (matchLength>>8) > olimit))) - return 0; /* Check output limit */ - if (matchLength>=ML_MASK) - { - *token += ML_MASK; - matchLength -= ML_MASK; - for (; matchLength >= 510 ; matchLength-=510) { *op++ = 255; *op++ = 255; } - if (matchLength >= 255) { matchLength-=255; *op++ = 255; } - *op++ = (BYTE)matchLength; - } - else *token += (BYTE)(matchLength); - } - - anchor = ip; - - /* Test end of chunk */ - if (ip > mflimit) break; - - /* Fill table */ - LZ4_putPosition(ip-2, ctx, tableType, base); - - /* Test 
next position */ - match = LZ4_getPosition(ip, ctx, tableType, base); - if (dict==usingExtDict) - { - if (match<(const BYTE*)source) - { - refDelta = dictDelta; - lowLimit = dictionary; - } - else - { - refDelta = 0; - lowLimit = (const BYTE*)source; - } - } - LZ4_putPosition(ip, ctx, tableType, base); - if ( ((dictIssue==dictSmall) ? (match>=lowRefLimit) : 1) - && (match+MAX_DISTANCE>=ip) - && (LZ4_read32(match+refDelta)==LZ4_read32(ip)) ) - { token=op++; *token=0; goto _next_match; } - - /* Prepare next loop */ - forwardH = LZ4_hashPosition(++ip, tableType); - } - -_last_literals: - /* Encode Last Literals */ - { - const size_t lastRun = (size_t)(iend - anchor); - if ((outputLimited) && ((op - (BYTE*)dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) - return 0; /* Check output limit */ - if (lastRun >= RUN_MASK) - { - size_t accumulator = lastRun - RUN_MASK; - *op++ = RUN_MASK << ML_BITS; - for(; accumulator >= 255 ; accumulator-=255) *op++ = 255; - *op++ = (BYTE) accumulator; - } - else - { - *op++ = (BYTE)(lastRun<= LZ4_compressBound(inputSize)) - { - if (inputSize < LZ4_64Klimit) - return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue, acceleration); - else - return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration); - } - else - { - if (inputSize < LZ4_64Klimit) - return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); - else - return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? 
byU32 : byPtr, noDict, noDictIssue, acceleration); - } -} - - -int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) -{ -#if (HEAPMODE) - void* ctxPtr = ALLOCATOR(1, sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ -#else - LZ4_stream_t ctx; - void* ctxPtr = &ctx; -#endif - - int result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration); - -#if (HEAPMODE) - FREEMEM(ctxPtr); -#endif - return result; -} - - -int LZ4_compress_default(const char* source, char* dest, int inputSize, int maxOutputSize) -{ - return LZ4_compress_fast(source, dest, inputSize, maxOutputSize, 1); -} - - -/* hidden debug function */ -/* strangely enough, gcc generates faster code when this function is uncommented, even if unused */ -int LZ4_compress_fast_force(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) -{ - LZ4_stream_t ctx; - - LZ4_resetStream(&ctx); - - if (inputSize < LZ4_64Klimit) - return LZ4_compress_generic(&ctx, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); - else - return LZ4_compress_generic(&ctx, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? 
byU32 : byPtr, noDict, noDictIssue, acceleration); -} - - -/******************************** -* destSize variant -********************************/ - -static int LZ4_compress_destSize_generic( - void* const ctx, - const char* const src, - char* const dst, - int* const srcSizePtr, - const int targetDstSize, - const tableType_t tableType) -{ - const BYTE* ip = (const BYTE*) src; - const BYTE* base = (const BYTE*) src; - const BYTE* lowLimit = (const BYTE*) src; - const BYTE* anchor = ip; - const BYTE* const iend = ip + *srcSizePtr; - const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = iend - LASTLITERALS; - - BYTE* op = (BYTE*) dst; - BYTE* const oend = op + targetDstSize; - BYTE* const oMaxLit = op + targetDstSize - 2 /* offset */ - 8 /* because 8+MINMATCH==MFLIMIT */ - 1 /* token */; - BYTE* const oMaxMatch = op + targetDstSize - (LASTLITERALS + 1 /* token */); - BYTE* const oMaxSeq = oMaxLit - 1 /* token */; - - U32 forwardH; - - - /* Init conditions */ - if (targetDstSize < 1) return 0; /* Impossible to store anything */ - if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */ - if ((tableType == byU16) && (*srcSizePtr>=LZ4_64Klimit)) return 0; /* Size too large (not within 64K limit) */ - if (*srcSizePtr> LZ4_skipTrigger); - - if (unlikely(forwardIp > mflimit)) - goto _last_literals; - - match = LZ4_getPositionOnHash(h, ctx, tableType, base); - forwardH = LZ4_hashPosition(forwardIp, tableType); - LZ4_putPositionOnHash(ip, h, ctx, tableType, base); - - } while ( ((tableType==byU16) ? 
0 : (match + MAX_DISTANCE < ip)) - || (LZ4_read32(match) != LZ4_read32(ip)) ); - } - - /* Catch up */ - while ((ip>anchor) && (match > lowLimit) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; } - - { - /* Encode Literal length */ - unsigned litLength = (unsigned)(ip - anchor); - token = op++; - if (op + ((litLength+240)/255) + litLength > oMaxLit) - { - /* Not enough space for a last match */ - op--; - goto _last_literals; - } - if (litLength>=RUN_MASK) - { - unsigned len = litLength - RUN_MASK; - *token=(RUN_MASK<= 255 ; len-=255) *op++ = 255; - *op++ = (BYTE)len; - } - else *token = (BYTE)(litLength< oMaxMatch) - { - /* Match description too long : reduce it */ - matchLength = (15-1) + (oMaxMatch-op) * 255; - } - //printf("offset %5i, matchLength%5i \n", (int)(ip-match), matchLength + MINMATCH); - ip += MINMATCH + matchLength; - - if (matchLength>=ML_MASK) - { - *token += ML_MASK; - matchLength -= ML_MASK; - while (matchLength >= 255) { matchLength-=255; *op++ = 255; } - *op++ = (BYTE)matchLength; - } - else *token += (BYTE)(matchLength); - } - - anchor = ip; - - /* Test end of block */ - if (ip > mflimit) break; - if (op > oMaxSeq) break; - - /* Fill table */ - LZ4_putPosition(ip-2, ctx, tableType, base); - - /* Test next position */ - match = LZ4_getPosition(ip, ctx, tableType, base); - LZ4_putPosition(ip, ctx, tableType, base); - if ( (match+MAX_DISTANCE>=ip) - && (LZ4_read32(match)==LZ4_read32(ip)) ) - { token=op++; *token=0; goto _next_match; } - - /* Prepare next loop */ - forwardH = LZ4_hashPosition(++ip, tableType); - } - -_last_literals: - /* Encode Last Literals */ - { - size_t lastRunSize = (size_t)(iend - anchor); - if (op + 1 /* token */ + ((lastRunSize+240)/255) /* litLength */ + lastRunSize /* literals */ > oend) - { - /* adapt lastRunSize to fill 'dst' */ - lastRunSize = (oend-op) - 1; - lastRunSize -= (lastRunSize+240)/255; - } - ip = anchor + lastRunSize; - - if (lastRunSize >= RUN_MASK) - { - size_t accumulator = lastRunSize - RUN_MASK; - 
*op++ = RUN_MASK << ML_BITS; - for(; accumulator >= 255 ; accumulator-=255) *op++ = 255; - *op++ = (BYTE) accumulator; - } - else - { - *op++ = (BYTE)(lastRunSize<= LZ4_compressBound(*srcSizePtr)) /* compression success is guaranteed */ - { - return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1); - } - else - { - if (*srcSizePtr < LZ4_64Klimit) - return LZ4_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, byU16); - else - return LZ4_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, LZ4_64bits() ? byU32 : byPtr); - } -} - - -int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize) -{ -#if (HEAPMODE) - void* ctx = ALLOCATOR(1, sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ -#else - LZ4_stream_t ctxBody; - void* ctx = &ctxBody; -#endif - - int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize); - -#if (HEAPMODE) - FREEMEM(ctx); -#endif - return result; -} - - - -/******************************** -* Streaming functions -********************************/ - -LZ4_stream_t* LZ4_createStream(void) -{ - LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOCATOR(8, LZ4_STREAMSIZE_U64); - LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal)); /* A compilation error here means LZ4_STREAMSIZE is not large enough */ - LZ4_resetStream(lz4s); - return lz4s; -} - -void LZ4_resetStream (LZ4_stream_t* LZ4_stream) -{ - MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t)); -} - -int LZ4_freeStream (LZ4_stream_t* LZ4_stream) -{ - FREEMEM(LZ4_stream); - return (0); -} - - -#define HASH_UNIT sizeof(size_t) -int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) -{ - LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict; - const BYTE* p = (const BYTE*)dictionary; - const BYTE* const dictEnd = p + dictSize; - const BYTE* base; - - if ((dict->initCheck) || (dict->currentOffset > 1 GB)) /* Uninitialized structure, 
or reuse overflow */ - LZ4_resetStream(LZ4_dict); - - if (dictSize < (int)HASH_UNIT) - { - dict->dictionary = NULL; - dict->dictSize = 0; - return 0; - } - - if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB; - dict->currentOffset += 64 KB; - base = p - dict->currentOffset; - dict->dictionary = p; - dict->dictSize = (U32)(dictEnd - p); - dict->currentOffset += dict->dictSize; - - while (p <= dictEnd-HASH_UNIT) - { - LZ4_putPosition(p, dict->hashTable, byU32, base); - p+=3; - } - - return dict->dictSize; -} - - -static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src) -{ - if ((LZ4_dict->currentOffset > 0x80000000) || - ((size_t)LZ4_dict->currentOffset > (size_t)src)) /* address space overflow */ - { - /* rescale hash table */ - U32 delta = LZ4_dict->currentOffset - 64 KB; - const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize; - int i; - for (i=0; ihashTable[i] < delta) LZ4_dict->hashTable[i]=0; - else LZ4_dict->hashTable[i] -= delta; - } - LZ4_dict->currentOffset = 64 KB; - if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB; - LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize; - } -} - - -int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) -{ - LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_stream; - const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize; - - const BYTE* smallest = (const BYTE*) source; - if (streamPtr->initCheck) return 0; /* Uninitialized structure detected */ - if ((streamPtr->dictSize>0) && (smallest>dictEnd)) smallest = dictEnd; - LZ4_renormDictT(streamPtr, smallest); - if (acceleration < 1) acceleration = ACCELERATION_DEFAULT; - - /* Check overlapping input/dictionary space */ - { - const BYTE* sourceEnd = (const BYTE*) source + inputSize; - if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) - { - streamPtr->dictSize = (U32)(dictEnd - sourceEnd); - if 
(streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB; - if (streamPtr->dictSize < 4) streamPtr->dictSize = 0; - streamPtr->dictionary = dictEnd - streamPtr->dictSize; - } - } - - /* prefix mode : source data follows dictionary */ - if (dictEnd == (const BYTE*)source) - { - int result; - if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) - result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, dictSmall, acceleration); - else - result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, noDictIssue, acceleration); - streamPtr->dictSize += (U32)inputSize; - streamPtr->currentOffset += (U32)inputSize; - return result; - } - - /* external dictionary mode */ - { - int result; - if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) - result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, dictSmall, acceleration); - else - result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, noDictIssue, acceleration); - streamPtr->dictionary = (const BYTE*)source; - streamPtr->dictSize = (U32)inputSize; - streamPtr->currentOffset += (U32)inputSize; - return result; - } -} - - -/* Hidden debug function, to force external dictionary mode */ -int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int inputSize) -{ - LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_dict; - int result; - const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize; - - const BYTE* smallest = dictEnd; - if (smallest > (const BYTE*) source) smallest = (const BYTE*) source; - LZ4_renormDictT((LZ4_stream_t_internal*)LZ4_dict, smallest); - - result = LZ4_compress_generic(LZ4_dict, source, dest, inputSize, 0, notLimited, byU32, 
usingExtDict, noDictIssue, 1); - - streamPtr->dictionary = (const BYTE*)source; - streamPtr->dictSize = (U32)inputSize; - streamPtr->currentOffset += (U32)inputSize; - - return result; -} - - -int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) -{ - LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict; - const BYTE* previousDictEnd = dict->dictionary + dict->dictSize; - - if ((U32)dictSize > 64 KB) dictSize = 64 KB; /* useless to define a dictionary > 64 KB */ - if ((U32)dictSize > dict->dictSize) dictSize = dict->dictSize; - - memmove(safeBuffer, previousDictEnd - dictSize, dictSize); - - dict->dictionary = (const BYTE*)safeBuffer; - dict->dictSize = (U32)dictSize; - - return dictSize; -} - - - -/******************************* -* Decompression functions -*******************************/ -/* - * This generic decompression function cover all use cases. - * It shall be instantiated several times, using different sets of directives - * Note that it is essential this generic function is really inlined, - * in order to remove useless branches during compilation optimization. - */ -FORCE_INLINE int LZ4_decompress_generic( - const char* const source, - char* const dest, - int inputSize, - int outputSize, /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. 
*/ - - int endOnInput, /* endOnOutputSize, endOnInputSize */ - int partialDecoding, /* full, partial */ - int targetOutputSize, /* only used if partialDecoding==partial */ - int dict, /* noDict, withPrefix64k, usingExtDict */ - const BYTE* const lowPrefix, /* == dest if dict == noDict */ - const BYTE* const dictStart, /* only if dict==usingExtDict */ - const size_t dictSize /* note : = 0 if noDict */ - ) -{ - /* Local Variables */ - const BYTE* ip = (const BYTE*) source; - const BYTE* const iend = ip + inputSize; - - BYTE* op = (BYTE*) dest; - BYTE* const oend = op + outputSize; - BYTE* cpy; - BYTE* oexit = op + targetOutputSize; - const BYTE* const lowLimit = lowPrefix - dictSize; - - const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize; - const size_t dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4}; - const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; - - const int safeDecode = (endOnInput==endOnInputSize); - const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB))); - - - /* Special cases */ - if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => decode everything */ - if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 
0 : -1; /* Empty output buffer */ - if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1); - - - /* Main Loop */ - while (1) - { - unsigned token; - size_t length; - const BYTE* match; - - /* get literal length */ - token = *ip++; - if ((length=(token>>ML_BITS)) == RUN_MASK) - { - unsigned s; - do - { - s = *ip++; - length += s; - } - while (likely((endOnInput)?ip(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) ) - || ((!endOnInput) && (cpy>oend-COPYLENGTH))) - { - if (partialDecoding) - { - if (cpy > oend) goto _output_error; /* Error : write attempt beyond end of output buffer */ - if ((endOnInput) && (ip+length > iend)) goto _output_error; /* Error : read attempt beyond end of input buffer */ - } - else - { - if ((!endOnInput) && (cpy != oend)) goto _output_error; /* Error : block decoding must stop exactly there */ - if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; /* Error : input must be consumed */ - } - memcpy(op, ip, length); - ip += length; - op += length; - break; /* Necessarily EOF, due to parsing restrictions */ - } - LZ4_wildCopy(op, ip, cpy); - ip += length; op = cpy; - - /* get offset */ - match = cpy - LZ4_readLE16(ip); ip+=2; - if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error; /* Error : offset outside destination buffer */ - - /* get matchlength */ - length = token & ML_MASK; - if (length == ML_MASK) - { - unsigned s; - do - { - if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error; - s = *ip++; - length += s; - } while (s==255); - if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)op)) goto _output_error; /* overflow detection */ - } - length += MINMATCH; - - /* check external dictionary */ - if ((dict==usingExtDict) && (match < lowPrefix)) - { - if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error; /* doesn't respect parsing restriction */ - - if (length <= (size_t)(lowPrefix-match)) - { - /* match can be copied as a single 
segment from external dictionary */ - match = dictEnd - (lowPrefix-match); - memmove(op, match, length); op += length; - } - else - { - /* match encompass external dictionary and current segment */ - size_t copySize = (size_t)(lowPrefix-match); - memcpy(op, dictEnd - copySize, copySize); - op += copySize; - copySize = length - copySize; - if (copySize > (size_t)(op-lowPrefix)) /* overlap within current segment */ - { - BYTE* const endOfMatch = op + copySize; - const BYTE* copyFrom = lowPrefix; - while (op < endOfMatch) *op++ = *copyFrom++; - } - else - { - memcpy(op, lowPrefix, copySize); - op += copySize; - } - } - continue; - } - - /* copy repeated sequence */ - cpy = op + length; - if (unlikely((op-match)<8)) - { - const size_t dec64 = dec64table[op-match]; - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += dec32table[op-match]; - LZ4_copy4(op+4, match); - op += 8; match -= dec64; - } else { LZ4_copy8(op, match); op+=8; match+=8; } - - if (unlikely(cpy>oend-12)) - { - if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last LASTLITERALS bytes must be literals */ - if (op < oend-8) - { - LZ4_wildCopy(op, match, oend-8); - match += (oend-8) - op; - op = oend-8; - } - while (opprefixSize = (size_t) dictSize; - lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize; - lz4sd->externalDict = NULL; - lz4sd->extDictSize = 0; - return 1; -} - -/* -*_continue() : - These decoding functions allow decompression of multiple blocks in "streaming" mode. - Previously decoded blocks must still be available at the memory position where they were decoded. 
- If it's not possible, save the relevant part of decoded data into a safe buffer, - and indicate where it stands using LZ4_setStreamDecode() -*/ -int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) -{ - LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode; - int result; - - if (lz4sd->prefixEnd == (BYTE*)dest) - { - result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, - usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) return result; - lz4sd->prefixSize += result; - lz4sd->prefixEnd += result; - } - else - { - lz4sd->extDictSize = lz4sd->prefixSize; - lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; - result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, - usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) return result; - lz4sd->prefixSize = result; - lz4sd->prefixEnd = (BYTE*)dest + result; - } - - return result; -} - -int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize) -{ - LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode; - int result; - - if (lz4sd->prefixEnd == (BYTE*)dest) - { - result = LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, - usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) return result; - lz4sd->prefixSize += originalSize; - lz4sd->prefixEnd += originalSize; - } - else - { - lz4sd->extDictSize = lz4sd->prefixSize; - lz4sd->externalDict = (BYTE*)dest - lz4sd->extDictSize; - result = LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, - usingExtDict, (BYTE*)dest, lz4sd->externalDict, 
lz4sd->extDictSize); - if (result <= 0) return result; - lz4sd->prefixSize = originalSize; - lz4sd->prefixEnd = (BYTE*)dest + originalSize; - } - - return result; -} - - -/* -Advanced decoding functions : -*_usingDict() : - These decoding functions work the same as "_continue" ones, - the dictionary must be explicitly provided within parameters -*/ - -FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize) -{ - if (dictSize==0) - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest, NULL, 0); - if (dictStart+dictSize == dest) - { - if (dictSize >= (int)(64 KB - 1)) - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, withPrefix64k, (BYTE*)dest-64 KB, NULL, 0); - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest-dictSize, NULL, 0); - } - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize); -} - -int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) -{ - return LZ4_decompress_usingDict_generic(source, dest, compressedSize, maxOutputSize, 1, dictStart, dictSize); -} - -int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) -{ - return LZ4_decompress_usingDict_generic(source, dest, 0, originalSize, 0, dictStart, dictSize); -} - -/* debug function */ -int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) -{ - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize); 
-} - - -/*************************************************** -* Obsolete Functions -***************************************************/ -/* obsolete compression functions */ -int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) { return LZ4_compress_default(source, dest, inputSize, maxOutputSize); } -int LZ4_compress(const char* source, char* dest, int inputSize) { return LZ4_compress_default(source, dest, inputSize, LZ4_compressBound(inputSize)); } -int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); } -int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1); } -int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, maxDstSize, 1); } -int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) { return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1); } - -/* -These function names are deprecated and should no longer be used. -They are only provided here for compatibility with older user programs. 
-- LZ4_uncompress is totally equivalent to LZ4_decompress_fast -- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe -*/ -int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); } -int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); } - - -/* Obsolete Streaming functions */ - -int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; } - -static void LZ4_init(LZ4_stream_t_internal* lz4ds, BYTE* base) -{ - MEM_INIT(lz4ds, 0, LZ4_STREAMSIZE); - lz4ds->bufferStart = base; -} - -int LZ4_resetStreamState(void* state, char* inputBuffer) -{ - if ((((size_t)state) & 3) != 0) return 1; /* Error : pointer is not aligned on 4-bytes boundary */ - LZ4_init((LZ4_stream_t_internal*)state, (BYTE*)inputBuffer); - return 0; -} - -void* LZ4_create (char* inputBuffer) -{ - void* lz4ds = ALLOCATOR(8, LZ4_STREAMSIZE_U64); - LZ4_init ((LZ4_stream_t_internal*)lz4ds, (BYTE*)inputBuffer); - return lz4ds; -} - -char* LZ4_slideInputBuffer (void* LZ4_Data) -{ - LZ4_stream_t_internal* ctx = (LZ4_stream_t_internal*)LZ4_Data; - int dictSize = LZ4_saveDict((LZ4_stream_t*)LZ4_Data, (char*)ctx->bufferStart, 64 KB); - return (char*)(ctx->bufferStart + dictSize); -} - -/* Obsolete streaming decompression functions */ - -int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) -{ - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB); -} - -int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize) -{ - return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB); -} - -#endif /* LZ4_COMMONDEFS_ONLY */ - diff --git a/src/bitshuffle/lz4/lz4.h 
b/src/bitshuffle/lz4/lz4.h deleted file mode 100644 index 3e740022..00000000 --- a/src/bitshuffle/lz4/lz4.h +++ /dev/null @@ -1,360 +0,0 @@ -/* - LZ4 - Fast LZ compression algorithm - Header File - Copyright (C) 2011-2015, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - LZ4 source repository : https://github.com/Cyan4973/lz4 - - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c -*/ -#pragma once - -#if defined (__cplusplus) -extern "C" { -#endif - -/* - * lz4.h provides block compression functions, and gives full buffer control to programmer. 
- * If you need to generate inter-operable compressed data (respecting LZ4 frame specification), - * and can let the library handle its own memory, please use lz4frame.h instead. -*/ - -/************************************** -* Version -**************************************/ -#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */ -#define LZ4_VERSION_MINOR 7 /* for new (non-breaking) interface capabilities */ -#define LZ4_VERSION_RELEASE 1 /* for tweaks, bug-fixes, or development */ -#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE) -int LZ4_versionNumber (void); - -/************************************** -* Tuning parameter -**************************************/ -/* - * LZ4_MEMORY_USAGE : - * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) - * Increasing memory usage improves compression ratio - * Reduced memory usage can improve speed, due to cache effect - * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache - */ -#define LZ4_MEMORY_USAGE 14 - - -/************************************** -* Simple Functions -**************************************/ - -int LZ4_compress_default(const char* source, char* dest, int sourceSize, int maxDestSize); -int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize); - -/* -LZ4_compress_default() : - Compresses 'sourceSize' bytes from buffer 'source' - into already allocated 'dest' buffer of size 'maxDestSize'. - Compression is guaranteed to succeed if 'maxDestSize' >= LZ4_compressBound(sourceSize). - It also runs faster, so it's a recommended setting. - If the function cannot compress 'source' into a more limited 'dest' budget, - compression stops *immediately*, and the function result is zero. - As a consequence, 'dest' content is not valid. - This function never writes outside 'dest' buffer, nor read outside 'source' buffer. 
- sourceSize : Max supported value is LZ4_MAX_INPUT_VALUE - maxDestSize : full or partial size of buffer 'dest' (which must be already allocated) - return : the number of bytes written into buffer 'dest' (necessarily <= maxOutputSize) - or 0 if compression fails - -LZ4_decompress_safe() : - compressedSize : is the precise full size of the compressed block. - maxDecompressedSize : is the size of destination buffer, which must be already allocated. - return : the number of bytes decompressed into destination buffer (necessarily <= maxDecompressedSize) - If destination buffer is not large enough, decoding will stop and output an error code (<0). - If the source stream is detected malformed, the function will stop decoding and return a negative result. - This function is protected against buffer overflow exploits, including malicious data packets. - It never writes outside output buffer, nor reads outside input buffer. -*/ - - -/************************************** -* Advanced Functions -**************************************/ -#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */ -#define LZ4_COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16) - -/* -LZ4_compressBound() : - Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible) - This function is primarily useful for memory allocation purposes (destination buffer size). - Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example). 
- Note that LZ4_compress_default() compress faster when dest buffer size is >= LZ4_compressBound(srcSize) - inputSize : max supported value is LZ4_MAX_INPUT_SIZE - return : maximum output size in a "worst case" scenario - or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE) -*/ -int LZ4_compressBound(int inputSize); - -/* -LZ4_compress_fast() : - Same as LZ4_compress_default(), but allows to select an "acceleration" factor. - The larger the acceleration value, the faster the algorithm, but also the lesser the compression. - It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed. - An acceleration value of "1" is the same as regular LZ4_compress_default() - Values <= 0 will be replaced by ACCELERATION_DEFAULT (see lz4.c), which is 1. -*/ -int LZ4_compress_fast (const char* source, char* dest, int sourceSize, int maxDestSize, int acceleration); - - -/* -LZ4_compress_fast_extState() : - Same compression function, just using an externally allocated memory space to store compression state. - Use LZ4_sizeofState() to know how much memory must be allocated, - and allocate it on 8-bytes boundaries (using malloc() typically). - Then, provide it as 'void* state' to compression function. -*/ -int LZ4_sizeofState(void); -int LZ4_compress_fast_extState (void* state, const char* source, char* dest, int inputSize, int maxDestSize, int acceleration); - - -/* -LZ4_compress_destSize() : - Reverse the logic, by compressing as much data as possible from 'source' buffer - into already allocated buffer 'dest' of size 'targetDestSize'. - This function either compresses the entire 'source' content into 'dest' if it's large enough, - or fill 'dest' buffer completely with as much data as possible from 'source'. - *sourceSizePtr : will be modified to indicate how many bytes where read from 'source' to fill 'dest'. - New value is necessarily <= old value. 
- return : Nb bytes written into 'dest' (necessarily <= targetDestSize) - or 0 if compression fails -*/ -int LZ4_compress_destSize (const char* source, char* dest, int* sourceSizePtr, int targetDestSize); - - -/* -LZ4_decompress_fast() : - originalSize : is the original and therefore uncompressed size - return : the number of bytes read from the source buffer (in other words, the compressed size) - If the source stream is detected malformed, the function will stop decoding and return a negative result. - Destination buffer must be already allocated. Its size must be a minimum of 'originalSize' bytes. - note : This function fully respect memory boundaries for properly formed compressed data. - It is a bit faster than LZ4_decompress_safe(). - However, it does not provide any protection against intentionally modified data stream (malicious input). - Use this function in trusted environment only (data to decode comes from a trusted source). -*/ -int LZ4_decompress_fast (const char* source, char* dest, int originalSize); - -/* -LZ4_decompress_safe_partial() : - This function decompress a compressed block of size 'compressedSize' at position 'source' - into destination buffer 'dest' of size 'maxDecompressedSize'. - The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached, - reducing decompression time. - return : the number of bytes decoded in the destination buffer (necessarily <= maxDecompressedSize) - Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller. - Always control how many bytes were decoded. - If the source stream is detected malformed, the function will stop decoding and return a negative result. - This function never writes outside of output buffer, and never reads outside of input buffer. 
It is therefore protected against malicious data packets -*/ -int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize); - - -/*********************************************** -* Streaming Compression Functions -***********************************************/ -#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4) -#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(long long)) -/* - * LZ4_stream_t - * information structure to track an LZ4 stream. - * important : init this structure content before first use ! - * note : only allocated directly the structure if you are statically linking LZ4 - * If you are using liblz4 as a DLL, please use below construction methods instead. - */ -typedef struct { long long table[LZ4_STREAMSIZE_U64]; } LZ4_stream_t; - -/* - * LZ4_resetStream - * Use this function to init an allocated LZ4_stream_t structure - */ -void LZ4_resetStream (LZ4_stream_t* streamPtr); - -/* - * LZ4_createStream will allocate and initialize an LZ4_stream_t structure - * LZ4_freeStream releases its memory. - * In the context of a DLL (liblz4), please use these methods rather than the static struct. - * They are more future proof, in case of a change of LZ4_stream_t size. - */ -LZ4_stream_t* LZ4_createStream(void); -int LZ4_freeStream (LZ4_stream_t* streamPtr); - -/* - * LZ4_loadDict - * Use this function to load a static dictionary into LZ4_stream. - * Any previous data will be forgotten, only 'dictionary' will remain in memory. - * Loading a size of 0 is allowed. - * Return : dictionary size, in bytes (necessarily <= 64 KB) - */ -int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize); - -/* - * LZ4_compress_fast_continue - * Compress buffer content 'src', using data from previously compressed blocks as dictionary to improve compression ratio. - * Important : Previous data blocks are assumed to still be present and unmodified ! 
- * 'dst' buffer must be already allocated. - * If maxDstSize >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster. - * If not, and if compressed data cannot fit into 'dst' buffer size, compression stops, and function returns a zero. - */ -int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int maxDstSize, int acceleration); - -/* - * LZ4_saveDict - * If previously compressed data block is not guaranteed to remain available at its memory location - * save it into a safer place (char* safeBuffer) - * Note : you don't need to call LZ4_loadDict() afterwards, - * dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue() - * Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error - */ -int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize); - - -/************************************************ -* Streaming Decompression Functions -************************************************/ - -#define LZ4_STREAMDECODESIZE_U64 4 -#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long)) -typedef struct { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; } LZ4_streamDecode_t; -/* - * LZ4_streamDecode_t - * information structure to track an LZ4 stream. - * init this structure content using LZ4_setStreamDecode or memset() before first use ! - * - * In the context of a DLL (liblz4) please prefer usage of construction methods below. - * They are more future proof, in case of a change of LZ4_streamDecode_t size in the future. - * LZ4_createStreamDecode will allocate and initialize an LZ4_streamDecode_t structure - * LZ4_freeStreamDecode releases its memory. - */ -LZ4_streamDecode_t* LZ4_createStreamDecode(void); -int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream); - -/* - * LZ4_setStreamDecode - * Use this function to instruct where to find the dictionary. 
- * Setting a size of 0 is allowed (same effect as reset). - * Return : 1 if OK, 0 if error - */ -int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize); - -/* -*_continue() : - These decoding functions allow decompression of multiple blocks in "streaming" mode. - Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB) - In the case of a ring buffers, decoding buffer must be either : - - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions) - In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB). - - Larger than encoding buffer, by a minimum of maxBlockSize more bytes. - maxBlockSize is implementation dependent. It's the maximum size you intend to compress into a single block. - In which case, encoding and decoding buffers do not need to be synchronized, - and encoding ring buffer can have any size, including small ones ( < 64 KB). - - _At least_ 64 KB + 8 bytes + maxBlockSize. - In which case, encoding and decoding buffers do not need to be synchronized, - and encoding ring buffer can have any size, including larger than decoding buffer. - Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer, - and indicate where it is saved using LZ4_setStreamDecode() -*/ -int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize); -int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize); - - -/* -Advanced decoding functions : -*_usingDict() : - These decoding functions work the same as - a combination of LZ4_setStreamDecode() followed by LZ4_decompress_x_continue() - They are stand-alone. They don't need nor update an LZ4_streamDecode_t structure. 
-*/ -int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize); -int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize); - - - -/************************************** -* Obsolete Functions -**************************************/ -/* Deprecate Warnings */ -/* Should these warnings messages be a problem, - it is generally possible to disable them, - with -Wno-deprecated-declarations for gcc - or _CRT_SECURE_NO_WARNINGS in Visual for example. - You can also define LZ4_DEPRECATE_WARNING_DEFBLOCK. */ -#ifndef LZ4_DEPRECATE_WARNING_DEFBLOCK -# define LZ4_DEPRECATE_WARNING_DEFBLOCK -# define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) -# if (LZ4_GCC_VERSION >= 405) || defined(__clang__) -# define LZ4_DEPRECATED(message) __attribute__((deprecated(message))) -# elif (LZ4_GCC_VERSION >= 301) -# define LZ4_DEPRECATED(message) __attribute__((deprecated)) -# elif defined(_MSC_VER) -# define LZ4_DEPRECATED(message) __declspec(deprecated(message)) -# else -# pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler") -# define LZ4_DEPRECATED(message) -# endif -#endif /* LZ4_DEPRECATE_WARNING_DEFBLOCK */ - -/* Obsolete compression functions */ -/* These functions are planned to start generate warnings by r131 approximately */ -int LZ4_compress (const char* source, char* dest, int sourceSize); -int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize); -int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); -int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); -int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize); -int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, 
char* dest, int inputSize, int maxOutputSize); - -/* Obsolete decompression functions */ -/* These function names are completely deprecated and must no longer be used. - They are only provided here for compatibility with older programs. - - LZ4_uncompress is the same as LZ4_decompress_fast - - LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe - These function prototypes are now disabled; uncomment them only if you really need them. - It is highly recommended to stop using these prototypes and migrate to maintained ones */ -/* int LZ4_uncompress (const char* source, char* dest, int outputSize); */ -/* int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); */ - -/* Obsolete streaming functions; use new streaming interface whenever possible */ -LZ4_DEPRECATED("use LZ4_createStream() instead") void* LZ4_create (char* inputBuffer); -LZ4_DEPRECATED("use LZ4_createStream() instead") int LZ4_sizeofStreamState(void); -LZ4_DEPRECATED("use LZ4_resetStream() instead") int LZ4_resetStreamState(void* state, char* inputBuffer); -LZ4_DEPRECATED("use LZ4_saveDict() instead") char* LZ4_slideInputBuffer (void* state); - -/* Obsolete streaming decoding functions */ -LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize); -LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize); - - -#if defined (__cplusplus) -} -#endif diff --git a/src/bitshuffle/lzf/LICENSE.txt b/src/bitshuffle/lzf/LICENSE.txt deleted file mode 100644 index 3787a007..00000000 --- a/src/bitshuffle/lzf/LICENSE.txt +++ /dev/null @@ -1,34 +0,0 @@ -Copyright Notice and Statement for LZF filter - -Copyright (c) 2008-2009 Andrew Collette -http://h5py.alfven.org -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -a. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -b. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the - distribution. - -c. Neither the name of the author nor the names of contributors may - be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/src/bitshuffle/lzf/README.txt b/src/bitshuffle/lzf/README.txt deleted file mode 100644 index c6ad62c3..00000000 --- a/src/bitshuffle/lzf/README.txt +++ /dev/null @@ -1,84 +0,0 @@ -=============================== -LZF filter for HDF5, revision 3 -=============================== - -The LZF filter provides high-speed compression with acceptable compression -performance, resulting in much faster performance than DEFLATE, at the -cost of a slightly lower compression ratio. 
It's appropriate for large -datasets of low to moderate complexity, for which some compression is -much better than none, but for which the speed of DEFLATE is unacceptable. - -This filter has been tested against HDF5 versions 1.6.5 through 1.8.3. It -is released under the BSD license (see LICENSE.txt for details). - - -Using the filter from HDF5 --------------------------- - -There is exactly one new public function declared in lzf_filter.h, with -the following signature: - - int register_lzf(void) - -Calling this will register the filter with the HDF5 library. A non-negative -return value indicates success. If the registration fails, an error is pushed -onto the current error stack and a negative value is returned. - -It's strongly recommended to use the SHUFFLE filter with LZF, as it's -cheap, supported by all current versions of HDF5, and can significantly -improve the compression ratio. An example C program ("example.c") is included -which demonstrates the proper use of the filter. - - -Compiling ---------- - -The filter consists of a single .c file and header, along with an embedded -version of the LZF compression library. Since the filter is stateless, it's -recommended to statically link the entire thing into your program; for -example: - - $ gcc -O2 -lhdf5 lzf/*.c lzf_filter.c myprog.c -o myprog - -It can also be built as a shared library, although you will have to install -the resulting library somewhere the runtime linker can find it: - - $ gcc -O2 -lhdf5 -fPIC -shared lzf/*.c lzf_filter.c -o liblzf_filter.so - -A similar procedure should be used for building C++ code. As in these -examples, using option -O1 or higher is strongly recommended for increased -performance. - - -Contact -------- - -This filter is maintained as part of the HDF5 for Python (h5py) project. The -goal of h5py is to provide access to the majority of the HDF5 C API and feature -set from Python. The most recent version of h5py (1.1) includes the LZF -filter by default. 
- -* Downloads and bug tracker: http://h5py.googlecode.com - -* Main web site and documentation: http://h5py.alfven.org - -* Contact email: h5py at alfven dot org - - -History of changes ------------------- - -Revision 3 (6/25/09) - Fix issue with changed filter struct definition under HDF5 1.8.3. - -Revision 2 - Minor speed enhancement. - -Revision 1 - Initial release. - - - - - - diff --git a/src/bitshuffle/lzf/README_bitshuffle.txt b/src/bitshuffle/lzf/README_bitshuffle.txt deleted file mode 100644 index d620a925..00000000 --- a/src/bitshuffle/lzf/README_bitshuffle.txt +++ /dev/null @@ -1,3 +0,0 @@ -The LZF filter for HDF5 is part of the h5py project (http://h5py.alfven.org). -The version included with bitshuffle is from version 2.3 of h5py with no -modifications other than the addition of this README. diff --git a/src/bitshuffle/lzf/example.c b/src/bitshuffle/lzf/example.c deleted file mode 100644 index 23dd776c..00000000 --- a/src/bitshuffle/lzf/example.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - Copyright (C) 2009 Andrew Collette - http://h5py.alfven.org - License: BSD (see LICENSE.txt) - - Example program demonstrating use of the LZF filter from C code. - - To compile this program: - - h5cc -DH5_USE_16_API lzf/*.c lzf_filter.c example.c -o example - - To run: - - $ ./example - Success! - $ h5ls -v test_lzf.hdf5 - Opened "test_lzf.hdf5" with sec2 driver. 
- dset Dataset {100/100, 100/100, 100/100} - Location: 0:1:0:976 - Links: 1 - Modified: 2009-02-15 16:35:11 PST - Chunks: {1, 100, 100} 40000 bytes - Storage: 4000000 logical bytes, 174288 allocated bytes, 2295.05% utilization - Filter-0: shuffle-2 OPT {4} - Filter-1: lzf-32000 OPT {1, 261, 40000} - Type: native float -*/ - -#include -#include "hdf5.h" -#include "lzf_filter.h" - -#define SIZE 100*100*100 -#define SHAPE {100,100,100} -#define CHUNKSHAPE {1,100,100} - -int main(){ - - static float data[SIZE]; - static float data_out[SIZE]; - const hsize_t shape[] = SHAPE; - const hsize_t chunkshape[] = CHUNKSHAPE; - int r, i; - int return_code = 1; - - hid_t fid, sid, dset, plist = 0; - - for(i=0; i0) H5Dclose(dset); - if(sid>0) H5Sclose(sid); - if(plist>0) H5Pclose(plist); - if(fid>0) H5Fclose(fid); - - return return_code; -} - diff --git a/src/bitshuffle/lzf/lzf/lzf.h b/src/bitshuffle/lzf/lzf/lzf.h deleted file mode 100644 index 919b6e6b..00000000 --- a/src/bitshuffle/lzf/lzf/lzf.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2000-2008 Marc Alexander Lehmann - * - * Redistribution and use in source and binary forms, with or without modifica- - * tion, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- - * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO - * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- - * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- - * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Alternatively, the contents of this file may be used under the terms of - * the GNU General Public License ("GPL") version 2 or any later version, - * in which case the provisions of the GPL are applicable instead of - * the above. If you wish to allow the use of your version of this file - * only under the terms of the GPL and not to allow others to use your - * version of this file under the BSD license, indicate your decision - * by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL. If you do not delete the - * provisions above, a recipient may use your version of this file under - * either the BSD or the GPL. - */ - -#ifndef LZF_H -#define LZF_H - -/*********************************************************************** -** -** lzf -- an extremely fast/free compression/decompression-method -** http://liblzf.plan9.de/ -** -** This algorithm is believed to be patent-free. -** -***********************************************************************/ - -#define LZF_VERSION 0x0105 /* 1.5, API version */ - -/* - * Compress in_len bytes stored at the memory block starting at - * in_data and write the result to out_data, up to a maximum length - * of out_len bytes. 
- * - * If the output buffer is not large enough or any error occurs return 0, - * otherwise return the number of bytes used, which might be considerably - * more than in_len (but less than 104% of the original size), so it - * makes sense to always use out_len == in_len - 1), to ensure _some_ - * compression, and store the data uncompressed otherwise (with a flag, of - * course. - * - * lzf_compress might use different algorithms on different systems and - * even different runs, thus might result in different compressed strings - * depending on the phase of the moon or similar factors. However, all - * these strings are architecture-independent and will result in the - * original data when decompressed using lzf_decompress. - * - * The buffers must not be overlapping. - * - * If the option LZF_STATE_ARG is enabled, an extra argument must be - * supplied which is not reflected in this header file. Refer to lzfP.h - * and lzf_c.c. - * - */ -unsigned int -lzf_compress (const void *const in_data, unsigned int in_len, - void *out_data, unsigned int out_len); - -/* - * Decompress data compressed with some version of the lzf_compress - * function and stored at location in_data and length in_len. The result - * will be stored at out_data up to a maximum of out_len characters. - * - * If the output buffer is not large enough to hold the decompressed - * data, a 0 is returned and errno is set to E2BIG. Otherwise the number - * of decompressed bytes (i.e. the original length of the data) is - * returned. - * - * If an error in the compressed data is detected, a zero is returned and - * errno is set to EINVAL. - * - * This function is very fast, about as fast as a copying loop. 
- */ -unsigned int -lzf_decompress (const void *const in_data, unsigned int in_len, - void *out_data, unsigned int out_len); - -#endif - diff --git a/src/bitshuffle/lzf/lzf/lzfP.h b/src/bitshuffle/lzf/lzf/lzfP.h deleted file mode 100644 index 8414da4d..00000000 --- a/src/bitshuffle/lzf/lzf/lzfP.h +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2000-2007 Marc Alexander Lehmann - * - * Redistribution and use in source and binary forms, with or without modifica- - * tion, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- - * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO - * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- - * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- - * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Alternatively, the contents of this file may be used under the terms of - * the GNU General Public License ("GPL") version 2 or any later version, - * in which case the provisions of the GPL are applicable instead of - * the above. 
If you wish to allow the use of your version of this file - * only under the terms of the GPL and not to allow others to use your - * version of this file under the BSD license, indicate your decision - * by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL. If you do not delete the - * provisions above, a recipient may use your version of this file under - * either the BSD or the GPL. - */ - -#ifndef LZFP_h -#define LZFP_h - -#define STANDALONE 1 /* at the moment, this is ok. */ - -#ifndef STANDALONE -# include "lzf.h" -#endif - -/* - * Size of hashtable is (1 << HLOG) * sizeof (char *) - * decompression is independent of the hash table size - * the difference between 15 and 14 is very small - * for small blocks (and 14 is usually a bit faster). - * For a low-memory/faster configuration, use HLOG == 13; - * For best compression, use 15 or 16 (or more, up to 23). - */ -#ifndef HLOG -# define HLOG 17 /* Avoid pathological case at HLOG=16 A.C. 2/15/09 */ -#endif - -/* - * Sacrifice very little compression quality in favour of compression speed. - * This gives almost the same compression as the default code, and is - * (very roughly) 15% faster. This is the preferred mode of operation. - */ -#ifndef VERY_FAST -# define VERY_FAST 1 -#endif - -/* - * Sacrifice some more compression quality in favour of compression speed. - * (roughly 1-2% worse compression for large blocks and - * 9-10% for small, redundant, blocks and >>20% better speed in both cases) - * In short: when in need for speed, enable this for binary data, - * possibly disable this for text data. 
- */ -#ifndef ULTRA_FAST -# define ULTRA_FAST 1 -#endif - -/* - * Unconditionally aligning does not cost very much, so do it if unsure - */ -#ifndef STRICT_ALIGN -# define STRICT_ALIGN !(defined(__i386) || defined (__amd64)) -#endif - -/* - * You may choose to pre-set the hash table (might be faster on some - * modern cpus and large (>>64k) blocks, and also makes compression - * deterministic/repeatable when the configuration otherwise is the same). - */ -#ifndef INIT_HTAB -# define INIT_HTAB 0 -#endif - -/* ======================================================================= - Changing things below this line may break the HDF5 LZF filter. - A.C. 2/15/09 - ======================================================================= -*/ - -/* - * Avoid assigning values to errno variable? for some embedding purposes - * (linux kernel for example), this is neccessary. NOTE: this breaks - * the documentation in lzf.h. - */ -#ifndef AVOID_ERRNO -# define AVOID_ERRNO 0 -#endif - -/* - * Wether to pass the LZF_STATE variable as argument, or allocate it - * on the stack. For small-stack environments, define this to 1. - * NOTE: this breaks the prototype in lzf.h. - */ -#ifndef LZF_STATE_ARG -# define LZF_STATE_ARG 0 -#endif - -/* - * Wether to add extra checks for input validity in lzf_decompress - * and return EINVAL if the input stream has been corrupted. This - * only shields against overflowing the input buffer and will not - * detect most corrupted streams. - * This check is not normally noticable on modern hardware - * (<1% slowdown), but might slow down older cpus considerably. - */ - -#ifndef CHECK_INPUT -# define CHECK_INPUT 1 -#endif - -/*****************************************************************************/ -/* nothing should be changed below */ - -typedef unsigned char u8; - -typedef const u8 *LZF_STATE[1 << (HLOG)]; - -#if !STRICT_ALIGN -/* for unaligned accesses we need a 16 bit datatype. 
*/ -# include -# if USHRT_MAX == 65535 - typedef unsigned short u16; -# elif UINT_MAX == 65535 - typedef unsigned int u16; -# else -# undef STRICT_ALIGN -# define STRICT_ALIGN 1 -# endif -#endif - -#if ULTRA_FAST -# if defined(VERY_FAST) -# undef VERY_FAST -# endif -#endif - -#if INIT_HTAB -# ifdef __cplusplus -# include -# else -# include -# endif -#endif - -#endif - diff --git a/src/bitshuffle/lzf/lzf/lzf_c.c b/src/bitshuffle/lzf/lzf/lzf_c.c deleted file mode 100644 index fbfd4cce..00000000 --- a/src/bitshuffle/lzf/lzf/lzf_c.c +++ /dev/null @@ -1,296 +0,0 @@ -/* - * Copyright (c) 2000-2008 Marc Alexander Lehmann - * - * Redistribution and use in source and binary forms, with or without modifica- - * tion, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- - * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO - * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- - * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- - * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * Alternatively, the contents of this file may be used under the terms of - * the GNU General Public License ("GPL") version 2 or any later version, - * in which case the provisions of the GPL are applicable instead of - * the above. If you wish to allow the use of your version of this file - * only under the terms of the GPL and not to allow others to use your - * version of this file under the BSD license, indicate your decision - * by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL. If you do not delete the - * provisions above, a recipient may use your version of this file under - * either the BSD or the GPL. - */ - -#include "lzfP.h" - -#define HSIZE (1 << (HLOG)) - -/* - * don't play with this unless you benchmark! - * decompression is not dependent on the hash function - * the hashing function might seem strange, just believe me - * it works ;) - */ -#ifndef FRST -# define FRST(p) (((p[0]) << 8) | p[1]) -# define NEXT(v,p) (((v) << 8) | p[2]) -# if ULTRA_FAST -# define IDX(h) ((( h >> (3*8 - HLOG)) - h ) & (HSIZE - 1)) -# elif VERY_FAST -# define IDX(h) ((( h >> (3*8 - HLOG)) - h*5) & (HSIZE - 1)) -# else -# define IDX(h) ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1)) -# endif -#endif -/* - * IDX works because it is very similar to a multiplicative hash, e.g. - * ((h * 57321 >> (3*8 - HLOG)) & (HSIZE - 1)) - * the latter is also quite fast on newer CPUs, and compresses similarly. 
- * - * the next one is also quite good, albeit slow ;) - * (int)(cos(h & 0xffffff) * 1e6) - */ - -#if 0 -/* original lzv-like hash function, much worse and thus slower */ -# define FRST(p) (p[0] << 5) ^ p[1] -# define NEXT(v,p) ((v) << 5) ^ p[2] -# define IDX(h) ((h) & (HSIZE - 1)) -#endif - -#define MAX_LIT (1 << 5) -#define MAX_OFF (1 << 13) -#define MAX_REF ((1 << 8) + (1 << 3)) - -#if __GNUC__ >= 3 -# define expect(expr,value) __builtin_expect ((expr),(value)) -# define inline inline -#else -# define expect(expr,value) (expr) -# define inline static -#endif - -#define expect_false(expr) expect ((expr) != 0, 0) -#define expect_true(expr) expect ((expr) != 0, 1) - -/* - * compressed format - * - * 000LLLLL ; literal - * LLLooooo oooooooo ; backref L - * 111ooooo LLLLLLLL oooooooo ; backref L+7 - * - */ - -unsigned int -lzf_compress (const void *const in_data, unsigned int in_len, - void *out_data, unsigned int out_len -#if LZF_STATE_ARG - , LZF_STATE htab -#endif - ) -{ -#if !LZF_STATE_ARG - LZF_STATE htab; -#endif - const u8 **hslot; - const u8 *ip = (const u8 *)in_data; - u8 *op = (u8 *)out_data; - const u8 *in_end = ip + in_len; - u8 *out_end = op + out_len; - const u8 *ref; - - /* off requires a type wide enough to hold a general pointer difference. - * ISO C doesn't have that (size_t might not be enough and ptrdiff_t only - * works for differences within a single object). We also assume that no - * no bit pattern traps. Since the only platform that is both non-POSIX - * and fails to support both assumptions is windows 64 bit, we make a - * special workaround for it. 
- */ -#if ( defined (WIN32) && defined (_M_X64) ) || defined (_WIN64) - unsigned _int64 off; /* workaround for missing POSIX compliance */ -#else - unsigned long off; -#endif - unsigned int hval; - int lit; - - if (!in_len || !out_len) - return 0; - -#if INIT_HTAB - memset (htab, 0, sizeof (htab)); -# if 0 - for (hslot = htab; hslot < htab + HSIZE; hslot++) - *hslot++ = ip; -# endif -#endif - - lit = 0; op++; /* start run */ - - hval = FRST (ip); - while (ip < in_end - 2) - { - hval = NEXT (hval, ip); - hslot = htab + IDX (hval); - ref = *hslot; *hslot = ip; - - if (1 -#if INIT_HTAB - && ref < ip /* the next test will actually take care of this, but this is faster */ -#endif - && (off = ip - ref - 1) < MAX_OFF - && ip + 4 < in_end - && ref > (u8 *)in_data -#if STRICT_ALIGN - && ref[0] == ip[0] - && ref[1] == ip[1] - && ref[2] == ip[2] -#else - && *(u16 *)ref == *(u16 *)ip - && ref[2] == ip[2] -#endif - ) - { - /* match found at *ref++ */ - unsigned int len = 2; - unsigned int maxlen = in_end - ip - len; - maxlen = maxlen > MAX_REF ? 
MAX_REF : maxlen; - - if (expect_false (op + 3 + 1 >= out_end)) /* first a faster conservative test */ - if (op - !lit + 3 + 1 >= out_end) /* second the exact but rare test */ - return 0; - - op [- lit - 1] = lit - 1; /* stop run */ - op -= !lit; /* undo run if length is zero */ - - for (;;) - { - if (expect_true (maxlen > 16)) - { - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - len++; if (ref [len] != ip [len]) break; - } - - do - len++; - while (len < maxlen && ref[len] == ip[len]); - - break; - } - - len -= 2; /* len is now #octets - 1 */ - ip++; - - if (len < 7) - { - *op++ = (off >> 8) + (len << 5); - } - else - { - *op++ = (off >> 8) + ( 7 << 5); - *op++ = len - 7; - } - - *op++ = off; - lit = 0; op++; /* start run */ - - ip += len + 1; - - if (expect_false (ip >= in_end - 2)) - break; - -#if ULTRA_FAST || VERY_FAST - --ip; -# if VERY_FAST && !ULTRA_FAST - --ip; -# endif - hval = FRST (ip); - - hval = NEXT (hval, ip); - htab[IDX (hval)] = ip; - ip++; - -# if VERY_FAST && !ULTRA_FAST - hval = NEXT (hval, ip); - htab[IDX (hval)] = ip; - ip++; -# endif -#else - ip -= len + 1; - - do - { - hval = NEXT (hval, ip); - htab[IDX (hval)] = ip; - ip++; - } - while (len--); -#endif - } - else - { - /* one more literal byte we must copy */ - if (expect_false (op >= out_end)) - return 0; - - lit++; *op++ = *ip++; - - if (expect_false (lit == MAX_LIT)) - { - op [- lit - 1] = lit - 
1; /* stop run */ - lit = 0; op++; /* start run */ - } - } - } - - if (op + 3 > out_end) /* at most 3 bytes can be missing here */ - return 0; - - while (ip < in_end) - { - lit++; *op++ = *ip++; - - if (expect_false (lit == MAX_LIT)) - { - op [- lit - 1] = lit - 1; /* stop run */ - lit = 0; op++; /* start run */ - } - } - - op [- lit - 1] = lit - 1; /* end run */ - op -= !lit; /* undo run if length is zero */ - - return op - (u8 *)out_data; -} - diff --git a/src/bitshuffle/lzf/lzf/lzf_d.c b/src/bitshuffle/lzf/lzf/lzf_d.c deleted file mode 100644 index 2e2eedaa..00000000 --- a/src/bitshuffle/lzf/lzf/lzf_d.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (c) 2000-2007 Marc Alexander Lehmann - * - * Redistribution and use in source and binary forms, with or without modifica- - * tion, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- - * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO - * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- - * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- - * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. 
- * - * Alternatively, the contents of this file may be used under the terms of - * the GNU General Public License ("GPL") version 2 or any later version, - * in which case the provisions of the GPL are applicable instead of - * the above. If you wish to allow the use of your version of this file - * only under the terms of the GPL and not to allow others to use your - * version of this file under the BSD license, indicate your decision - * by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL. If you do not delete the - * provisions above, a recipient may use your version of this file under - * either the BSD or the GPL. - */ - -#include "lzfP.h" - -#if AVOID_ERRNO -# define SET_ERRNO(n) -#else -# include -# define SET_ERRNO(n) errno = (n) -#endif - -/* ASM is slower than C in HDF5 tests -- A.C. 2/5/09 -#ifndef __STRICT_ANSI__ -#ifndef H5PY_DISABLE_LZF_ASM -#if (__i386 || __amd64) && __GNUC__ >= 3 -# define lzf_movsb(dst, src, len) \ - asm ("rep movsb" \ - : "=D" (dst), "=S" (src), "=c" (len) \ - : "0" (dst), "1" (src), "2" (len)); -#endif -#endif -#endif -*/ - -unsigned int -lzf_decompress (const void *const in_data, unsigned int in_len, - void *out_data, unsigned int out_len) -{ - u8 const *ip = (const u8 *)in_data; - u8 *op = (u8 *)out_data; - u8 const *const in_end = ip + in_len; - u8 *const out_end = op + out_len; - - do - { - unsigned int ctrl = *ip++; - - if (ctrl < (1 << 5)) /* literal run */ - { - ctrl++; - - if (op + ctrl > out_end) - { - SET_ERRNO (E2BIG); - return 0; - } - -#if CHECK_INPUT - if (ip + ctrl > in_end) - { - SET_ERRNO (EINVAL); - return 0; - } -#endif - -#ifdef lzf_movsb - lzf_movsb (op, ip, ctrl); -#else - do - *op++ = *ip++; - while (--ctrl); -#endif - } - else /* back reference */ - { - unsigned int len = ctrl >> 5; - - u8 *ref = op - ((ctrl & 0x1f) << 8) - 1; - -#if CHECK_INPUT - if (ip >= in_end) - { - SET_ERRNO (EINVAL); - return 0; - } -#endif - if (len == 7) - { - len += 
*ip++; -#if CHECK_INPUT - if (ip >= in_end) - { - SET_ERRNO (EINVAL); - return 0; - } -#endif - } - - ref -= *ip++; - - if (op + len + 2 > out_end) - { - SET_ERRNO (E2BIG); - return 0; - } - - if (ref < (u8 *)out_data) - { - SET_ERRNO (EINVAL); - return 0; - } - -#ifdef lzf_movsb - len += 2; - lzf_movsb (op, ref, len); -#else - *op++ = *ref++; - *op++ = *ref++; - - do - *op++ = *ref++; - while (--len); -#endif - } - } - while (ip < in_end); - - return op - (u8 *)out_data; -} - diff --git a/src/bitshuffle/lzf/lzf_filter.c b/src/bitshuffle/lzf/lzf_filter.c deleted file mode 100644 index c6dd4b0e..00000000 --- a/src/bitshuffle/lzf/lzf_filter.c +++ /dev/null @@ -1,261 +0,0 @@ -/***** Preamble block ********************************************************* -* -* This file is part of h5py, a low-level Python interface to the HDF5 library. -* -* Copyright (C) 2008 Andrew Collette -* http://h5py.alfven.org -* License: BSD (See LICENSE.txt for full license) -* -* $Date$ -* -****** End preamble block ****************************************************/ - -/* - Implements an LZF filter module for HDF5, using the BSD-licensed library - by Marc Alexander Lehmann (http://www.goof.com/pcg/marc/liblzf.html). - - No Python-specific code is used. The filter behaves like the DEFLATE - filter, in that it is called for every type and space, and returns 0 - if the data cannot be compressed. - - The only public function is (int) register_lzf(void), which passes on - the result from H5Zregister. 
-*/ - -#include -#include -#include -#include "hdf5.h" -#include "lzf/lzf.h" -#include "lzf_filter.h" - -/* Our own versions of H5Epush_sim, as it changed in 1.8 */ -#if H5_VERS_MAJOR == 1 && H5_VERS_MINOR < 7 - -#define PUSH_ERR(func, minor, str) H5Epush(__FILE__, func, __LINE__, H5E_PLINE, minor, str) -#define H5PY_GET_FILTER H5Pget_filter_by_id - -#else - -#define PUSH_ERR(func, minor, str) H5Epush1(__FILE__, func, __LINE__, H5E_PLINE, minor, str) -#define H5PY_GET_FILTER(a,b,c,d,e,f,g) H5Pget_filter_by_id2(a,b,c,d,e,f,g,NULL) - -#endif - -/* Deal with the mutiple definitions for H5Z_class_t. - Note: Only HDF5 1.6 and 1.8 are supported. - - (1) The old class should always be used for HDF5 1.6 - (2) The new class should always be used for HDF5 1.8 < 1.8.3 - (3) The old class should be used for HDF5 1.8 >= 1.8.3 only if the - macro H5_USE_16_API is set -*/ - -#if H5_VERS_MAJOR == 1 && H5_VERS_MINOR == 8 && (H5_VERS_RELEASE < 3 || !H5_USE_16_API) -#define H5PY_H5Z_NEWCLS 1 -#else -#define H5PY_H5Z_NEWCLS 0 -#endif - -size_t lzf_filter(unsigned flags, size_t cd_nelmts, - const unsigned cd_values[], size_t nbytes, - size_t *buf_size, void **buf); - -herr_t lzf_set_local(hid_t dcpl, hid_t type, hid_t space); - - -/* Try to register the filter, passing on the HDF5 return value */ -int register_lzf(void){ - - int retval; - -#if H5PY_H5Z_NEWCLS - H5Z_class_t filter_class = { - H5Z_CLASS_T_VERS, - (H5Z_filter_t)(H5PY_FILTER_LZF), - 1, 1, - "lzf", - NULL, - (H5Z_set_local_func_t)(lzf_set_local), - (H5Z_func_t)(lzf_filter) - }; -#else - H5Z_class_t filter_class = { - (H5Z_filter_t)(H5PY_FILTER_LZF), - "lzf", - NULL, - (H5Z_set_local_func_t)(lzf_set_local), - (H5Z_func_t)(lzf_filter) - }; -#endif - - retval = H5Zregister(&filter_class); - if(retval<0){ - PUSH_ERR("register_lzf", H5E_CANTREGISTER, "Can't register LZF filter"); - } - return retval; -} - -/* Filter setup. Records the following inside the DCPL: - - 1. 
If version information is not present, set slots 0 and 1 to the filter - revision and LZF API version, respectively. - - 2. Compute the chunk size in bytes and store it in slot 2. -*/ -herr_t lzf_set_local(hid_t dcpl, hid_t type, hid_t space){ - - int ndims; - int i; - herr_t r; - - unsigned int bufsize; - hsize_t chunkdims[32]; - - unsigned int flags; - size_t nelements = 8; - unsigned values[] = {0,0,0,0,0,0,0,0}; - - r = H5PY_GET_FILTER(dcpl, H5PY_FILTER_LZF, &flags, &nelements, values, 0, NULL); - if(r<0) return -1; - - if(nelements < 3) nelements = 3; /* First 3 slots reserved. If any higher - slots are used, preserve the contents. */ - - /* It seems the H5Z_FLAG_REVERSE flag doesn't work here, so we have to be - careful not to clobber any existing version info */ - if(values[0]==0) values[0] = H5PY_FILTER_LZF_VERSION; - if(values[1]==0) values[1] = LZF_VERSION; - - ndims = H5Pget_chunk(dcpl, 32, chunkdims); - if(ndims<0) return -1; - if(ndims>32){ - PUSH_ERR("lzf_set_local", H5E_CALLBACK, "Chunk rank exceeds limit"); - return -1; - } - - bufsize = H5Tget_size(type); - if(bufsize==0) return -1; - - for(i=0;i=3)&&(cd_values[2]!=0)){ - outbuf_size = cd_values[2]; /* Precomputed buffer guess */ - }else{ - outbuf_size = (*buf_size); - } - -#ifdef H5PY_LZF_DEBUG - fprintf(stderr, "Decompress %d chunk w/buffer %d\n", nbytes, outbuf_size); -#endif - - while(!status){ - - free(outbuf); - outbuf = malloc(outbuf_size); - - if(outbuf == NULL){ - PUSH_ERR("lzf_filter", H5E_CALLBACK, "Can't allocate decompression buffer"); - goto failed; - } - - status = lzf_decompress(*buf, nbytes, outbuf, outbuf_size); - - if(!status){ /* compression failed */ - - if(errno == E2BIG){ - outbuf_size += (*buf_size); -#ifdef H5PY_LZF_DEBUG - fprintf(stderr, " Too small: %d\n", outbuf_size); -#endif - } else if(errno == EINVAL) { - - PUSH_ERR("lzf_filter", H5E_CALLBACK, "Invalid data for LZF decompression"); - goto failed; - - } else { - PUSH_ERR("lzf_filter", H5E_CALLBACK, "Unknown LZF 
decompression error"); - goto failed; - } - - } /* if !status */ - - } /* while !status */ - - } /* compressing vs decompressing */ - - if(status != 0){ - - free(*buf); - *buf = outbuf; - *buf_size = outbuf_size; - - return status; /* Size of compressed/decompressed data */ - } - - failed: - - free(outbuf); - return 0; - -} /* End filter function */ - - - - - - - - - - - - - diff --git a/src/bitshuffle/lzf/lzf_filter.h b/src/bitshuffle/lzf/lzf_filter.h deleted file mode 100644 index 27dff83a..00000000 --- a/src/bitshuffle/lzf/lzf_filter.h +++ /dev/null @@ -1,38 +0,0 @@ -/***** Preamble block ********************************************************* -* -* This file is part of h5py, a low-level Python interface to the HDF5 library. -* -* Copyright (C) 2008 Andrew Collette -* http://h5py.alfven.org -* License: BSD (See LICENSE.txt for full license) -* -* $Date$ -* -****** End preamble block ****************************************************/ - - -#ifndef H5PY_LZF_H -#define H5PY_LZF_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* Filter revision number, starting at 1 */ -#define H5PY_FILTER_LZF_VERSION 4 - -/* Filter ID registered with the HDF Group as of 2/6/09. For maintenance - requests, contact the filter author directly. */ -#define H5PY_FILTER_LZF 32000 - -/* Register the filter with the library. Returns a negative value on failure, - and a non-negative value on success. 
-*/ -int register_lzf(void); - -#ifdef __cplusplus -} -#endif - -#endif - diff --git a/src/bitshuffle/requirements.txt b/src/bitshuffle/requirements.txt deleted file mode 100644 index 2f0d0fbb..00000000 --- a/src/bitshuffle/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -# Order matters -setuptools>=0.7 -Cython>=0.19 -numpy>=1.6.1 -h5py>=2.4.0 --no-binary=h5py diff --git a/src/bitshuffle/setup.cfg.example b/src/bitshuffle/setup.cfg.example deleted file mode 100644 index 6bd2ccfb..00000000 --- a/src/bitshuffle/setup.cfg.example +++ /dev/null @@ -1,10 +0,0 @@ -[install] -# These control the installation of the hdf5 dynamically loaded filter plugin. -h5plugin = 0 -h5plugin-dir = /usr/local/hdf5/lib/plugin - -[build_ext] -# Whether to compile with OpenMP multi-threading. Default is system dependant: -# False on OSX (since the clang compiler does not yet support OpenMP) and True -# otherwise. -omp = 1 diff --git a/src/bitshuffle/setup.py b/src/bitshuffle/setup.py deleted file mode 100644 index 830991cd..00000000 --- a/src/bitshuffle/setup.py +++ /dev/null @@ -1,323 +0,0 @@ -from __future__ import absolute_import, division, print_function -# I didn't import unicode_literals. They break setuptools or Cython in python -# 2.7, but python 3 seems to be happy with them. - -import glob -import os -from os import path -from setuptools import setup, Extension -from setuptools.command.build_ext import build_ext as build_ext_ -from setuptools.command.develop import develop as develop_ -from setuptools.command.install import install as install_ -import shutil -import subprocess -import sys - - -VERSION_MAJOR = 0 -VERSION_MINOR = 3 -VERSION_POINT = 5 - -# Only unset in the 'release' branch and in tags. -VERSION_DEV = 0 - -VERSION = "%d.%d.%d" % (VERSION_MAJOR, VERSION_MINOR, VERSION_POINT) -if VERSION_DEV: - VERSION = VERSION + ".dev%d" % VERSION_DEV - - -COMPILE_FLAGS = ['-O3', '-ffast-math', '-march=native', '-std=c99'] -# Cython breaks strict aliasing rules. 
-COMPILE_FLAGS += ['-fno-strict-aliasing'] -COMPILE_FLAGS += ['-fPIC'] -COMPILE_FLAGS_MSVC = ['/Ox', '/fp:fast'] - -MACROS = [ - ('BSHUF_VERSION_MAJOR', VERSION_MAJOR), - ('BSHUF_VERSION_MINOR', VERSION_MINOR), - ('BSHUF_VERSION_POINT', VERSION_POINT), -] - - -H5PLUGINS_DEFAULT = '/usr/local/hdf5/lib/plugin' - -# OSX's clang compliler does not support OpenMP. -if sys.platform == 'darwin': - OMP_DEFAULT = False -else: - OMP_DEFAULT = True - -FALLBACK_CONFIG = { - 'include_dirs': [], - 'library_dirs': [], - 'libraries': [], - 'extra_compile_args': [], - 'extra_link_args': [], -} - -if 'HDF5_DIR' in os.environ: - FALLBACK_CONFIG['include_dirs'] += [os.environ['HDF5_DIR'] + '/include'] # macports - FALLBACK_CONFIG['library_dirs'] += [os.environ['HDF5_DIR'] + '/lib'] # macports -elif sys.platform == 'darwin': - # putting here both macports and homebrew paths will generate - # "ld: warning: dir not found" at the linking phase - FALLBACK_CONFIG['include_dirs'] += ['/opt/local/include'] # macports - FALLBACK_CONFIG['library_dirs'] += ['/opt/local/lib'] # macports - FALLBACK_CONFIG['include_dirs'] += ['/usr/local/include'] # homebrew - FALLBACK_CONFIG['library_dirs'] += ['/usr/local/lib'] # homebrew -elif sys.platform.startswith('freebsd'): - FALLBACK_CONFIG['include_dirs'] += ['/usr/local/include'] # homebrew - FALLBACK_CONFIG['library_dirs'] += ['/usr/local/lib'] # homebrew - -FALLBACK_CONFIG['include_dirs'] = [d for d in FALLBACK_CONFIG['include_dirs'] - if path.isdir(d)] -FALLBACK_CONFIG['library_dirs'] = [d for d in FALLBACK_CONFIG['library_dirs'] - if path.isdir(d)] - -FALLBACK_CONFIG['extra_compile_args'] = ['-DH5_BUILT_AS_DYNAMIC_LIB'] - - -def pkgconfig(*packages, **kw): - config = kw.setdefault('config', {}) - optional_args = kw.setdefault('optional', '') - flag_map = {'include_dirs': ['--cflags-only-I', 2], - 'library_dirs': ['--libs-only-L', 2], - 'libraries': ['--libs-only-l', 2], - 'extra_compile_args': ['--cflags-only-other', 0], - 'extra_link_args': 
['--libs-only-other', 0], - } - for package in packages: - try: - subprocess.check_output(["pkg-config", package]) - except (subprocess.CalledProcessError, OSError): - print("Can't find %s with pkg-config fallback to " - "static config" % package) - for distutils_key in flag_map: - config.setdefault(distutils_key, []).extend( - FALLBACK_CONFIG[distutils_key]) - config['libraries'].append(package) - else: - for distutils_key, (pkg_option, n) in flag_map.items(): - items = subprocess.check_output( - ['pkg-config', optional_args, pkg_option, package] - ).decode('utf8').split() - opt = config.setdefault(distutils_key, []) - opt.extend([i[n:] for i in items]) - return config - - -ext_bshuf = Extension( - "bitshuffle.ext", - sources=["bitshuffle/ext.pyx", "src/bitshuffle.c", - "src/bitshuffle_core.c", "src/iochain.c", - "lz4/lz4.c"], - include_dirs=["src/", "lz4/"], - depends=["src/bitshuffle.h", "src/bitshuffle_core.h", - "src/iochain.h", "lz4/lz4.h"], - libraries=[], - define_macros=MACROS, -) - -h5filter = Extension( - "bitshuffle.h5", - sources=["bitshuffle/h5.pyx", "src/bshuf_h5filter.c", - "src/bitshuffle.c", "src/bitshuffle_core.c", - "src/iochain.c", "lz4/lz4.c"], - depends=["src/bitshuffle.h", "src/bitshuffle_core.h", - "src/iochain.h", "src/bshuf_h5filter.h", - "lz4/lz4.h"], - define_macros=MACROS, - **pkgconfig("hdf5", config=dict( - include_dirs=["src/", "lz4/"])) -) - -filter_plugin = Extension( - "bitshuffle.plugin.libh5bshuf", - sources=["src/bshuf_h5plugin.c", "src/bshuf_h5filter.c", - "src/bitshuffle.c", "src/bitshuffle_core.c", - "src/iochain.c", "lz4/lz4.c"], - depends=["src/bitshuffle.h", "src/bitshuffle_core.h", - "src/iochain.h", 'src/bshuf_h5filter.h', - "lz4/lz4.h"], - define_macros=MACROS, - **pkgconfig("hdf5", config=dict( - include_dirs=["src/", "lz4/"])) -) - -lzf_plugin = Extension( - "bitshuffle.plugin.libh5LZF", - sources=["src/lzf_h5plugin.c", "lzf/lzf_filter.c", - "lzf/lzf/lzf_c.c", "lzf/lzf/lzf_d.c"], - depends=["lzf/lzf_filter.h", 
"lzf/lzf/lzf.h", - "lzf/lzf/lzfP.h"], - **pkgconfig("hdf5", config=dict( - include_dirs=["lzf/", "lzf/lzf/"])) -) - - -EXTENSIONS = [ext_bshuf, h5filter] -# Check for plugin hdf5 plugin support (hdf5 >= 1.8.11) -HDF5_PLUGIN_SUPPORT = False -CPATHS = os.environ['CPATH'].split(':') if 'CPATH' in os.environ else [] -for p in ["/usr/include"] + pkgconfig("hdf5")["include_dirs"] + CPATHS: - if os.path.exists(os.path.join(p, "H5PLextern.h")): - HDF5_PLUGIN_SUPPORT = True - -if HDF5_PLUGIN_SUPPORT: - EXTENSIONS.extend([filter_plugin, lzf_plugin]) - - -class develop(develop_): - def run(self): - # Dummy directory for copying build plugins. - if not path.isdir('bitshuffle/plugin'): - os.mkdir('bitshuffle/plugin') - develop_.run(self) - - -# Custom installation to include installing dynamic filters. -class install(install_): - user_options = install_.user_options + [ - ('h5plugin', None, - 'Install HDF5 filter plugins for use outside of python.'), - ('h5plugin-dir=', None, - 'Where to install filter plugins. Default %s.' % H5PLUGINS_DEFAULT), - ] - - def initialize_options(self): - install_.initialize_options(self) - self.h5plugin = False - self.h5plugin_dir = H5PLUGINS_DEFAULT - - def finalize_options(self): - install_.finalize_options(self) - if self.h5plugin not in ('0', '1', True, False): - raise ValueError("Invalid h5plugin argument. Mut be '0' or '1'.") - self.h5plugin = int(self.h5plugin) - self.h5plugin_dir = path.abspath(self.h5plugin_dir) - - def run(self): - install_.run(self) - if self.h5plugin: - if not HDF5_PLUGIN_SUPPORT: - print("HDF5 < 1.8.11, not installing filter plugins.") - return - plugin_build = path.join(self.build_lib, "bitshuffle", "plugin") - try: - os.makedirs(self.h5plugin_dir) - except OSError as e: - if e.args[0] == 17: - # Directory already exists, this is fine. 
- pass - else: - raise - plugin_libs = glob.glob(path.join(plugin_build, "*")) - for plugin_lib in plugin_libs: - plugin_name = path.split(plugin_lib)[1] - shutil.copy2(plugin_lib, - path.join(self.h5plugin_dir, plugin_name)) - print("Installed HDF5 filter plugins to %s" % self.h5plugin_dir) - - -# Command line or site.cfg specification of OpenMP. -class build_ext(build_ext_): - user_options = build_ext_.user_options + [ - ('omp=', None, "Whether to compile with OpenMP threading. Default" - " on current system is %s." % str(OMP_DEFAULT)) - ] - boolean_options = build_ext_.boolean_options + ['omp'] - - def initialize_options(self): - build_ext_.initialize_options(self) - self.omp = OMP_DEFAULT - - def finalize_options(self): - # For some reason this gets run twice. Careful to print messages and - # add arguments only one time. - build_ext_.finalize_options(self) - - if self.omp not in ('0', '1', True, False): - raise ValueError("Invalid omp argument. Mut be '0' or '1'.") - self.omp = int(self.omp) - - import numpy as np - ext_bshuf.include_dirs.append(np.get_include()) - - # Required only by old version of setuptools < 18.0 - from Cython.Build import cythonize - self.extensions = cythonize(self.extensions) - for ext in self.extensions: - ext._needs_stub = False - - def build_extensions(self): - c = self.compiler.compiler_type - - if self.omp not in ('0', '1', True, False): - raise ValueError("Invalid omp argument. Mut be '0' or '1'.") - self.omp = int(self.omp) - - if self.omp: - if not hasattr(self, "_printed_omp_message"): - self._printed_omp_message = True - print("\n#################################") - print("# Compiling with OpenMP support #") - print("#################################\n") - # More portable to pass -fopenmp to linker. 
- # self.libraries += ['gomp'] - if self.compiler.compiler_type == 'msvc': - openmpflag = '/openmp' - compileflags = COMPILE_FLAGS_MSVC - else: - openmpflag = '-fopenmp' - compileflags = COMPILE_FLAGS - for e in self.extensions: - e.extra_compile_args = list(set(e.extra_compile_args).union(compileflags)) - if openmpflag not in e.extra_compile_args: - e.extra_compile_args += [openmpflag] - if openmpflag not in e.extra_link_args: - e.extra_link_args += [openmpflag] - - build_ext_.build_extensions(self) - - -# Don't install numpy/cython/hdf5 if not needed -for cmd in ["sdist", "clean", - "--help", "--help-commands", "--version"]: - if cmd in sys.argv: - setup_requires = [] - break -else: - setup_requires = ["Cython>=0.19", "numpy>=1.6.1"] - -with open('requirements.txt') as f: - requires = f.read().splitlines() - requires = [r.split()[0] for r in requires] - -with open('README.rst') as r: - long_description = r.read() - -# TODO hdf5 support should be an "extra". Figure out how to set this up. 
-setup( - name='bitshuffle', - version=VERSION, - - packages=['bitshuffle', 'bitshuffle.tests'], - scripts=[], - ext_modules=EXTENSIONS, - cmdclass={'build_ext': build_ext, 'install': install, 'develop': develop}, - setup_requires=setup_requires, - install_requires=requires, - # extras_require={'H5': ["h5py"]}, - package_data={'': ['data/*']}, - - # metadata for upload to PyPI - author="Kiyoshi Wesley Masui", - author_email="kiyo@physics.ubc.ca", - description="Bitshuffle filter for improving typed data compression.", - long_description=long_description, - license="MIT", - url="https://github.com/kiyo-masui/bitshuffle", - download_url=("https://github.com/kiyo-masui/bitshuffle/tarball/%s" - % VERSION), - keywords=['compression', 'hdf5', 'numpy'], -) diff --git a/src/bitshuffle/src/bitshuffle.c b/src/bitshuffle/src/bitshuffle.c deleted file mode 100644 index 54ff045f..00000000 --- a/src/bitshuffle/src/bitshuffle.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Bitshuffle - Filter for improving compression of typed binary data. - * - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. - * - */ - -#include "bitshuffle.h" -#include "bitshuffle_core.h" -#include "bitshuffle_internals.h" -#include "lz4.h" - -#include -#include - - -// Constants. -// Use fast decompression instead of safe decompression for LZ4. -#define BSHUF_LZ4_DECOMPRESS_FAST - - -// Macros. -#define CHECK_ERR_FREE_LZ(count, buf) if (count < 0) { \ - free(buf); return count - 1000; } - - -/* Bitshuffle and compress a single block. 
*/ -int64_t bshuf_compress_lz4_block(ioc_chain *C_ptr, \ - const size_t size, const size_t elem_size) { - - int64_t nbytes, count; - void *tmp_buf_bshuf; - void *tmp_buf_lz4; - size_t this_iter; - const void *in; - void *out; - - tmp_buf_bshuf = malloc(size * elem_size); - if (tmp_buf_bshuf == NULL) return -1; - - tmp_buf_lz4 = malloc(LZ4_compressBound(size * elem_size)); - if (tmp_buf_lz4 == NULL){ - free(tmp_buf_bshuf); - return -1; - } - - - in = ioc_get_in(C_ptr, &this_iter); - ioc_set_next_in(C_ptr, &this_iter, (void*) ((char*) in + size * elem_size)); - - count = bshuf_trans_bit_elem(in, tmp_buf_bshuf, size, elem_size); - if (count < 0) { - free(tmp_buf_lz4); - free(tmp_buf_bshuf); - return count; - } - nbytes = LZ4_compress((const char*) tmp_buf_bshuf, (char*) tmp_buf_lz4, size * elem_size); - free(tmp_buf_bshuf); - CHECK_ERR_FREE_LZ(nbytes, tmp_buf_lz4); - - out = ioc_get_out(C_ptr, &this_iter); - ioc_set_next_out(C_ptr, &this_iter, (void *) ((char *) out + nbytes + 4)); - - bshuf_write_uint32_BE(out, nbytes); - memcpy((char *) out + 4, tmp_buf_lz4, nbytes); - - free(tmp_buf_lz4); - - return nbytes + 4; -} - - -/* Decompress and bitunshuffle a single block. 
*/ -int64_t bshuf_decompress_lz4_block(ioc_chain *C_ptr, - const size_t size, const size_t elem_size) { - - int64_t nbytes, count; - void *out, *tmp_buf; - const void *in; - size_t this_iter; - int32_t nbytes_from_header; - - in = ioc_get_in(C_ptr, &this_iter); - nbytes_from_header = bshuf_read_uint32_BE(in); - ioc_set_next_in(C_ptr, &this_iter, - (void*) ((char*) in + nbytes_from_header + 4)); - - out = ioc_get_out(C_ptr, &this_iter); - ioc_set_next_out(C_ptr, &this_iter, - (void *) ((char *) out + size * elem_size)); - - tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - -#ifdef BSHUF_LZ4_DECOMPRESS_FAST - nbytes = LZ4_decompress_fast((const char*) in + 4, (char*) tmp_buf, size * elem_size); - CHECK_ERR_FREE_LZ(nbytes, tmp_buf); - if (nbytes != nbytes_from_header) { - free(tmp_buf); - return -91; - } -#else - nbytes = LZ4_decompress_safe((const char*) in + 4, (char *) tmp_buf, nbytes_from_header, - size * elem_size); - CHECK_ERR_FREE_LZ(nbytes, tmp_buf); - if (nbytes != size * elem_size) { - free(tmp_buf); - return -91; - } - nbytes = nbytes_from_header; -#endif - count = bshuf_untrans_bit_elem(tmp_buf, out, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - nbytes += 4; - - free(tmp_buf); - return nbytes; -} - - -/* ---- Public functions ---- - * - * See header file for description and usage. - * - */ - -size_t bshuf_compress_lz4_bound(const size_t size, - const size_t elem_size, size_t block_size) { - - size_t bound, leftover; - - if (block_size == 0) { - block_size = bshuf_default_block_size(elem_size); - } - if (block_size % BSHUF_BLOCKED_MULT) return -81; - - // Note that each block gets a 4 byte header. - // Size of full blocks. - bound = (LZ4_compressBound(block_size * elem_size) + 4) * (size / block_size); - // Size of partial blocks, if any. 
- leftover = ((size % block_size) / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT; - if (leftover) bound += LZ4_compressBound(leftover * elem_size) + 4; - // Size of uncompressed data not fitting into any blocks. - bound += (size % BSHUF_BLOCKED_MULT) * elem_size; - return bound; -} - - -int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size, - const size_t elem_size, size_t block_size) { - return bshuf_blocked_wrap_fun(&bshuf_compress_lz4_block, in, out, size, - elem_size, block_size); -} - - -int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size, - const size_t elem_size, size_t block_size) { - return bshuf_blocked_wrap_fun(&bshuf_decompress_lz4_block, in, out, size, - elem_size, block_size); -} - diff --git a/src/bitshuffle/src/bitshuffle.h b/src/bitshuffle/src/bitshuffle.h deleted file mode 100644 index 3df95f47..00000000 --- a/src/bitshuffle/src/bitshuffle.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Bitshuffle - Filter for improving compression of typed binary data. - * - * This file is part of Bitshuffle - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. - * - * - * Header File - * - * Worker routines return an int64_t which is the number of bytes processed - * if positive or an error code if negative. - * - * Error codes: - * -1 : Failed to allocate memory. - * -11 : Missing SSE. - * -12 : Missing AVX. - * -80 : Input size not a multiple of 8. - * -81 : block_size not multiple of 8. - * -91 : Decompression error, wrong number of bytes processed. - * -1YYY : Error internal to compression routine with error code -YYY. - */ - - -#ifndef BITSHUFFLE_H -#define BITSHUFFLE_H - -#include -#include "bitshuffle_core.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* ---- bshuf_compress_lz4_bound ---- - * - * Bound on size of data compressed with *bshuf_compress_lz4*. 
- * - * Parameters - * ---------- - * size : number of elements in input - * elem_size : element size of typed data - * block_size : Process in blocks of this many elements. Pass 0 to - * select automatically (recommended). - * - * Returns - * ------- - * Bound on compressed data size. - * - */ -size_t bshuf_compress_lz4_bound(const size_t size, - const size_t elem_size, size_t block_size); - - -/* ---- bshuf_compress_lz4 ---- - * - * Bitshuffled and compress the data using LZ4. - * - * Transpose within elements, in blocks of data of *block_size* elements then - * compress the blocks using LZ4. In the output buffer, each block is prefixed - * by a 4 byte integer giving the compressed size of that block. - * - * Output buffer must be large enough to hold the compressed data. This could - * be in principle substantially larger than the input buffer. Use the routine - * *bshuf_compress_lz4_bound* to get an upper limit. - * - * Parameters - * ---------- - * in : input buffer, must be of size * elem_size bytes - * out : output buffer, must be large enough to hold data. - * size : number of elements in input - * elem_size : element size of typed data - * block_size : Process in blocks of this many elements. Pass 0 to - * select automatically (recommended). - * - * Returns - * ------- - * number of bytes used in output buffer, negative error-code if failed. - * - */ -int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size, const size_t - elem_size, size_t block_size); - - -/* ---- bshuf_decompress_lz4 ---- - * - * Undo compression and bitshuffling. - * - * Decompress data then un-bitshuffle it in blocks of *block_size* elements. - * - * To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size* - * must patch the parameters used to compress the data. - * - * NOT TO BE USED WITH UNTRUSTED DATA: This routine uses the function - * LZ4_decompress_fast from LZ4, which does not protect against maliciously - * formed datasets. 
By modifying the compressed data, this function could be - * coerced into leaving the boundaries of the input buffer. - * - * Parameters - * ---------- - * in : input buffer - * out : output buffer, must be of size * elem_size bytes - * size : number of elements in input - * elem_size : element size of typed data - * block_size : Process in blocks of this many elements. Pass 0 to - * select automatically (recommended). - * - * Returns - * ------- - * number of bytes consumed in *input* buffer, negative error-code if failed. - * - */ -int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size, - const size_t elem_size, size_t block_size); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // BITSHUFFLE_H diff --git a/src/bitshuffle/src/bitshuffle_core.c b/src/bitshuffle/src/bitshuffle_core.c deleted file mode 100644 index 8028e3a6..00000000 --- a/src/bitshuffle/src/bitshuffle_core.c +++ /dev/null @@ -1,1862 +0,0 @@ -/* - * Bitshuffle - Filter for improving compression of typed binary data. - * - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. - * - */ - -#include "bitshuffle_core.h" -#include "bitshuffle_internals.h" - -#include -#include - - -#if defined(__AVX2__) && defined (__SSE2__) -#define USEAVX2 -#endif - -#if defined(__SSE2__) -#define USESSE2 -#endif - -#if defined(__ARM_NEON__) || (__ARM_NEON) -#define USEARMNEON -#endif - -// Conditional includes for SSE2 and AVX2. -#ifdef USEAVX2 -#include -#elif defined USESSE2 -#include -#elif defined USEARMNEON -#include -#endif - -#if defined(_OPENMP) && defined(_MSC_VER) -typedef int64_t omp_size_t; -#else -typedef size_t omp_size_t; -#endif - -// Macros. -#define CHECK_MULT_EIGHT(n) if (n % 8) return -80; -#define MAX(X,Y) ((X) > (Y) ? (X) : (Y)) - - -/* ---- Functions indicating compile time instruction set. 
---- */ - -int bshuf_using_NEON(void) { -#ifdef USEARMNEON - return 1; -#else - return 0; -#endif -} - - -int bshuf_using_SSE2(void) { -#ifdef USESSE2 - return 1; -#else - return 0; -#endif -} - - -int bshuf_using_AVX2(void) { -#ifdef USEAVX2 - return 1; -#else - return 0; -#endif -} - - -/* ---- Worker code not requiring special instruction sets. ---- - * - * The following code does not use any x86 specific vectorized instructions - * and should compile on any machine - * - */ - -/* Transpose 8x8 bit array packed into a single quadword *x*. - * *t* is workspace. */ -#define TRANS_BIT_8X8(x, t) { \ - t = (x ^ (x >> 7)) & 0x00AA00AA00AA00AALL; \ - x = x ^ t ^ (t << 7); \ - t = (x ^ (x >> 14)) & 0x0000CCCC0000CCCCLL; \ - x = x ^ t ^ (t << 14); \ - t = (x ^ (x >> 28)) & 0x00000000F0F0F0F0LL; \ - x = x ^ t ^ (t << 28); \ - } - -/* Transpose 8x8 bit array along the diagonal from upper right - to lower left */ -#define TRANS_BIT_8X8_BE(x, t) { \ - t = (x ^ (x >> 9)) & 0x0055005500550055LL; \ - x = x ^ t ^ (t << 9); \ - t = (x ^ (x >> 18)) & 0x0000333300003333LL; \ - x = x ^ t ^ (t << 18); \ - t = (x ^ (x >> 36)) & 0x000000000F0F0F0FLL; \ - x = x ^ t ^ (t << 36); \ - } - -/* Transpose of an array of arbitrarily typed elements. */ -#define TRANS_ELEM_TYPE(in, out, lda, ldb, type_t) { \ - size_t ii, jj, kk; \ - const type_t* in_type = (const type_t*) in; \ - type_t* out_type = (type_t*) out; \ - for(ii = 0; ii + 7 < lda; ii += 8) { \ - for(jj = 0; jj < ldb; jj++) { \ - for(kk = 0; kk < 8; kk++) { \ - out_type[jj*lda + ii + kk] = \ - in_type[ii*ldb + kk * ldb + jj]; \ - } \ - } \ - } \ - for(ii = lda - lda % 8; ii < lda; ii ++) { \ - for(jj = 0; jj < ldb; jj++) { \ - out_type[jj*lda + ii] = in_type[ii*ldb + jj]; \ - } \ - } \ - } - - -/* Memory copy with bshuf call signature. For testing and profiling. 
*/ -int64_t bshuf_copy(const void* in, void* out, const size_t size, - const size_t elem_size) { - - const char* in_b = (const char*) in; - char* out_b = (char*) out; - - memcpy(out_b, in_b, size * elem_size); - return size * elem_size; -} - - -/* Transpose bytes within elements, starting partway through input. */ -int64_t bshuf_trans_byte_elem_remainder(const void* in, void* out, const size_t size, - const size_t elem_size, const size_t start) { - - size_t ii, jj, kk; - const char* in_b = (const char*) in; - char* out_b = (char*) out; - - CHECK_MULT_EIGHT(start); - - if (size > start) { - // ii loop separated into 2 loops so the compiler can unroll - // the inner one. - for (ii = start; ii + 7 < size; ii += 8) { - for (jj = 0; jj < elem_size; jj++) { - for (kk = 0; kk < 8; kk++) { - out_b[jj * size + ii + kk] - = in_b[ii * elem_size + kk * elem_size + jj]; - } - } - } - for (ii = size - size % 8; ii < size; ii ++) { - for (jj = 0; jj < elem_size; jj++) { - out_b[jj * size + ii] = in_b[ii * elem_size + jj]; - } - } - } - return size * elem_size; -} - - -/* Transpose bytes within elements. */ -int64_t bshuf_trans_byte_elem_scal(const void* in, void* out, const size_t size, - const size_t elem_size) { - - return bshuf_trans_byte_elem_remainder(in, out, size, elem_size, 0); -} - - -/* Transpose bits within bytes. */ -int64_t bshuf_trans_bit_byte_remainder(const void* in, void* out, const size_t size, - const size_t elem_size, const size_t start_byte) { - - const uint64_t* in_b = (const uint64_t*) in; - uint8_t* out_b = (uint8_t*) out; - - uint64_t x, t; - - size_t ii, kk; - size_t nbyte = elem_size * size; - size_t nbyte_bitrow = nbyte / 8; - - uint64_t e=1; - const int little_endian = *(uint8_t *) &e == 1; - const size_t bit_row_skip = little_endian ? nbyte_bitrow : -nbyte_bitrow; - const int64_t bit_row_offset = little_endian ? 
0 : 7 * nbyte_bitrow; - - CHECK_MULT_EIGHT(nbyte); - CHECK_MULT_EIGHT(start_byte); - - for (ii = start_byte / 8; ii < nbyte_bitrow; ii ++) { - x = in_b[ii]; - if (little_endian) { - TRANS_BIT_8X8(x, t); - } else { - TRANS_BIT_8X8_BE(x, t); - } - for (kk = 0; kk < 8; kk ++) { - out_b[bit_row_offset + kk * bit_row_skip + ii] = x; - x = x >> 8; - } - } - return size * elem_size; -} - - -/* Transpose bits within bytes. */ -int64_t bshuf_trans_bit_byte_scal(const void* in, void* out, const size_t size, - const size_t elem_size) { - - return bshuf_trans_bit_byte_remainder(in, out, size, elem_size, 0); -} - - -/* General transpose of an array, optimized for large element sizes. */ -int64_t bshuf_trans_elem(const void* in, void* out, const size_t lda, - const size_t ldb, const size_t elem_size) { - - size_t ii, jj; - const char* in_b = (const char*) in; - char* out_b = (char*) out; - for(ii = 0; ii < lda; ii++) { - for(jj = 0; jj < ldb; jj++) { - memcpy(&out_b[(jj*lda + ii) * elem_size], - &in_b[(ii*ldb + jj) * elem_size], elem_size); - } - } - return lda * ldb * elem_size; -} - - -/* Transpose rows of shuffled bits (size / 8 bytes) within groups of 8. */ -int64_t bshuf_trans_bitrow_eight(const void* in, void* out, const size_t size, - const size_t elem_size) { - - size_t nbyte_bitrow = size / 8; - - CHECK_MULT_EIGHT(size); - - return bshuf_trans_elem(in, out, 8, elem_size, nbyte_bitrow); -} - - -/* Transpose bits within elements. 
*/ -int64_t bshuf_trans_bit_elem_scal(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; - void *tmp_buf; - - CHECK_MULT_EIGHT(size); - - tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - - count = bshuf_trans_byte_elem_scal(in, out, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_trans_bit_byte_scal(out, tmp_buf, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); - - free(tmp_buf); - - return count; -} - - -/* For data organized into a row for each bit (8 * elem_size rows), transpose - * the bytes. */ -int64_t bshuf_trans_byte_bitrow_scal(const void* in, void* out, const size_t size, - const size_t elem_size) { - size_t ii, jj, kk, nbyte_row; - const char *in_b; - char *out_b; - - - in_b = (const char*) in; - out_b = (char*) out; - - nbyte_row = size / 8; - - CHECK_MULT_EIGHT(size); - - for (jj = 0; jj < elem_size; jj++) { - for (ii = 0; ii < nbyte_row; ii++) { - for (kk = 0; kk < 8; kk++) { - out_b[ii * 8 * elem_size + jj * 8 + kk] = \ - in_b[(jj * 8 + kk) * nbyte_row + ii]; - } - } - } - return size * elem_size; -} - - -/* Shuffle bits within the bytes of eight element blocks. */ -int64_t bshuf_shuffle_bit_eightelem_scal(const void* in, void* out, \ - const size_t size, const size_t elem_size) { - - const char *in_b; - char *out_b; - uint64_t x, t; - size_t ii, jj, kk; - size_t nbyte, out_index; - - uint64_t e=1; - const int little_endian = *(uint8_t *) &e == 1; - const size_t elem_skip = little_endian ? elem_size : -elem_size; - const uint64_t elem_offset = little_endian ? 
0 : 7 * elem_size; - - CHECK_MULT_EIGHT(size); - - in_b = (const char*) in; - out_b = (char*) out; - - nbyte = elem_size * size; - - for (jj = 0; jj < 8 * elem_size; jj += 8) { - for (ii = 0; ii + 8 * elem_size - 1 < nbyte; ii += 8 * elem_size) { - x = *((uint64_t*) &in_b[ii + jj]); - if (little_endian) { - TRANS_BIT_8X8(x, t); - } else { - TRANS_BIT_8X8_BE(x, t); - } - for (kk = 0; kk < 8; kk++) { - out_index = ii + jj / 8 + elem_offset + kk * elem_skip; - *((uint8_t*) &out_b[out_index]) = x; - x = x >> 8; - } - } - } - return size * elem_size; -} - - -/* Untranspose bits within elements. */ -int64_t bshuf_untrans_bit_elem_scal(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; - void *tmp_buf; - - CHECK_MULT_EIGHT(size); - - tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - - count = bshuf_trans_byte_bitrow_scal(in, tmp_buf, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_shuffle_bit_eightelem_scal(tmp_buf, out, size, elem_size); - - free(tmp_buf); - - return count; -} - - -/* ---- Worker code that uses Arm NEON ---- - * - * The following code makes use of the Arm NEON instruction set. - * NEON technology is the implementation of the ARM Advanced Single - * Instruction Multiple Data (SIMD) extension. - * The NEON unit is the component of the processor that executes SIMD instructions. - * It is also called the NEON Media Processing Engine (MPE). - * - */ - -#ifdef USEARMNEON - -/* Transpose bytes within elements for 16 bit elements. 
*/ -int64_t bshuf_trans_byte_elem_NEON_16(const void* in, void* out, const size_t size) { - - size_t ii; - const char *in_b = (const char*) in; - char *out_b = (char*) out; - int8x16_t a0, b0, a1, b1; - - for (ii=0; ii + 15 < size; ii += 16) { - a0 = vld1q_s8(in_b + 2*ii + 0*16); - b0 = vld1q_s8(in_b + 2*ii + 1*16); - - a1 = vzip1q_s8(a0, b0); - b1 = vzip2q_s8(a0, b0); - - a0 = vzip1q_s8(a1, b1); - b0 = vzip2q_s8(a1, b1); - - a1 = vzip1q_s8(a0, b0); - b1 = vzip2q_s8(a0, b0); - - a0 = vzip1q_s8(a1, b1); - b0 = vzip2q_s8(a1, b1); - - vst1q_s8(out_b + 0*size + ii, a0); - vst1q_s8(out_b + 1*size + ii, b0); - } - - return bshuf_trans_byte_elem_remainder(in, out, size, 2, - size - size % 16); -} - - -/* Transpose bytes within elements for 32 bit elements. */ -int64_t bshuf_trans_byte_elem_NEON_32(const void* in, void* out, const size_t size) { - - size_t ii; - const char *in_b; - char *out_b; - in_b = (const char*) in; - out_b = (char*) out; - int8x16_t a0, b0, c0, d0, a1, b1, c1, d1; - int64x2_t a2, b2, c2, d2; - - for (ii=0; ii + 15 < size; ii += 16) { - a0 = vld1q_s8(in_b + 4*ii + 0*16); - b0 = vld1q_s8(in_b + 4*ii + 1*16); - c0 = vld1q_s8(in_b + 4*ii + 2*16); - d0 = vld1q_s8(in_b + 4*ii + 3*16); - - a1 = vzip1q_s8(a0, b0); - b1 = vzip2q_s8(a0, b0); - c1 = vzip1q_s8(c0, d0); - d1 = vzip2q_s8(c0, d0); - - a0 = vzip1q_s8(a1, b1); - b0 = vzip2q_s8(a1, b1); - c0 = vzip1q_s8(c1, d1); - d0 = vzip2q_s8(c1, d1); - - a1 = vzip1q_s8(a0, b0); - b1 = vzip2q_s8(a0, b0); - c1 = vzip1q_s8(c0, d0); - d1 = vzip2q_s8(c0, d0); - - a2 = vzip1q_s64(vreinterpretq_s64_s8(a1), vreinterpretq_s64_s8(c1)); - b2 = vzip2q_s64(vreinterpretq_s64_s8(a1), vreinterpretq_s64_s8(c1)); - c2 = vzip1q_s64(vreinterpretq_s64_s8(b1), vreinterpretq_s64_s8(d1)); - d2 = vzip2q_s64(vreinterpretq_s64_s8(b1), vreinterpretq_s64_s8(d1)); - - vst1q_s64((int64_t *) (out_b + 0*size + ii), a2); - vst1q_s64((int64_t *) (out_b + 1*size + ii), b2); - vst1q_s64((int64_t *) (out_b + 2*size + ii), c2); - vst1q_s64((int64_t *) 
(out_b + 3*size + ii), d2); - } - - return bshuf_trans_byte_elem_remainder(in, out, size, 4, - size - size % 16); -} - - -/* Transpose bytes within elements for 64 bit elements. */ -int64_t bshuf_trans_byte_elem_NEON_64(const void* in, void* out, const size_t size) { - - size_t ii; - const char* in_b = (const char*) in; - char* out_b = (char*) out; - int8x16_t a0, b0, c0, d0, e0, f0, g0, h0; - int8x16_t a1, b1, c1, d1, e1, f1, g1, h1; - - for (ii=0; ii + 15 < size; ii += 16) { - a0 = vld1q_s8(in_b + 8*ii + 0*16); - b0 = vld1q_s8(in_b + 8*ii + 1*16); - c0 = vld1q_s8(in_b + 8*ii + 2*16); - d0 = vld1q_s8(in_b + 8*ii + 3*16); - e0 = vld1q_s8(in_b + 8*ii + 4*16); - f0 = vld1q_s8(in_b + 8*ii + 5*16); - g0 = vld1q_s8(in_b + 8*ii + 6*16); - h0 = vld1q_s8(in_b + 8*ii + 7*16); - - a1 = vzip1q_s8 (a0, b0); - b1 = vzip2q_s8 (a0, b0); - c1 = vzip1q_s8 (c0, d0); - d1 = vzip2q_s8 (c0, d0); - e1 = vzip1q_s8 (e0, f0); - f1 = vzip2q_s8 (e0, f0); - g1 = vzip1q_s8 (g0, h0); - h1 = vzip2q_s8 (g0, h0); - - a0 = vzip1q_s8 (a1, b1); - b0 = vzip2q_s8 (a1, b1); - c0 = vzip1q_s8 (c1, d1); - d0 = vzip2q_s8 (c1, d1); - e0 = vzip1q_s8 (e1, f1); - f0 = vzip2q_s8 (e1, f1); - g0 = vzip1q_s8 (g1, h1); - h0 = vzip2q_s8 (g1, h1); - - a1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (c0)); - b1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (c0)); - c1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (b0), vreinterpretq_s32_s8 (d0)); - d1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (b0), vreinterpretq_s32_s8 (d0)); - e1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (g0)); - f1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (g0)); - g1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (f0), vreinterpretq_s32_s8 (h0)); - h1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (f0), vreinterpretq_s32_s8 (h0)); - - a0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (a1), vreinterpretq_s64_s8 (e1)); - b0 = 
(int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (a1), vreinterpretq_s64_s8 (e1)); - c0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (b1), vreinterpretq_s64_s8 (f1)); - d0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (b1), vreinterpretq_s64_s8 (f1)); - e0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (c1), vreinterpretq_s64_s8 (g1)); - f0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (c1), vreinterpretq_s64_s8 (g1)); - g0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (d1), vreinterpretq_s64_s8 (h1)); - h0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (d1), vreinterpretq_s64_s8 (h1)); - - vst1q_s8(out_b + 0*size + ii, a0); - vst1q_s8(out_b + 1*size + ii, b0); - vst1q_s8(out_b + 2*size + ii, c0); - vst1q_s8(out_b + 3*size + ii, d0); - vst1q_s8(out_b + 4*size + ii, e0); - vst1q_s8(out_b + 5*size + ii, f0); - vst1q_s8(out_b + 6*size + ii, g0); - vst1q_s8(out_b + 7*size + ii, h0); - } - - return bshuf_trans_byte_elem_remainder(in, out, size, 8, - size - size % 16); -} - - -/* Transpose bytes within elements using best NEON algorithm available. */ -int64_t bshuf_trans_byte_elem_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; - - // Trivial cases: power of 2 bytes. - switch (elem_size) { - case 1: - count = bshuf_copy(in, out, size, elem_size); - return count; - case 2: - count = bshuf_trans_byte_elem_NEON_16(in, out, size); - return count; - case 4: - count = bshuf_trans_byte_elem_NEON_32(in, out, size); - return count; - case 8: - count = bshuf_trans_byte_elem_NEON_64(in, out, size); - return count; - } - - // Worst case: odd number of bytes. Turns out that this is faster for - // (odd * 2) byte elements as well (hence % 4). - if (elem_size % 4) { - count = bshuf_trans_byte_elem_scal(in, out, size, elem_size); - return count; - } - - // Multiple of power of 2: transpose hierarchically. 
- { - size_t nchunk_elem; - void* tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - - if ((elem_size % 8) == 0) { - nchunk_elem = elem_size / 8; - TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int64_t); - count = bshuf_trans_byte_elem_NEON_64(out, tmp_buf, - size * nchunk_elem); - bshuf_trans_elem(tmp_buf, out, 8, nchunk_elem, size); - } else if ((elem_size % 4) == 0) { - nchunk_elem = elem_size / 4; - TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int32_t); - count = bshuf_trans_byte_elem_NEON_32(out, tmp_buf, - size * nchunk_elem); - bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size); - } else { - // Not used since scalar algorithm is faster. - nchunk_elem = elem_size / 2; - TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t); - count = bshuf_trans_byte_elem_NEON_16(out, tmp_buf, - size * nchunk_elem); - bshuf_trans_elem(tmp_buf, out, 2, nchunk_elem, size); - } - - free(tmp_buf); - return count; - } -} - - -/* Creates a mask made up of the most significant - * bit of each byte of 'input' - */ -int32_t move_byte_mask_neon(uint8x16_t input) { - - return ( ((input[0] & 0x80) >> 7) | (((input[1] & 0x80) >> 7) << 1) | (((input[2] & 0x80) >> 7) << 2) | (((input[3] & 0x80) >> 7) << 3) - | (((input[4] & 0x80) >> 7) << 4) | (((input[5] & 0x80) >> 7) << 5) | (((input[6] & 0x80) >> 7) << 6) | (((input[7] & 0x80) >> 7) << 7) - | (((input[8] & 0x80) >> 7) << 8) | (((input[9] & 0x80) >> 7) << 9) | (((input[10] & 0x80) >> 7) << 10) | (((input[11] & 0x80) >> 7) << 11) - | (((input[12] & 0x80) >> 7) << 12) | (((input[13] & 0x80) >> 7) << 13) | (((input[14] & 0x80) >> 7) << 14) | (((input[15] & 0x80) >> 7) << 15) - ); -} - -/* Transpose bits within bytes. 
*/ -int64_t bshuf_trans_bit_byte_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - - size_t ii, kk; - const char* in_b = (const char*) in; - char* out_b = (char*) out; - uint16_t* out_ui16; - - int64_t count; - - size_t nbyte = elem_size * size; - - CHECK_MULT_EIGHT(nbyte); - - int16x8_t xmm; - int32_t bt; - - for (ii = 0; ii + 15 < nbyte; ii += 16) { - xmm = vld1q_s16((int16_t *) (in_b + ii)); - for (kk = 0; kk < 8; kk++) { - bt = move_byte_mask_neon((uint8x16_t) xmm); - xmm = vshlq_n_s16(xmm, 1); - out_ui16 = (uint16_t*) &out_b[((7 - kk) * nbyte + ii) / 8]; - *out_ui16 = bt; - } - } - count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size, - nbyte - nbyte % 16); - return count; -} - - -/* Transpose bits within elements. */ -int64_t bshuf_trans_bit_elem_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; - - CHECK_MULT_EIGHT(size); - - void* tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - - count = bshuf_trans_byte_elem_NEON(in, out, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_trans_bit_byte_NEON(out, tmp_buf, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); - - free(tmp_buf); - - return count; -} - - -/* For data organized into a row for each bit (8 * elem_size rows), transpose - * the bytes. 
*/ -int64_t bshuf_trans_byte_bitrow_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - - size_t ii, jj; - const char* in_b = (const char*) in; - char* out_b = (char*) out; - - CHECK_MULT_EIGHT(size); - - size_t nrows = 8 * elem_size; - size_t nbyte_row = size / 8; - - int8x16_t a0, b0, c0, d0, e0, f0, g0, h0; - int8x16_t a1, b1, c1, d1, e1, f1, g1, h1; - int64x1_t *as, *bs, *cs, *ds, *es, *fs, *gs, *hs; - - for (ii = 0; ii + 7 < nrows; ii += 8) { - for (jj = 0; jj + 15 < nbyte_row; jj += 16) { - a0 = vld1q_s8(in_b + (ii + 0)*nbyte_row + jj); - b0 = vld1q_s8(in_b + (ii + 1)*nbyte_row + jj); - c0 = vld1q_s8(in_b + (ii + 2)*nbyte_row + jj); - d0 = vld1q_s8(in_b + (ii + 3)*nbyte_row + jj); - e0 = vld1q_s8(in_b + (ii + 4)*nbyte_row + jj); - f0 = vld1q_s8(in_b + (ii + 5)*nbyte_row + jj); - g0 = vld1q_s8(in_b + (ii + 6)*nbyte_row + jj); - h0 = vld1q_s8(in_b + (ii + 7)*nbyte_row + jj); - - a1 = vzip1q_s8(a0, b0); - b1 = vzip1q_s8(c0, d0); - c1 = vzip1q_s8(e0, f0); - d1 = vzip1q_s8(g0, h0); - e1 = vzip2q_s8(a0, b0); - f1 = vzip2q_s8(c0, d0); - g1 = vzip2q_s8(e0, f0); - h1 = vzip2q_s8(g0, h0); - - a0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (a1), vreinterpretq_s16_s8 (b1)); - b0= (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (c1), vreinterpretq_s16_s8 (d1)); - c0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (a1), vreinterpretq_s16_s8 (b1)); - d0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (c1), vreinterpretq_s16_s8 (d1)); - e0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (e1), vreinterpretq_s16_s8 (f1)); - f0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (g1), vreinterpretq_s16_s8 (h1)); - g0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (e1), vreinterpretq_s16_s8 (f1)); - h0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (g1), vreinterpretq_s16_s8 (h1)); - - a1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (b0)); - b1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (b0)); - c1 
= (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (c0), vreinterpretq_s32_s8 (d0)); - d1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (c0), vreinterpretq_s32_s8 (d0)); - e1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (f0)); - f1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (f0)); - g1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (g0), vreinterpretq_s32_s8 (h0)); - h1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (g0), vreinterpretq_s32_s8 (h0)); - - as = (int64x1_t *) &a1; - bs = (int64x1_t *) &b1; - cs = (int64x1_t *) &c1; - ds = (int64x1_t *) &d1; - es = (int64x1_t *) &e1; - fs = (int64x1_t *) &f1; - gs = (int64x1_t *) &g1; - hs = (int64x1_t *) &h1; - - vst1_s64((int64_t *)(out_b + (jj + 0) * nrows + ii), *as); - vst1_s64((int64_t *)(out_b + (jj + 1) * nrows + ii), *(as + 1)); - vst1_s64((int64_t *)(out_b + (jj + 2) * nrows + ii), *bs); - vst1_s64((int64_t *)(out_b + (jj + 3) * nrows + ii), *(bs + 1)); - vst1_s64((int64_t *)(out_b + (jj + 4) * nrows + ii), *cs); - vst1_s64((int64_t *)(out_b + (jj + 5) * nrows + ii), *(cs + 1)); - vst1_s64((int64_t *)(out_b + (jj + 6) * nrows + ii), *ds); - vst1_s64((int64_t *)(out_b + (jj + 7) * nrows + ii), *(ds + 1)); - vst1_s64((int64_t *)(out_b + (jj + 8) * nrows + ii), *es); - vst1_s64((int64_t *)(out_b + (jj + 9) * nrows + ii), *(es + 1)); - vst1_s64((int64_t *)(out_b + (jj + 10) * nrows + ii), *fs); - vst1_s64((int64_t *)(out_b + (jj + 11) * nrows + ii), *(fs + 1)); - vst1_s64((int64_t *)(out_b + (jj + 12) * nrows + ii), *gs); - vst1_s64((int64_t *)(out_b + (jj + 13) * nrows + ii), *(gs + 1)); - vst1_s64((int64_t *)(out_b + (jj + 14) * nrows + ii), *hs); - vst1_s64((int64_t *)(out_b + (jj + 15) * nrows + ii), *(hs + 1)); - } - for (jj = nbyte_row - nbyte_row % 16; jj < nbyte_row; jj ++) { - out_b[jj * nrows + ii + 0] = in_b[(ii + 0)*nbyte_row + jj]; - out_b[jj * nrows + ii + 1] = in_b[(ii + 1)*nbyte_row + jj]; - out_b[jj * nrows + ii + 2] = in_b[(ii + 
2)*nbyte_row + jj]; - out_b[jj * nrows + ii + 3] = in_b[(ii + 3)*nbyte_row + jj]; - out_b[jj * nrows + ii + 4] = in_b[(ii + 4)*nbyte_row + jj]; - out_b[jj * nrows + ii + 5] = in_b[(ii + 5)*nbyte_row + jj]; - out_b[jj * nrows + ii + 6] = in_b[(ii + 6)*nbyte_row + jj]; - out_b[jj * nrows + ii + 7] = in_b[(ii + 7)*nbyte_row + jj]; - } - } - return size * elem_size; -} - - -/* Shuffle bits within the bytes of eight element blocks. */ -int64_t bshuf_shuffle_bit_eightelem_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - - CHECK_MULT_EIGHT(size); - - // With a bit of care, this could be written such that such that it is - // in_buf = out_buf safe. - const char* in_b = (const char*) in; - uint16_t* out_ui16 = (uint16_t*) out; - - size_t ii, jj, kk; - size_t nbyte = elem_size * size; - - int16x8_t xmm; - int32_t bt; - - if (elem_size % 2) { - bshuf_shuffle_bit_eightelem_scal(in, out, size, elem_size); - } else { - for (ii = 0; ii + 8 * elem_size - 1 < nbyte; - ii += 8 * elem_size) { - for (jj = 0; jj + 15 < 8 * elem_size; jj += 16) { - xmm = vld1q_s16((int16_t *) &in_b[ii + jj]); - for (kk = 0; kk < 8; kk++) { - bt = move_byte_mask_neon((uint8x16_t) xmm); - xmm = vshlq_n_s16(xmm, 1); - size_t ind = (ii + jj / 8 + (7 - kk) * elem_size); - out_ui16[ind / 2] = bt; - } - } - } - } - return size * elem_size; -} - - -/* Untranspose bits within elements. 
*/ -int64_t bshuf_untrans_bit_elem_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; - - CHECK_MULT_EIGHT(size); - - void* tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - - count = bshuf_trans_byte_bitrow_NEON(in, tmp_buf, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_shuffle_bit_eightelem_NEON(tmp_buf, out, size, elem_size); - - free(tmp_buf); - - return count; -} - -#else // #ifdef USEARMNEON - -int64_t bshuf_untrans_bit_elem_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -13; -} - - -int64_t bshuf_trans_bit_elem_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -13; -} - - -int64_t bshuf_trans_byte_bitrow_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -13; -} - - -int64_t bshuf_trans_bit_byte_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -13; -} - - -int64_t bshuf_trans_byte_elem_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -13; -} - - -int64_t bshuf_trans_byte_elem_NEON_64(const void* in, void* out, const size_t size) { - return -13; -} - - -int64_t bshuf_trans_byte_elem_NEON_32(const void* in, void* out, const size_t size) { - return -13; -} - - -int64_t bshuf_trans_byte_elem_NEON_16(const void* in, void* out, const size_t size) { - return -13; -} - - -int64_t bshuf_shuffle_bit_eightelem_NEON(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -13; -} - - -#endif - - - - - -/* ---- Worker code that uses SSE2 ---- - * - * The following code makes use of the SSE2 instruction set and specialized - * 16 byte registers. The SSE2 instructions are present on modern x86 - * processors. The first Intel processor microarchitecture supporting SSE2 was - * Pentium 4 (2000). 
- * - */ - -#ifdef USESSE2 - -/* Transpose bytes within elements for 16 bit elements. */ -int64_t bshuf_trans_byte_elem_SSE_16(const void* in, void* out, const size_t size) { - - size_t ii; - const char *in_b = (const char*) in; - char *out_b = (char*) out; - __m128i a0, b0, a1, b1; - - for (ii=0; ii + 15 < size; ii += 16) { - a0 = _mm_loadu_si128((__m128i *) &in_b[2*ii + 0*16]); - b0 = _mm_loadu_si128((__m128i *) &in_b[2*ii + 1*16]); - - a1 = _mm_unpacklo_epi8(a0, b0); - b1 = _mm_unpackhi_epi8(a0, b0); - - a0 = _mm_unpacklo_epi8(a1, b1); - b0 = _mm_unpackhi_epi8(a1, b1); - - a1 = _mm_unpacklo_epi8(a0, b0); - b1 = _mm_unpackhi_epi8(a0, b0); - - a0 = _mm_unpacklo_epi8(a1, b1); - b0 = _mm_unpackhi_epi8(a1, b1); - - _mm_storeu_si128((__m128i *) &out_b[0*size + ii], a0); - _mm_storeu_si128((__m128i *) &out_b[1*size + ii], b0); - } - return bshuf_trans_byte_elem_remainder(in, out, size, 2, - size - size % 16); -} - - -/* Transpose bytes within elements for 32 bit elements. */ -int64_t bshuf_trans_byte_elem_SSE_32(const void* in, void* out, const size_t size) { - - size_t ii; - const char *in_b; - char *out_b; - in_b = (const char*) in; - out_b = (char*) out; - __m128i a0, b0, c0, d0, a1, b1, c1, d1; - - for (ii=0; ii + 15 < size; ii += 16) { - a0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 0*16]); - b0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 1*16]); - c0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 2*16]); - d0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 3*16]); - - a1 = _mm_unpacklo_epi8(a0, b0); - b1 = _mm_unpackhi_epi8(a0, b0); - c1 = _mm_unpacklo_epi8(c0, d0); - d1 = _mm_unpackhi_epi8(c0, d0); - - a0 = _mm_unpacklo_epi8(a1, b1); - b0 = _mm_unpackhi_epi8(a1, b1); - c0 = _mm_unpacklo_epi8(c1, d1); - d0 = _mm_unpackhi_epi8(c1, d1); - - a1 = _mm_unpacklo_epi8(a0, b0); - b1 = _mm_unpackhi_epi8(a0, b0); - c1 = _mm_unpacklo_epi8(c0, d0); - d1 = _mm_unpackhi_epi8(c0, d0); - - a0 = _mm_unpacklo_epi64(a1, c1); - b0 = _mm_unpackhi_epi64(a1, c1); - c0 = _mm_unpacklo_epi64(b1, 
d1); - d0 = _mm_unpackhi_epi64(b1, d1); - - _mm_storeu_si128((__m128i *) &out_b[0*size + ii], a0); - _mm_storeu_si128((__m128i *) &out_b[1*size + ii], b0); - _mm_storeu_si128((__m128i *) &out_b[2*size + ii], c0); - _mm_storeu_si128((__m128i *) &out_b[3*size + ii], d0); - } - return bshuf_trans_byte_elem_remainder(in, out, size, 4, - size - size % 16); -} - - -/* Transpose bytes within elements for 64 bit elements. */ -int64_t bshuf_trans_byte_elem_SSE_64(const void* in, void* out, const size_t size) { - - size_t ii; - const char* in_b = (const char*) in; - char* out_b = (char*) out; - __m128i a0, b0, c0, d0, e0, f0, g0, h0; - __m128i a1, b1, c1, d1, e1, f1, g1, h1; - - for (ii=0; ii + 15 < size; ii += 16) { - a0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 0*16]); - b0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 1*16]); - c0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 2*16]); - d0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 3*16]); - e0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 4*16]); - f0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 5*16]); - g0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 6*16]); - h0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 7*16]); - - a1 = _mm_unpacklo_epi8(a0, b0); - b1 = _mm_unpackhi_epi8(a0, b0); - c1 = _mm_unpacklo_epi8(c0, d0); - d1 = _mm_unpackhi_epi8(c0, d0); - e1 = _mm_unpacklo_epi8(e0, f0); - f1 = _mm_unpackhi_epi8(e0, f0); - g1 = _mm_unpacklo_epi8(g0, h0); - h1 = _mm_unpackhi_epi8(g0, h0); - - a0 = _mm_unpacklo_epi8(a1, b1); - b0 = _mm_unpackhi_epi8(a1, b1); - c0 = _mm_unpacklo_epi8(c1, d1); - d0 = _mm_unpackhi_epi8(c1, d1); - e0 = _mm_unpacklo_epi8(e1, f1); - f0 = _mm_unpackhi_epi8(e1, f1); - g0 = _mm_unpacklo_epi8(g1, h1); - h0 = _mm_unpackhi_epi8(g1, h1); - - a1 = _mm_unpacklo_epi32(a0, c0); - b1 = _mm_unpackhi_epi32(a0, c0); - c1 = _mm_unpacklo_epi32(b0, d0); - d1 = _mm_unpackhi_epi32(b0, d0); - e1 = _mm_unpacklo_epi32(e0, g0); - f1 = _mm_unpackhi_epi32(e0, g0); - g1 = _mm_unpacklo_epi32(f0, h0); - h1 = _mm_unpackhi_epi32(f0, h0); - - 
a0 = _mm_unpacklo_epi64(a1, e1); - b0 = _mm_unpackhi_epi64(a1, e1); - c0 = _mm_unpacklo_epi64(b1, f1); - d0 = _mm_unpackhi_epi64(b1, f1); - e0 = _mm_unpacklo_epi64(c1, g1); - f0 = _mm_unpackhi_epi64(c1, g1); - g0 = _mm_unpacklo_epi64(d1, h1); - h0 = _mm_unpackhi_epi64(d1, h1); - - _mm_storeu_si128((__m128i *) &out_b[0*size + ii], a0); - _mm_storeu_si128((__m128i *) &out_b[1*size + ii], b0); - _mm_storeu_si128((__m128i *) &out_b[2*size + ii], c0); - _mm_storeu_si128((__m128i *) &out_b[3*size + ii], d0); - _mm_storeu_si128((__m128i *) &out_b[4*size + ii], e0); - _mm_storeu_si128((__m128i *) &out_b[5*size + ii], f0); - _mm_storeu_si128((__m128i *) &out_b[6*size + ii], g0); - _mm_storeu_si128((__m128i *) &out_b[7*size + ii], h0); - } - return bshuf_trans_byte_elem_remainder(in, out, size, 8, - size - size % 16); -} - - -/* Transpose bytes within elements using best SSE algorithm available. */ -int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; - - // Trivial cases: power of 2 bytes. - switch (elem_size) { - case 1: - count = bshuf_copy(in, out, size, elem_size); - return count; - case 2: - count = bshuf_trans_byte_elem_SSE_16(in, out, size); - return count; - case 4: - count = bshuf_trans_byte_elem_SSE_32(in, out, size); - return count; - case 8: - count = bshuf_trans_byte_elem_SSE_64(in, out, size); - return count; - } - - // Worst case: odd number of bytes. Turns out that this is faster for - // (odd * 2) byte elements as well (hence % 4). - if (elem_size % 4) { - count = bshuf_trans_byte_elem_scal(in, out, size, elem_size); - return count; - } - - // Multiple of power of 2: transpose hierarchically. 
- { - size_t nchunk_elem; - void* tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - - if ((elem_size % 8) == 0) { - nchunk_elem = elem_size / 8; - TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int64_t); - count = bshuf_trans_byte_elem_SSE_64(out, tmp_buf, - size * nchunk_elem); - bshuf_trans_elem(tmp_buf, out, 8, nchunk_elem, size); - } else if ((elem_size % 4) == 0) { - nchunk_elem = elem_size / 4; - TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int32_t); - count = bshuf_trans_byte_elem_SSE_32(out, tmp_buf, - size * nchunk_elem); - bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size); - } else { - // Not used since scalar algorithm is faster. - nchunk_elem = elem_size / 2; - TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t); - count = bshuf_trans_byte_elem_SSE_16(out, tmp_buf, - size * nchunk_elem); - bshuf_trans_elem(tmp_buf, out, 2, nchunk_elem, size); - } - - free(tmp_buf); - return count; - } -} - - -/* Transpose bits within bytes. */ -int64_t bshuf_trans_bit_byte_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - - size_t ii, kk; - const char* in_b = (const char*) in; - char* out_b = (char*) out; - uint16_t* out_ui16; - - int64_t count; - - size_t nbyte = elem_size * size; - - CHECK_MULT_EIGHT(nbyte); - - __m128i xmm; - int32_t bt; - - for (ii = 0; ii + 15 < nbyte; ii += 16) { - xmm = _mm_loadu_si128((__m128i *) &in_b[ii]); - for (kk = 0; kk < 8; kk++) { - bt = _mm_movemask_epi8(xmm); - xmm = _mm_slli_epi16(xmm, 1); - out_ui16 = (uint16_t*) &out_b[((7 - kk) * nbyte + ii) / 8]; - *out_ui16 = bt; - } - } - count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size, - nbyte - nbyte % 16); - return count; -} - - -/* Transpose bits within elements. 
*/ -int64_t bshuf_trans_bit_elem_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; - - CHECK_MULT_EIGHT(size); - - void* tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - - count = bshuf_trans_byte_elem_SSE(in, out, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_trans_bit_byte_SSE(out, tmp_buf, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); - - free(tmp_buf); - - return count; -} - - -/* For data organized into a row for each bit (8 * elem_size rows), transpose - * the bytes. */ -int64_t bshuf_trans_byte_bitrow_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - - size_t ii, jj; - const char* in_b = (const char*) in; - char* out_b = (char*) out; - - CHECK_MULT_EIGHT(size); - - size_t nrows = 8 * elem_size; - size_t nbyte_row = size / 8; - - __m128i a0, b0, c0, d0, e0, f0, g0, h0; - __m128i a1, b1, c1, d1, e1, f1, g1, h1; - __m128 *as, *bs, *cs, *ds, *es, *fs, *gs, *hs; - - for (ii = 0; ii + 7 < nrows; ii += 8) { - for (jj = 0; jj + 15 < nbyte_row; jj += 16) { - a0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 0)*nbyte_row + jj]); - b0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 1)*nbyte_row + jj]); - c0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 2)*nbyte_row + jj]); - d0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 3)*nbyte_row + jj]); - e0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 4)*nbyte_row + jj]); - f0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 5)*nbyte_row + jj]); - g0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 6)*nbyte_row + jj]); - h0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 7)*nbyte_row + jj]); - - - a1 = _mm_unpacklo_epi8(a0, b0); - b1 = _mm_unpacklo_epi8(c0, d0); - c1 = _mm_unpacklo_epi8(e0, f0); - d1 = _mm_unpacklo_epi8(g0, h0); - e1 = _mm_unpackhi_epi8(a0, b0); - f1 = _mm_unpackhi_epi8(c0, d0); - g1 = _mm_unpackhi_epi8(e0, f0); - h1 = _mm_unpackhi_epi8(g0, h0); - - - a0 = 
_mm_unpacklo_epi16(a1, b1); - b0 = _mm_unpacklo_epi16(c1, d1); - c0 = _mm_unpackhi_epi16(a1, b1); - d0 = _mm_unpackhi_epi16(c1, d1); - - e0 = _mm_unpacklo_epi16(e1, f1); - f0 = _mm_unpacklo_epi16(g1, h1); - g0 = _mm_unpackhi_epi16(e1, f1); - h0 = _mm_unpackhi_epi16(g1, h1); - - - a1 = _mm_unpacklo_epi32(a0, b0); - b1 = _mm_unpackhi_epi32(a0, b0); - - c1 = _mm_unpacklo_epi32(c0, d0); - d1 = _mm_unpackhi_epi32(c0, d0); - - e1 = _mm_unpacklo_epi32(e0, f0); - f1 = _mm_unpackhi_epi32(e0, f0); - - g1 = _mm_unpacklo_epi32(g0, h0); - h1 = _mm_unpackhi_epi32(g0, h0); - - // We don't have a storeh instruction for integers, so interpret - // as a float. Have a storel (_mm_storel_epi64). - as = (__m128 *) &a1; - bs = (__m128 *) &b1; - cs = (__m128 *) &c1; - ds = (__m128 *) &d1; - es = (__m128 *) &e1; - fs = (__m128 *) &f1; - gs = (__m128 *) &g1; - hs = (__m128 *) &h1; - - _mm_storel_pi((__m64 *) &out_b[(jj + 0) * nrows + ii], *as); - _mm_storel_pi((__m64 *) &out_b[(jj + 2) * nrows + ii], *bs); - _mm_storel_pi((__m64 *) &out_b[(jj + 4) * nrows + ii], *cs); - _mm_storel_pi((__m64 *) &out_b[(jj + 6) * nrows + ii], *ds); - _mm_storel_pi((__m64 *) &out_b[(jj + 8) * nrows + ii], *es); - _mm_storel_pi((__m64 *) &out_b[(jj + 10) * nrows + ii], *fs); - _mm_storel_pi((__m64 *) &out_b[(jj + 12) * nrows + ii], *gs); - _mm_storel_pi((__m64 *) &out_b[(jj + 14) * nrows + ii], *hs); - - _mm_storeh_pi((__m64 *) &out_b[(jj + 1) * nrows + ii], *as); - _mm_storeh_pi((__m64 *) &out_b[(jj + 3) * nrows + ii], *bs); - _mm_storeh_pi((__m64 *) &out_b[(jj + 5) * nrows + ii], *cs); - _mm_storeh_pi((__m64 *) &out_b[(jj + 7) * nrows + ii], *ds); - _mm_storeh_pi((__m64 *) &out_b[(jj + 9) * nrows + ii], *es); - _mm_storeh_pi((__m64 *) &out_b[(jj + 11) * nrows + ii], *fs); - _mm_storeh_pi((__m64 *) &out_b[(jj + 13) * nrows + ii], *gs); - _mm_storeh_pi((__m64 *) &out_b[(jj + 15) * nrows + ii], *hs); - } - for (jj = nbyte_row - nbyte_row % 16; jj < nbyte_row; jj ++) { - out_b[jj * nrows + ii + 0] = in_b[(ii + 
0)*nbyte_row + jj]; - out_b[jj * nrows + ii + 1] = in_b[(ii + 1)*nbyte_row + jj]; - out_b[jj * nrows + ii + 2] = in_b[(ii + 2)*nbyte_row + jj]; - out_b[jj * nrows + ii + 3] = in_b[(ii + 3)*nbyte_row + jj]; - out_b[jj * nrows + ii + 4] = in_b[(ii + 4)*nbyte_row + jj]; - out_b[jj * nrows + ii + 5] = in_b[(ii + 5)*nbyte_row + jj]; - out_b[jj * nrows + ii + 6] = in_b[(ii + 6)*nbyte_row + jj]; - out_b[jj * nrows + ii + 7] = in_b[(ii + 7)*nbyte_row + jj]; - } - } - return size * elem_size; -} - - -/* Shuffle bits within the bytes of eight element blocks. */ -int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - - CHECK_MULT_EIGHT(size); - - // With a bit of care, this could be written such that such that it is - // in_buf = out_buf safe. - const char* in_b = (const char*) in; - uint16_t* out_ui16 = (uint16_t*) out; - - size_t ii, jj, kk; - size_t nbyte = elem_size * size; - - __m128i xmm; - int32_t bt; - - if (elem_size % 2) { - bshuf_shuffle_bit_eightelem_scal(in, out, size, elem_size); - } else { - for (ii = 0; ii + 8 * elem_size - 1 < nbyte; - ii += 8 * elem_size) { - for (jj = 0; jj + 15 < 8 * elem_size; jj += 16) { - xmm = _mm_loadu_si128((__m128i *) &in_b[ii + jj]); - for (kk = 0; kk < 8; kk++) { - bt = _mm_movemask_epi8(xmm); - xmm = _mm_slli_epi16(xmm, 1); - size_t ind = (ii + jj / 8 + (7 - kk) * elem_size); - out_ui16[ind / 2] = bt; - } - } - } - } - return size * elem_size; -} - - -/* Untranspose bits within elements. 
*/ -int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; - - CHECK_MULT_EIGHT(size); - - void* tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - - count = bshuf_trans_byte_bitrow_SSE(in, tmp_buf, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_shuffle_bit_eightelem_SSE(tmp_buf, out, size, elem_size); - - free(tmp_buf); - - return count; -} - -#else // #ifdef USESSE2 - - -int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -11; -} - - -int64_t bshuf_trans_bit_elem_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -11; -} - - -int64_t bshuf_trans_byte_bitrow_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -11; -} - - -int64_t bshuf_trans_bit_byte_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -11; -} - - -int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -11; -} - - -int64_t bshuf_trans_byte_elem_SSE_64(const void* in, void* out, const size_t size) { - return -11; -} - - -int64_t bshuf_trans_byte_elem_SSE_32(const void* in, void* out, const size_t size) { - return -11; -} - - -int64_t bshuf_trans_byte_elem_SSE_16(const void* in, void* out, const size_t size) { - return -11; -} - - -int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -11; -} - - -#endif // #ifdef USESSE2 - - -/* ---- Code that requires AVX2. Intel Haswell (2013) and later. ---- */ - -/* ---- Worker code that uses AVX2 ---- - * - * The following code makes use of the AVX2 instruction set and specialized - * 32 byte registers. The AVX2 instructions are present on newer x86 - * processors. The first Intel processor microarchitecture supporting AVX2 was - * Haswell (2013). 
- * - */ - -#ifdef USEAVX2 - -/* Transpose bits within bytes. */ -int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size, - const size_t elem_size) { - - size_t ii, kk; - const char* in_b = (const char*) in; - char* out_b = (char*) out; - int32_t* out_i32; - - size_t nbyte = elem_size * size; - - int64_t count; - - __m256i ymm; - int32_t bt; - - for (ii = 0; ii + 31 < nbyte; ii += 32) { - ymm = _mm256_loadu_si256((__m256i *) &in_b[ii]); - for (kk = 0; kk < 8; kk++) { - bt = _mm256_movemask_epi8(ymm); - ymm = _mm256_slli_epi16(ymm, 1); - out_i32 = (int32_t*) &out_b[((7 - kk) * nbyte + ii) / 8]; - *out_i32 = bt; - } - } - count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size, - nbyte - nbyte % 32); - return count; -} - - -/* Transpose bits within elements. */ -int64_t bshuf_trans_bit_elem_AVX(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; - - CHECK_MULT_EIGHT(size); - - void* tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - - count = bshuf_trans_byte_elem_SSE(in, out, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_trans_bit_byte_AVX(out, tmp_buf, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); - - free(tmp_buf); - - return count; -} - - -/* For data organized into a row for each bit (8 * elem_size rows), transpose - * the bytes. 
*/ -int64_t bshuf_trans_byte_bitrow_AVX(const void* in, void* out, const size_t size, - const size_t elem_size) { - - size_t hh, ii, jj, kk, mm; - const char* in_b = (const char*) in; - char* out_b = (char*) out; - - CHECK_MULT_EIGHT(size); - - size_t nrows = 8 * elem_size; - size_t nbyte_row = size / 8; - - if (elem_size % 4) return bshuf_trans_byte_bitrow_SSE(in, out, size, - elem_size); - - __m256i ymm_0[8]; - __m256i ymm_1[8]; - __m256i ymm_storeage[8][4]; - - for (jj = 0; jj + 31 < nbyte_row; jj += 32) { - for (ii = 0; ii + 3 < elem_size; ii += 4) { - for (hh = 0; hh < 4; hh ++) { - - for (kk = 0; kk < 8; kk ++){ - ymm_0[kk] = _mm256_loadu_si256((__m256i *) &in_b[ - (ii * 8 + hh * 8 + kk) * nbyte_row + jj]); - } - - for (kk = 0; kk < 4; kk ++){ - ymm_1[kk] = _mm256_unpacklo_epi8(ymm_0[kk * 2], - ymm_0[kk * 2 + 1]); - ymm_1[kk + 4] = _mm256_unpackhi_epi8(ymm_0[kk * 2], - ymm_0[kk * 2 + 1]); - } - - for (kk = 0; kk < 2; kk ++){ - for (mm = 0; mm < 2; mm ++){ - ymm_0[kk * 4 + mm] = _mm256_unpacklo_epi16( - ymm_1[kk * 4 + mm * 2], - ymm_1[kk * 4 + mm * 2 + 1]); - ymm_0[kk * 4 + mm + 2] = _mm256_unpackhi_epi16( - ymm_1[kk * 4 + mm * 2], - ymm_1[kk * 4 + mm * 2 + 1]); - } - } - - for (kk = 0; kk < 4; kk ++){ - ymm_1[kk * 2] = _mm256_unpacklo_epi32(ymm_0[kk * 2], - ymm_0[kk * 2 + 1]); - ymm_1[kk * 2 + 1] = _mm256_unpackhi_epi32(ymm_0[kk * 2], - ymm_0[kk * 2 + 1]); - } - - for (kk = 0; kk < 8; kk ++){ - ymm_storeage[kk][hh] = ymm_1[kk]; - } - } - - for (mm = 0; mm < 8; mm ++) { - - for (kk = 0; kk < 4; kk ++){ - ymm_0[kk] = ymm_storeage[mm][kk]; - } - - ymm_1[0] = _mm256_unpacklo_epi64(ymm_0[0], ymm_0[1]); - ymm_1[1] = _mm256_unpacklo_epi64(ymm_0[2], ymm_0[3]); - ymm_1[2] = _mm256_unpackhi_epi64(ymm_0[0], ymm_0[1]); - ymm_1[3] = _mm256_unpackhi_epi64(ymm_0[2], ymm_0[3]); - - ymm_0[0] = _mm256_permute2x128_si256(ymm_1[0], ymm_1[1], 32); - ymm_0[1] = _mm256_permute2x128_si256(ymm_1[2], ymm_1[3], 32); - ymm_0[2] = _mm256_permute2x128_si256(ymm_1[0], ymm_1[1], 49); - 
ymm_0[3] = _mm256_permute2x128_si256(ymm_1[2], ymm_1[3], 49); - - _mm256_storeu_si256((__m256i *) &out_b[ - (jj + mm * 2 + 0 * 16) * nrows + ii * 8], ymm_0[0]); - _mm256_storeu_si256((__m256i *) &out_b[ - (jj + mm * 2 + 0 * 16 + 1) * nrows + ii * 8], ymm_0[1]); - _mm256_storeu_si256((__m256i *) &out_b[ - (jj + mm * 2 + 1 * 16) * nrows + ii * 8], ymm_0[2]); - _mm256_storeu_si256((__m256i *) &out_b[ - (jj + mm * 2 + 1 * 16 + 1) * nrows + ii * 8], ymm_0[3]); - } - } - } - for (ii = 0; ii < nrows; ii ++ ) { - for (jj = nbyte_row - nbyte_row % 32; jj < nbyte_row; jj ++) { - out_b[jj * nrows + ii] = in_b[ii * nbyte_row + jj]; - } - } - return size * elem_size; -} - - -/* Shuffle bits within the bytes of eight element blocks. */ -int64_t bshuf_shuffle_bit_eightelem_AVX(const void* in, void* out, const size_t size, - const size_t elem_size) { - - CHECK_MULT_EIGHT(size); - - // With a bit of care, this could be written such that such that it is - // in_buf = out_buf safe. - const char* in_b = (const char*) in; - char* out_b = (char*) out; - - size_t ii, jj, kk; - size_t nbyte = elem_size * size; - - __m256i ymm; - int32_t bt; - - if (elem_size % 4) { - return bshuf_shuffle_bit_eightelem_SSE(in, out, size, elem_size); - } else { - for (jj = 0; jj + 31 < 8 * elem_size; jj += 32) { - for (ii = 0; ii + 8 * elem_size - 1 < nbyte; - ii += 8 * elem_size) { - ymm = _mm256_loadu_si256((__m256i *) &in_b[ii + jj]); - for (kk = 0; kk < 8; kk++) { - bt = _mm256_movemask_epi8(ymm); - ymm = _mm256_slli_epi16(ymm, 1); - size_t ind = (ii + jj / 8 + (7 - kk) * elem_size); - * (int32_t *) &out_b[ind] = bt; - } - } - } - } - return size * elem_size; -} - - -/* Untranspose bits within elements. 
*/ -int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; - - CHECK_MULT_EIGHT(size); - - void* tmp_buf = malloc(size * elem_size); - if (tmp_buf == NULL) return -1; - - count = bshuf_trans_byte_bitrow_AVX(in, tmp_buf, size, elem_size); - CHECK_ERR_FREE(count, tmp_buf); - count = bshuf_shuffle_bit_eightelem_AVX(tmp_buf, out, size, elem_size); - - free(tmp_buf); - return count; -} - - -#else // #ifdef USEAVX2 - -int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -12; -} - - -int64_t bshuf_trans_bit_elem_AVX(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -12; -} - - -int64_t bshuf_trans_byte_bitrow_AVX(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -12; -} - - -int64_t bshuf_shuffle_bit_eightelem_AVX(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -12; -} - - -int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size, - const size_t elem_size) { - return -12; -} - -#endif // #ifdef USEAVX2 - - -/* ---- Drivers selecting best instruction set at compile time. 
---- */ - -int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; -#ifdef USEAVX2 - count = bshuf_trans_bit_elem_AVX(in, out, size, elem_size); -#elif defined(USESSE2) - count = bshuf_trans_bit_elem_SSE(in, out, size, elem_size); -#elif defined(USEARMNEON) - count = bshuf_trans_bit_elem_NEON(in, out, size, elem_size); -#else - count = bshuf_trans_bit_elem_scal(in, out, size, elem_size); -#endif - return count; -} - - -int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size, - const size_t elem_size) { - - int64_t count; -#ifdef USEAVX2 - count = bshuf_untrans_bit_elem_AVX(in, out, size, elem_size); -#elif defined(USESSE2) - count = bshuf_untrans_bit_elem_SSE(in, out, size, elem_size); -#elif defined(USEARMNEON) - count = bshuf_untrans_bit_elem_NEON(in, out, size, elem_size); -#else - count = bshuf_untrans_bit_elem_scal(in, out, size, elem_size); -#endif - return count; -} - - -/* ---- Wrappers for implementing blocking ---- */ - -/* Wrap a function for processing a single block to process an entire buffer in - * parallel. 
*/ -int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out, \ - const size_t size, const size_t elem_size, size_t block_size) { - - omp_size_t ii = 0; - int64_t err = 0; - int64_t count, cum_count=0; - size_t last_block_size; - size_t leftover_bytes; - size_t this_iter; - char *last_in; - char *last_out; - - - ioc_chain C; - ioc_init(&C, in, out); - - - if (block_size == 0) { - block_size = bshuf_default_block_size(elem_size); - } - if (block_size % BSHUF_BLOCKED_MULT) return -81; - -#if defined(_OPENMP) - #pragma omp parallel for schedule(dynamic, 1) \ - private(count) reduction(+ : cum_count) -#endif - for (ii = 0; ii < (omp_size_t)( size / block_size ); ii ++) { - count = fun(&C, block_size, elem_size); - if (count < 0) err = count; - cum_count += count; - } - - last_block_size = size % block_size; - last_block_size = last_block_size - last_block_size % BSHUF_BLOCKED_MULT; - if (last_block_size) { - count = fun(&C, last_block_size, elem_size); - if (count < 0) err = count; - cum_count += count; - } - - if (err < 0) return err; - - leftover_bytes = size % BSHUF_BLOCKED_MULT * elem_size; - //this_iter; - last_in = (char *) ioc_get_in(&C, &this_iter); - ioc_set_next_in(&C, &this_iter, (void *) (last_in + leftover_bytes)); - last_out = (char *) ioc_get_out(&C, &this_iter); - ioc_set_next_out(&C, &this_iter, (void *) (last_out + leftover_bytes)); - - memcpy(last_out, last_in, leftover_bytes); - - ioc_destroy(&C); - - return cum_count + leftover_bytes; -} - - -/* Bitshuffle a single block. 
*/ -int64_t bshuf_bitshuffle_block(ioc_chain *C_ptr, \ - const size_t size, const size_t elem_size) { - - size_t this_iter; - const void *in; - void *out; - int64_t count; - - - - in = ioc_get_in(C_ptr, &this_iter); - ioc_set_next_in(C_ptr, &this_iter, - (void*) ((char*) in + size * elem_size)); - out = ioc_get_out(C_ptr, &this_iter); - ioc_set_next_out(C_ptr, &this_iter, - (void *) ((char *) out + size * elem_size)); - - count = bshuf_trans_bit_elem(in, out, size, elem_size); - return count; -} - - -/* Bitunshuffle a single block. */ -int64_t bshuf_bitunshuffle_block(ioc_chain* C_ptr, \ - const size_t size, const size_t elem_size) { - - - size_t this_iter; - const void *in; - void *out; - int64_t count; - - - - - in = ioc_get_in(C_ptr, &this_iter); - ioc_set_next_in(C_ptr, &this_iter, - (void*) ((char*) in + size * elem_size)); - out = ioc_get_out(C_ptr, &this_iter); - ioc_set_next_out(C_ptr, &this_iter, - (void *) ((char *) out + size * elem_size)); - - count = bshuf_untrans_bit_elem(in, out, size, elem_size); - return count; -} - - -/* Write a 64 bit unsigned integer to a buffer in big endian order. */ -void bshuf_write_uint64_BE(void* buf, uint64_t num) { - int ii; - uint8_t* b = (uint8_t*) buf; - uint64_t pow28 = 1 << 8; - for (ii = 7; ii >= 0; ii--) { - b[ii] = num % pow28; - num = num / pow28; - } -} - - -/* Read a 64 bit unsigned integer from a buffer big endian order. */ -uint64_t bshuf_read_uint64_BE(void* buf) { - int ii; - uint8_t* b = (uint8_t*) buf; - uint64_t num = 0, pow28 = 1 << 8, cp = 1; - for (ii = 7; ii >= 0; ii--) { - num += b[ii] * cp; - cp *= pow28; - } - return num; -} - - -/* Write a 32 bit unsigned integer to a buffer in big endian order. */ -void bshuf_write_uint32_BE(void* buf, uint32_t num) { - int ii; - uint8_t* b = (uint8_t*) buf; - uint32_t pow28 = 1 << 8; - for (ii = 3; ii >= 0; ii--) { - b[ii] = num % pow28; - num = num / pow28; - } -} - - -/* Read a 32 bit unsigned integer from a buffer big endian order. 
*/ -uint32_t bshuf_read_uint32_BE(const void* buf) { - int ii; - uint8_t* b = (uint8_t*) buf; - uint32_t num = 0, pow28 = 1 << 8, cp = 1; - for (ii = 3; ii >= 0; ii--) { - num += b[ii] * cp; - cp *= pow28; - } - return num; -} - - -/* ---- Public functions ---- - * - * See header file for description and usage. - * - */ - -size_t bshuf_default_block_size(const size_t elem_size) { - // This function needs to be absolutely stable between versions. - // Otherwise encoded data will not be decodable. - - size_t block_size = BSHUF_TARGET_BLOCK_SIZE_B / elem_size; - // Ensure it is a required multiple. - block_size = (block_size / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT; - return MAX(block_size, BSHUF_MIN_RECOMMEND_BLOCK); -} - - -int64_t bshuf_bitshuffle(const void* in, void* out, const size_t size, - const size_t elem_size, size_t block_size) { - - return bshuf_blocked_wrap_fun(&bshuf_bitshuffle_block, in, out, size, - elem_size, block_size); -} - - -int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size, - const size_t elem_size, size_t block_size) { - - return bshuf_blocked_wrap_fun(&bshuf_bitunshuffle_block, in, out, size, - elem_size, block_size); -} - - -#undef TRANS_BIT_8X8 -#undef TRANS_ELEM_TYPE -#undef MAX -#undef CHECK_MULT_EIGHT -#undef CHECK_ERR_FREE - -#undef USESSE2 -#undef USEAVX2 diff --git a/src/bitshuffle/src/bitshuffle_core.h b/src/bitshuffle/src/bitshuffle_core.h deleted file mode 100644 index 7f66b6d3..00000000 --- a/src/bitshuffle/src/bitshuffle_core.h +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Bitshuffle - Filter for improving compression of typed binary data. - * - * This file is part of Bitshuffle - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. - * - * - * Header File - * - * Worker routines return an int64_t which is the number of bytes processed - * if positive or an error code if negative. 
- * - * Error codes: - * -1 : Failed to allocate memory. - * -11 : Missing SSE. - * -12 : Missing AVX. - * -13 : Missing Arm Neon. - * -80 : Input size not a multiple of 8. - * -81 : block_size not multiple of 8. - * -91 : Decompression error, wrong number of bytes processed. - * -1YYY : Error internal to compression routine with error code -YYY. - */ - - -#ifndef BITSHUFFLE_CORE_H -#define BITSHUFFLE_CORE_H - -// We assume GNU g++ defining `__cplusplus` has stdint.h -#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus) -#include -#else - typedef unsigned char uint8_t; - typedef unsigned short uint16_t; - typedef unsigned int uint32_t; - typedef signed int int32_t; - typedef unsigned long long uint64_t; - typedef long long int64_t; -#endif - -#include - - -// These are usually set in the setup.py. -#ifndef BSHUF_VERSION_MAJOR -#define BSHUF_VERSION_MAJOR 0 -#define BSHUF_VERSION_MINOR 3 -#define BSHUF_VERSION_POINT 5 -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/* --- bshuf_using_SSE2 ---- - * - * Whether routines where compiled with the SSE2 instruction set. - * - * Returns - * ------- - * 1 if using SSE2, 0 otherwise. - * - */ -int bshuf_using_SSE2(void); - - -/* ---- bshuf_using_AVX2 ---- - * - * Whether routines where compiled with the AVX2 instruction set. - * - * Returns - * ------- - * 1 if using AVX2, 0 otherwise. - * - */ -int bshuf_using_AVX2(void); - - -/* ---- bshuf_default_block_size ---- - * - * The default block size as function of element size. - * - * This is the block size used by the blocked routines (any routine - * taking a *block_size* argument) when the block_size is not provided - * (zero is passed). - * - * The results of this routine are guaranteed to be stable such that - * shuffled/compressed data can always be decompressed. - * - * Parameters - * ---------- - * elem_size : element size of data to be shuffled/compressed. 
- * - */ -size_t bshuf_default_block_size(const size_t elem_size); - - -/* ---- bshuf_bitshuffle ---- - * - * Bitshuffle the data. - * - * Transpose the bits within elements, in blocks of *block_size* - * elements. - * - * Parameters - * ---------- - * in : input buffer, must be of size * elem_size bytes - * out : output buffer, must be of size * elem_size bytes - * size : number of elements in input - * elem_size : element size of typed data - * block_size : Do transpose in blocks of this many elements. Pass 0 to - * select automatically (recommended). - * - * Returns - * ------- - * number of bytes processed, negative error-code if failed. - * - */ -int64_t bshuf_bitshuffle(const void* in, void* out, const size_t size, - const size_t elem_size, size_t block_size); - - -/* ---- bshuf_bitunshuffle ---- - * - * Unshuffle bitshuffled data. - * - * Untranspose the bits within elements, in blocks of *block_size* - * elements. - * - * To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size* - * must match the parameters used to shuffle the data. - * - * Parameters - * ---------- - * in : input buffer, must be of size * elem_size bytes - * out : output buffer, must be of size * elem_size bytes - * size : number of elements in input - * elem_size : element size of typed data - * block_size : Do transpose in blocks of this many elements. Pass 0 to - * select automatically (recommended). - * - * Returns - * ------- - * number of bytes processed, negative error-code if failed. 
- * - */ -int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size, - const size_t elem_size, size_t block_size); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // BITSHUFFLE_CORE_H diff --git a/src/bitshuffle/src/bitshuffle_internals.h b/src/bitshuffle/src/bitshuffle_internals.h deleted file mode 100644 index e039925c..00000000 --- a/src/bitshuffle/src/bitshuffle_internals.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Bitshuffle - Filter for improving compression of typed binary data. - * - * This file is part of Bitshuffle - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. - */ - - -#ifndef BITSHUFFLE_INTERNALS_H -#define BITSHUFFLE_INTERNALS_H - -// We assume GNU g++ defining `__cplusplus` has stdint.h -#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus) -#include -#else - typedef unsigned char uint8_t; - typedef unsigned short uint16_t; - typedef unsigned int uint32_t; - typedef signed int int32_t; - typedef unsigned long long uint64_t; - typedef long long int64_t; -#endif - -#include -#include "iochain.h" - - -// Constants. -#ifndef BSHUF_MIN_RECOMMEND_BLOCK -#define BSHUF_MIN_RECOMMEND_BLOCK 128 -#define BSHUF_BLOCKED_MULT 8 // Block sizes must be multiple of this. -#define BSHUF_TARGET_BLOCK_SIZE_B 8192 -#endif - - -// Macros. -#define CHECK_ERR_FREE(count, buf) if (count < 0) { free(buf); return count; } - - -#ifdef __cplusplus -extern "C" { -#endif - -/* ---- Utility functions for internal use only ---- */ - -int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size, - const size_t elem_size); - -/* Read a 32 bit unsigned integer from a buffer big endian order. */ -uint32_t bshuf_read_uint32_BE(const void* buf); - -/* Write a 32 bit unsigned integer to a buffer in big endian order. 
*/ -void bshuf_write_uint32_BE(void* buf, uint32_t num); - -int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size, - const size_t elem_size); - -/* Function definition for worker functions that process a single block. */ -typedef int64_t (*bshufBlockFunDef)(ioc_chain* C_ptr, - const size_t size, const size_t elem_size); - -/* Wrap a function for processing a single block to process an entire buffer in - * parallel. */ -int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out, - const size_t size, const size_t elem_size, size_t block_size); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // BITSHUFFLE_INTERNALS_H diff --git a/src/bitshuffle/src/bshuf_h5filter.c b/src/bitshuffle/src/bshuf_h5filter.c deleted file mode 100644 index f67a4a2b..00000000 --- a/src/bitshuffle/src/bshuf_h5filter.c +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Bitshuffle HDF5 filter - * - * This file is part of Bitshuffle - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. - * - */ - -#include "bitshuffle.h" -#include "bshuf_h5filter.h" - - -#define PUSH_ERR(func, minor, str) \ - H5Epush1(__FILE__, func, __LINE__, H5E_PLINE, minor, str) - - -// Prototypes from bitshuffle.c -void bshuf_write_uint64_BE(void* buf, uint64_t num); -uint64_t bshuf_read_uint64_BE(void* buf); -void bshuf_write_uint32_BE(void* buf, uint32_t num); -uint32_t bshuf_read_uint32_BE(const void* buf); - - -// Only called on compresion, not on reverse. 
-herr_t bshuf_h5_set_local(hid_t dcpl, hid_t type, hid_t space){ - - herr_t r; - size_t ii; - - unsigned int elem_size; - - unsigned int flags; - size_t nelements = 8; - size_t nelem_max = 11; - unsigned values[] = {0,0,0,0,0,0,0,0,0,0,0}; - unsigned tmp_values[] = {0,0,0,0,0,0,0,0}; - char msg[80]; - - r = H5Pget_filter_by_id2(dcpl, BSHUF_H5FILTER, &flags, &nelements, - tmp_values, 0, NULL, NULL); - if(r<0) return -1; - - // First 3 slots reserved. Move any passed options to higher addresses. - for (ii=0; ii < nelements && ii + 3 < nelem_max; ii++) { - values[ii + 3] = tmp_values[ii]; - } - - nelements = 3 + nelements; - - values[0] = BSHUF_VERSION_MAJOR; - values[1] = BSHUF_VERSION_MINOR; - - elem_size = H5Tget_size(type); - if(elem_size <= 0) { - PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, - "Invalid element size."); - return -1; - } - - values[2] = elem_size; - - // Validate user supplied arguments. - if (nelements > 3) { - if (values[3] % 8 || values[3] < 0) { - sprintf(msg, "Error in bitshuffle. Invalid block size: %d.", - values[3]); - PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, msg); - return -1; - } - } - if (nelements > 4) { - switch (values[4]) { - case 0: - break; - case BSHUF_H5_COMPRESS_LZ4: - break; - default: - PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, - "Invalid bitshuffle compression."); - } - } - - r = H5Pmodify_filter(dcpl, BSHUF_H5FILTER, flags, nelements, values); - if(r<0) return -1; - - return 1; -} - - -size_t bshuf_h5_filter(unsigned int flags, size_t cd_nelmts, - const unsigned int cd_values[], size_t nbytes, - size_t *buf_size, void **buf) { - - size_t size, elem_size; - int err; - char msg[80]; - size_t block_size = 0; - size_t buf_size_out, nbytes_uncomp, nbytes_out; - char* in_buf = *buf; - void *out_buf; - - if (cd_nelmts < 3) { - PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, - "Not enough parameters."); - return 0; - } - elem_size = cd_values[2]; - - // User specified block size. 
- if (cd_nelmts > 3) block_size = cd_values[3]; - - if (block_size == 0) block_size = bshuf_default_block_size(elem_size); - - // Compression in addition to bitshiffle. - if (cd_nelmts > 4 && cd_values[4] == BSHUF_H5_COMPRESS_LZ4) { - if (flags & H5Z_FLAG_REVERSE) { - // First eight bytes is the number of bytes in the output buffer, - // little endian. - nbytes_uncomp = bshuf_read_uint64_BE(in_buf); - // Override the block size with the one read from the header. - block_size = bshuf_read_uint32_BE((const char*) in_buf + 8) / elem_size; - // Skip over the header. - in_buf += 12; - buf_size_out = nbytes_uncomp; - } else { - nbytes_uncomp = nbytes; - buf_size_out = bshuf_compress_lz4_bound(nbytes_uncomp / elem_size, - elem_size, block_size) + 12; - } - } else { - nbytes_uncomp = nbytes; - buf_size_out = nbytes; - } - - // TODO, remove this restriction by memcopying the extra. - if (nbytes_uncomp % elem_size) { - PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, - "Non integer number of elements."); - return 0; - } - size = nbytes_uncomp / elem_size; - - out_buf = malloc(buf_size_out); - if (out_buf == NULL) { - PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, - "Could not allocate output buffer."); - return 0; - } - - if (cd_nelmts > 4 && cd_values[4] == BSHUF_H5_COMPRESS_LZ4) { - if (flags & H5Z_FLAG_REVERSE) { - // Bit unshuffle/decompress. - err = bshuf_decompress_lz4(in_buf, out_buf, size, elem_size, block_size); - nbytes_out = nbytes_uncomp; - } else { - // Bit shuffle/compress. - // Write the header, described in - // http://www.hdfgroup.org/services/filters/HDF5_LZ4.pdf. - // Techincally we should be using signed integers instead of - // unsigned ones, however for valid inputs (positive numbers) these - // have the same representation. 
- bshuf_write_uint64_BE(out_buf, nbytes_uncomp); - bshuf_write_uint32_BE((char*) out_buf + 8, block_size * elem_size); - err = bshuf_compress_lz4(in_buf, (char*) out_buf + 12, size, - elem_size, block_size); nbytes_out = err + 12; } } else { - if (flags & H5Z_FLAG_REVERSE) { - // Bit unshuffle. - err = bshuf_bitunshuffle(in_buf, out_buf, size, elem_size, - block_size); } else { - // Bit shuffle. - err = bshuf_bitshuffle(in_buf, out_buf, size, elem_size, - block_size); } nbytes_out = nbytes; } - //printf("nb_in %d, nb_uncomp %d, nb_out %d, buf_out %d, block %d\n", - //nbytes, nbytes_uncomp, nbytes_out, buf_size_out, block_size); - - if (err < 0) { - sprintf(msg, "Error in bitshuffle with error code %d.", err); - PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, msg); - free(out_buf); - return 0; - } else { - free(*buf); - *buf = out_buf; - *buf_size = buf_size_out; - - return nbytes_out; - } -} - - - -H5Z_class_t bshuf_H5Filter[1] = {{ - H5Z_CLASS_T_VERS, - (H5Z_filter_t)(BSHUF_H5FILTER), - 1, 1, - "bitshuffle; see https://github.com/kiyo-masui/bitshuffle", - NULL, - (H5Z_set_local_func_t)(bshuf_h5_set_local), - (H5Z_func_t)(bshuf_h5_filter) -}}; - - -int bshuf_register_h5filter(void){ - - int retval; - - retval = H5Zregister(bshuf_H5Filter); - if(retval<0){ - PUSH_ERR("bshuf_register_h5filter", - H5E_CANTREGISTER, "Can't register bitshuffle filter"); - } - return retval; -} - diff --git a/src/bitshuffle/src/bshuf_h5filter.h b/src/bitshuffle/src/bshuf_h5filter.h deleted file mode 100644 index 0a8fa6a3..00000000 --- a/src/bitshuffle/src/bshuf_h5filter.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Bitshuffle HDF5 filter - * - * This file is part of Bitshuffle - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. 
- * - * - * Header File - * - * Filter Options - * -------------- - * block_size (option slot 0) : interger (optional) - * What block size to use (in elements not bytes). Default is 0, - * for which bitshuffle will pick a block size with a target of 8kb. - * Compression (option slot 1) : 0 or BSHUF_H5_COMPRESS_LZ4 - * Whether to apply LZ4 compression to the data after bitshuffling. - * This is much faster than applying compression as a second filter - * because it is done when the small block of data is already in the - * L1 cache. - * - * For LZ4 compression, the compressed format of the data is the same as - * for the normal LZ4 filter described in - * http://www.hdfgroup.org/services/filters/HDF5_LZ4.pdf. - * - */ - - -#ifndef BSHUF_H5FILTER_H -#define BSHUF_H5FILTER_H - -#define H5Z_class_t_vers 2 -#include "hdf5.h" - - -#define BSHUF_H5FILTER 32008 - - -#define BSHUF_H5_COMPRESS_LZ4 2 - - -extern H5Z_class_t bshuf_H5Filter[1]; - - -/* ---- bshuf_register_h5filter ---- - * - * Register the bitshuffle HDF5 filter within the HDF5 library. - * - * Call this before using the bitshuffle HDF5 filter from C unless - * using dynamically loaded filters. - * - */ -int bshuf_register_h5filter(void); - - -#endif // BSHUF_H5FILTER_H diff --git a/src/bitshuffle/src/bshuf_h5plugin.c b/src/bitshuffle/src/bshuf_h5plugin.c deleted file mode 100644 index 22e99929..00000000 --- a/src/bitshuffle/src/bshuf_h5plugin.c +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Dynamically loaded filter plugin for HDF5 Bitshuffle filter. - * - * This file is part of Bitshuffle - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. 
- * - */ - - -#include "bshuf_h5filter.h" -#include "H5PLextern.h" - -H5PL_type_t H5PLget_plugin_type(void) {return H5PL_TYPE_FILTER;} -const void* H5PLget_plugin_info(void) {return bshuf_H5Filter;} - diff --git a/src/bitshuffle/src/iochain.c b/src/bitshuffle/src/iochain.c deleted file mode 100644 index baa97296..00000000 --- a/src/bitshuffle/src/iochain.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * IOchain - Distribute a chain of dependant IO events amoung threads. - * - * This file is part of Bitshuffle - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. - * - */ - -#include -#include "iochain.h" - - -void ioc_init(ioc_chain *C, const void *in_ptr_0, void *out_ptr_0) { -#ifdef _OPENMP - omp_init_lock(&C->next_lock); - for (size_t ii = 0; ii < IOC_SIZE; ii ++) { - omp_init_lock(&(C->in_pl[ii].lock)); - omp_init_lock(&(C->out_pl[ii].lock)); - } -#endif - C->next = 0; - C->in_pl[0].ptr = in_ptr_0; - C->out_pl[0].ptr = out_ptr_0; -} - - -void ioc_destroy(ioc_chain *C) { -#ifdef _OPENMP - omp_destroy_lock(&C->next_lock); - for (size_t ii = 0; ii < IOC_SIZE; ii ++) { - omp_destroy_lock(&(C->in_pl[ii].lock)); - omp_destroy_lock(&(C->out_pl[ii].lock)); - } -#endif -} - - -const void * ioc_get_in(ioc_chain *C, size_t *this_iter) { -#ifdef _OPENMP - omp_set_lock(&C->next_lock); - #pragma omp flush -#endif - *this_iter = C->next; - C->next ++; -#ifdef _OPENMP - omp_set_lock(&(C->in_pl[*this_iter % IOC_SIZE].lock)); - omp_set_lock(&(C->in_pl[(*this_iter + 1) % IOC_SIZE].lock)); - omp_set_lock(&(C->out_pl[(*this_iter + 1) % IOC_SIZE].lock)); - omp_unset_lock(&C->next_lock); -#endif - return C->in_pl[*this_iter % IOC_SIZE].ptr; -} - - -void ioc_set_next_in(ioc_chain *C, size_t* this_iter, void* in_ptr) { - C->in_pl[(*this_iter + 1) % IOC_SIZE].ptr = in_ptr; -#ifdef _OPENMP - omp_unset_lock(&(C->in_pl[(*this_iter + 1) % IOC_SIZE].lock)); -#endif -} - - -void * 
ioc_get_out(ioc_chain *C, size_t *this_iter) { -#ifdef _OPENMP - omp_set_lock(&(C->out_pl[(*this_iter) % IOC_SIZE].lock)); - #pragma omp flush -#endif - void *out_ptr = C->out_pl[*this_iter % IOC_SIZE].ptr; -#ifdef _OPENMP - omp_unset_lock(&(C->out_pl[(*this_iter) % IOC_SIZE].lock)); -#endif - return out_ptr; -} - - -void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr) { - C->out_pl[(*this_iter + 1) % IOC_SIZE].ptr = out_ptr; -#ifdef _OPENMP - omp_unset_lock(&(C->out_pl[(*this_iter + 1) % IOC_SIZE].lock)); - // *in_pl[this_iter]* lock released at the end of the iteration to avoid being - // overtaken by previous threads and having *out_pl[this_iter]* corrupted. - // Especially worried about thread 0, iteration 0. - omp_unset_lock(&(C->in_pl[(*this_iter) % IOC_SIZE].lock)); -#endif -} - diff --git a/src/bitshuffle/src/iochain.h b/src/bitshuffle/src/iochain.h deleted file mode 100644 index 4e225d1b..00000000 --- a/src/bitshuffle/src/iochain.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * IOchain - Distribute a chain of dependant IO events amoung threads. - * - * This file is part of Bitshuffle - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. - * - * - * Header File - * - * Similar in concept to a queue. Each task includes reading an input - * and writing output, but the location of the input/output (the pointers) - * depend on the previous item in the chain. - * - * This is designed for parallelizing blocked compression/decompression IO, - * where the destination of a compressed block depends on the compressed size - * of all previous blocks. - * - * Implemented with OpenMP locks. - * - * - * Usage - * ----- - * - Call `ioc_init` in serial block. - * - Each thread should create a local variable *size_t this_iter* and - * pass its address to all function calls. 
Its value will be set - * inside the functions and is used to identify the thread. - * - Each thread must call each of the `ioc_get*` and `ioc_set*` methods - * exactly once per iteration, starting with `ioc_get_in` and ending - * with `ioc_set_next_out`. - * - The order (`ioc_get_in`, `ioc_set_next_in`, *work*, `ioc_get_out`, - * `ioc_set_next_out`, *work*) is most efficient. - * - Have each thread call `ioc_end_pop`. - * - `ioc_get_in` is blocked until the previous entry's - * `ioc_set_next_in` is called. - * - `ioc_get_out` is blocked until the previous entry's - * `ioc_set_next_out` is called. - * - There are no blocks on the very first iteration. - * - Call `ioc_destroy` in serial block. - * - Safe for num_threads >= IOC_SIZE (but less efficient). - * - */ - - -#ifndef IOCHAIN_H -#define IOCHAIN_H - - -#include -#ifdef _OPENMP -#include -#endif - - -#define IOC_SIZE 33 - - -typedef struct ioc_ptr_and_lock { -#ifdef _OPENMP - omp_lock_t lock; -#endif - void *ptr; -} ptr_and_lock; - -typedef struct ioc_const_ptr_and_lock { -#ifdef _OPENMP - omp_lock_t lock; -#endif - const void *ptr; -} const_ptr_and_lock; - - -typedef struct ioc_chain { -#ifdef _OPENMP - omp_lock_t next_lock; -#endif - size_t next; - const_ptr_and_lock in_pl[IOC_SIZE]; - ptr_and_lock out_pl[IOC_SIZE]; -} ioc_chain; - - -void ioc_init(ioc_chain *C, const void *in_ptr_0, void *out_ptr_0); -void ioc_destroy(ioc_chain *C); -const void * ioc_get_in(ioc_chain *C, size_t *this_iter); -void ioc_set_next_in(ioc_chain *C, size_t* this_iter, void* in_ptr); -void * ioc_get_out(ioc_chain *C, size_t *this_iter); -void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr); - -#endif // IOCHAIN_H - diff --git a/src/bitshuffle/src/lzf_h5plugin.c b/src/bitshuffle/src/lzf_h5plugin.c deleted file mode 100644 index cbf7e3d8..00000000 --- a/src/bitshuffle/src/lzf_h5plugin.c +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Dynamically loaded filter plugin for HDF5 LZF filter. 
- * - * This file is part of Bitshuffle - * Author: Kiyoshi Masui - * Website: http://www.github.com/kiyo-masui/bitshuffle - * Created: 2014 - * - * See LICENSE file for details about copyright and rights to use. - * - */ - - -#define H5Z_class_t_vers 2 -#include "lzf_filter.h" -#include "H5PLextern.h" - -#include - - -size_t lzf_filter(unsigned flags, size_t cd_nelmts, - const unsigned cd_values[], size_t nbytes, - size_t *buf_size, void **buf); - - -herr_t lzf_set_local(hid_t dcpl, hid_t type, hid_t space); - - -H5Z_class_t lzf_H5Filter[1] = {{ - H5Z_CLASS_T_VERS, - (H5Z_filter_t)(H5PY_FILTER_LZF), - 1, 1, - "lzf", - NULL, - (H5Z_set_local_func_t)(lzf_set_local), - (H5Z_func_t)(lzf_filter) -}}; - - -H5PL_type_t H5PLget_plugin_type(void) {return H5PL_TYPE_FILTER;} -const void* H5PLget_plugin_info(void) {return lzf_H5Filter;} - From 3db00d4d3691551ff33b4068a5b9e900d9140ec5 Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Tue, 8 Nov 2022 11:43:35 +0100 Subject: [PATCH 5/7] Squashed 'src/bitshuffle/' content from commit a60471d3 git-subtree-dir: src/bitshuffle git-subtree-split: a60471d37a8cbbd8265dc8cfa83a9320abdcb590 --- .github/workflows/flake8_cython.cfg | 4 + .github/workflows/flake8_python.cfg | 3 + .github/workflows/install_hdf5.sh | 10 + .github/workflows/lint.yml | 32 + .github/workflows/main.yml | 58 + .github/workflows/wheels.yml | 98 ++ .gitignore | 79 + .gitmodules | 3 + LICENSE | 21 + MANIFEST.in | 10 + README.rst | 246 +++ bitshuffle/__init__.py | 54 + bitshuffle/ext.pyx | 569 ++++++ bitshuffle/h5.pyx | 235 +++ conda-recipe/bld.bat | 3 + conda-recipe/build.sh | 2 + conda-recipe/meta.yaml | 27 + conda-recipe/setup.py.patch | 13 + lz4/LICENSE | 24 + lz4/README.md | 21 + lz4/lz4.c | 2495 +++++++++++++++++++++++++++ lz4/lz4.h | 774 +++++++++ lzf/LICENSE.txt | 34 + lzf/README.txt | 84 + lzf/README_bitshuffle.txt | 3 + lzf/example.c | 106 ++ lzf/lzf/lzf.h | 100 ++ lzf/lzf/lzfP.h | 166 ++ lzf/lzf/lzf_c.c | 296 ++++ lzf/lzf/lzf_d.c | 154 ++ lzf/lzf_filter.c 
| 261 +++ lzf/lzf_filter.h | 38 + pyproject.toml | 10 + requirements.txt | 5 + setup.cfg.example | 10 + setup.py | 419 +++++ src/bitshuffle.c | 279 +++ src/bitshuffle.h | 205 +++ src/bitshuffle_core.c | 1864 ++++++++++++++++++++ src/bitshuffle_core.h | 169 ++ src/bitshuffle_internals.h | 75 + src/bshuf_h5filter.c | 260 +++ src/bshuf_h5filter.h | 67 + src/bshuf_h5plugin.c | 19 + src/hdf5_dl.c | 358 ++++ src/iochain.c | 90 + src/iochain.h | 94 + src/lzf_h5plugin.c | 42 + tests/data/regression_0.1.3.h5 | Bin 0 -> 114447 bytes tests/data/regression_0.4.0.h5 | Bin 0 -> 194482 bytes tests/make_regression_tdata.py | 69 + tests/test_ext.py | 627 +++++++ tests/test_h5filter.py | 138 ++ tests/test_h5plugin.py | 66 + tests/test_regression.py | 46 + zstd | 1 + 56 files changed, 10936 insertions(+) create mode 100644 .github/workflows/flake8_cython.cfg create mode 100644 .github/workflows/flake8_python.cfg create mode 100644 .github/workflows/install_hdf5.sh create mode 100644 .github/workflows/lint.yml create mode 100644 .github/workflows/main.yml create mode 100644 .github/workflows/wheels.yml create mode 100644 .gitignore create mode 100644 .gitmodules create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 README.rst create mode 100644 bitshuffle/__init__.py create mode 100644 bitshuffle/ext.pyx create mode 100644 bitshuffle/h5.pyx create mode 100644 conda-recipe/bld.bat create mode 100644 conda-recipe/build.sh create mode 100644 conda-recipe/meta.yaml create mode 100644 conda-recipe/setup.py.patch create mode 100644 lz4/LICENSE create mode 100644 lz4/README.md create mode 100644 lz4/lz4.c create mode 100644 lz4/lz4.h create mode 100644 lzf/LICENSE.txt create mode 100644 lzf/README.txt create mode 100644 lzf/README_bitshuffle.txt create mode 100644 lzf/example.c create mode 100644 lzf/lzf/lzf.h create mode 100644 lzf/lzf/lzfP.h create mode 100644 lzf/lzf/lzf_c.c create mode 100644 lzf/lzf/lzf_d.c create mode 100644 lzf/lzf_filter.c create mode 100644 
lzf/lzf_filter.h create mode 100644 pyproject.toml create mode 100644 requirements.txt create mode 100644 setup.cfg.example create mode 100644 setup.py create mode 100644 src/bitshuffle.c create mode 100644 src/bitshuffle.h create mode 100644 src/bitshuffle_core.c create mode 100644 src/bitshuffle_core.h create mode 100644 src/bitshuffle_internals.h create mode 100644 src/bshuf_h5filter.c create mode 100644 src/bshuf_h5filter.h create mode 100644 src/bshuf_h5plugin.c create mode 100644 src/hdf5_dl.c create mode 100644 src/iochain.c create mode 100644 src/iochain.h create mode 100644 src/lzf_h5plugin.c create mode 100644 tests/data/regression_0.1.3.h5 create mode 100644 tests/data/regression_0.4.0.h5 create mode 100644 tests/make_regression_tdata.py create mode 100644 tests/test_ext.py create mode 100644 tests/test_h5filter.py create mode 100644 tests/test_h5plugin.py create mode 100644 tests/test_regression.py create mode 160000 zstd diff --git a/.github/workflows/flake8_cython.cfg b/.github/workflows/flake8_cython.cfg new file mode 100644 index 00000000..9e5b5389 --- /dev/null +++ b/.github/workflows/flake8_cython.cfg @@ -0,0 +1,4 @@ +[flake8] +filename=*.pyx,*.pxd +select=E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411 +show_source=True diff --git a/.github/workflows/flake8_python.cfg b/.github/workflows/flake8_python.cfg new file mode 100644 index 00000000..b0760928 --- /dev/null +++ b/.github/workflows/flake8_python.cfg @@ -0,0 +1,3 @@ +[flake8] +ignore=E501,E203,W503,E266 +show_source=True diff --git a/.github/workflows/install_hdf5.sh b/.github/workflows/install_hdf5.sh new file mode 100644 index 00000000..58b2bdb4 --- /dev/null +++ b/.github/workflows/install_hdf5.sh @@ -0,0 +1,10 @@ +HDF5_VERSION=$1 + +# Download and install HDF5 $HDF5_VERSION from source for building wheels +curl 
https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-${HDF5_VERSION%.*}/hdf5-$HDF5_VERSION/src/hdf5-$HDF5_VERSION.tar.gz -O -s +tar -xzf hdf5-$HDF5_VERSION.tar.gz +cd hdf5-$HDF5_VERSION +./configure --prefix=/usr/local +make -j 2 +make install +cd .. diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..6d828a1c --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,32 @@ +name: bitshuffle-ci-build +on: + pull_request: + branches: + - master + push: + branches: + - master + +jobs: + + lint-code: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Set up Python 3.10 + uses: actions/setup-python@v2 + with: + python-version: "3.10" + + - name: Install pip dependencies + run: | + pip install black flake8 + + - name: Run flake8 + run: | + flake8 --config $GITHUB_WORKSPACE/.github/workflows/flake8_python.cfg bitshuffle tests + flake8 --config $GITHUB_WORKSPACE/.github/workflows/flake8_cython.cfg bitshuffle tests + + - name: Check code with black + run: black --check . 
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 00000000..8ec96b64 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,58 @@ +name: bitshuffle-ci-build +on: + pull_request: + branches: + - master + push: + branches: + - master + +jobs: + run-tests: + + strategy: + matrix: + python-version: ["3.6", "3.7", "3.10"] + os: [ubuntu-latest, macos-latest] + exclude: + - os: macos-latest + python-version: "3.6" + + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + + - name: Install apt dependencies + if: ${{ matrix.os == 'ubuntu-latest' }} + run: | + sudo apt-get install -y libhdf5-serial-dev hdf5-tools pkg-config + + - name: Install homebrew dependencies + if: ${{ matrix.os == 'macos-latest' }} + run: | + brew install hdf5 pkg-config + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install h5py + if: ${{ matrix.os == 'macos-latest' }} + run: | + pip install h5py + + - name: Install pip dependencies + run: | + pip install Cython + pip install -r requirements.txt + pip install pytest + + # Pull in ZSTD repo + git submodule update --init + + # Installing the plugin to arbitrary directory to check the install script. + python setup.py install --h5plugin --h5plugin-dir ~/hdf5/lib --zstd + + - name: Run tests + run: pytest -v . 
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml new file mode 100644 index 00000000..def84e0b --- /dev/null +++ b/.github/workflows/wheels.yml @@ -0,0 +1,98 @@ +name: Build bitshuffle wheels and upload to PyPI + +on: + workflow_dispatch: + release: + types: + - published + +jobs: + build_wheels: + name: Build wheels on ${{ matrix.os }} and hdf5-${{ matrix.hdf5 }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + hdf5: ["1.10.7"] + + steps: + # Checkout bitshuffle + - uses: actions/checkout@v2 + + # Build wheels for linux and x86 platforms + - name: Build wheels + uses: pypa/cibuildwheel@v2.3.1 + with: + output-dir: ./wheelhouse-hdf5-${{ matrix.hdf5}} + env: + CIBW_SKIP: "pp* *musllinux*" + CIBW_ARCHS_LINUX: "x86_64" + CIBW_BEFORE_ALL: | + chmod +x .github/workflows/install_hdf5.sh + .github/workflows/install_hdf5.sh ${{ matrix.hdf5 }} + git submodule update --init + CIBW_ENVIRONMENT: | + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib ENABLE_ZSTD=1 + CIBW_TEST_REQUIRES: pytest + # Install different version of HDF5 for unit tests to ensure the + # wheels are independent of HDF5 installation + # CIBW_BEFORE_TEST: | + # chmod +x .github/workflows/install_hdf5.sh + # .github/workflows/install_hdf5.sh 1.8.11 + # Run units tests but disable test_h5plugin.py + CIBW_TEST_COMMAND: pytest {package}/tests + + # Package wheels and host on CI + - uses: actions/upload-artifact@v2 + with: + path: ./wheelhouse-hdf5-${{ matrix.hdf5 }}/*.whl + + build_sdist: + name: Build source distribution + strategy: + matrix: + python-version: ["3.8"] + + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Install apt dependencies + run: | + sudo apt-get install -y libhdf5-serial-dev hdf5-tools pkg-config + + - name: Install Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install pip dependencies + run: | + pip install -r requirements.txt + + - name: 
Build sdist + run: python setup.py sdist + + - uses: actions/upload-artifact@v2 + with: + path: dist/*.tar.gz + + # Upload to PyPI + upload_pypi: + needs: [build_wheels, build_sdist] + runs-on: ubuntu-latest + # Upload to PyPI on every tag + # if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags') + # Alternatively, to publish when a GitHub Release is created, use the following rule: + if: github.event_name == 'release' && github.event.action == 'published' + steps: + - uses: actions/download-artifact@v2 + with: + name: artifact + path: dist + + - uses: pypa/gh-action-pypi-publish@v1.4.2 + with: + user: __token__ + password: ${{ secrets.pypi_password }} + # To test: repository_url: https://test.pypi.org/legacy/ diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..f4a98eab --- /dev/null +++ b/.gitignore @@ -0,0 +1,79 @@ +## C + +# Object files +*.o +*.ko +*.obj +*.elf + +# Libraries +*.lib +*.a + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + + +## Python +*.py[cod] + +# C extensions +*.so + +# Packages +*.egg +*.egg-info +dist +build +eggs +parts +bin +var +sdist +develop-eggs +.installed.cfg +lib +lib64 +__pycache__ + +# Installer logs +pip-log.txt + +# Unit test / coverage reports +.coverage +.tox +nosetests.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# Documentation builds +doc/_build +doc/generated + +## Editor files and backups. 
+*.swp +*.swo + +# Generated files +bitshuffle/ext.c +bitshuffle/h5.c + +# ItelliJ +.idea diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..5ebea353 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "zstd"] + path = zstd + url = https://github.com/facebook/zstd diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..1365ed69 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +Bitshuffle - Filter for improving compression of typed binary data. + +Copyright (c) 2014 Kiyoshi Masui (kiyo@physics.ubc.ca) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..00746c64 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,10 @@ +recursive-include src *.h *.c +recursive-include bitshuffle *.pyx +recursive-include lz4 *.h *.c +recursive-include lzf *.h *.c +include setup.cfg.example +include LICENSE +include README.rst +include requirements.txt +exclude setup.cfg + diff --git a/README.rst b/README.rst new file mode 100644 index 00000000..7e4be25f --- /dev/null +++ b/README.rst @@ -0,0 +1,246 @@ +========== +Bitshuffle +========== + +Filter for improving compression of typed binary data. + +Bitshuffle is an algorithm that rearranges typed, binary data for improving +compression, as well as a python/C package that implements this algorithm +within the Numpy framework. + +The library can be used along side HDF5 to compress and decompress datasets and +is integrated through the `dynamically loaded filters`_ framework. Bitshuffle +is HDF5 filter number ``32008``. + +Algorithmically, Bitshuffle is closely related to HDF5's `Shuffle filter`_ +except it operates at the bit level instead of the byte level. Arranging a +typed data array in to a matrix with the elements as the rows and the bits +within the elements as the columns, Bitshuffle "transposes" the matrix, +such that all the least-significant-bits are in a row, etc. This transpose +is performed within blocks of data roughly 8kB long [1]_. + +This does not in itself compress data, only rearranges it for more efficient +compression. To perform the actual compression you will need a compression +library. Bitshuffle has been designed to be well matched to Marc Lehmann's +LZF_ as well as LZ4_ and ZSTD_. Note that because Bitshuffle modifies the data at the bit +level, sophisticated entropy reducing compression libraries such as GZIP and +BZIP are unlikely to achieve significantly better compression than simpler and +faster duplicate-string-elimination algorithms such as LZF, LZ4 and ZSTD. 
Bitshuffle +thus includes routines (and HDF5 filter options) to apply LZ4 and ZSTD compression to +each block after shuffling [2]_. + +The Bitshuffle algorithm relies on neighbouring elements of a dataset being +highly correlated to improve data compression. Any correlations that span at +least 24 elements of the dataset may be exploited to improve compression. + +Bitshuffle was designed with performance in mind. On most machines the +time required for Bitshuffle+LZ4 is insignificant compared to the time required +to read or write the compressed data to disk. Because it is able to exploit the +SSE and AVX instruction sets present on modern Intel and AMD processors, on +these machines compression is only marginally slower than an out-of-cache +memory copy. On modern x86 processors you can expect Bitshuffle to have a +throughput of roughly 1 byte per clock cycle, and on the Haswell generation of +Intel processors (2013) and later, you can expect up to 2 bytes per clock +cycle. In addition, Bitshuffle is parallelized using OpenMP. + +As a bonus, Bitshuffle ships with a dynamically loaded version of +`h5py`'s LZF compression filter, such that the filter can be transparently +used outside of python and in command line utilities such as ``h5dump``. + +.. [1] Chosen to fit comfortably within L1 cache as well as be well matched + window of the LZF compression library. + +.. [2] Over applying bitshuffle to the full dataset then applying LZ4/ZSTD + compression, this has the tremendous advantage that the block is + already in the L1 cache. + +.. _`dynamically loaded filters`: http://www.hdfgroup.org/HDF5/doc/Advanced/DynamicallyLoadedFilters/HDF5DynamicallyLoadedFilters.pdf + +.. _`Shuffle filter`: http://www.hdfgroup.org/HDF5/doc_resource/H5Shuffle_Perf.pdf + +.. _LZF: http://oldhome.schmorp.de/marc/liblzf.html + +.. _LZ4: https://code.google.com/p/lz4/ + +.. 
_ZSTD: https://github.com/facebook/zstd + + +Applications +------------ + +Bitshuffle might be right for your application if: + +- You need to compress typed binary data. +- Your data is arranged such that adjacent elements over the fastest varying + index of your dataset are similar (highly correlated). +- A special case of the previous point is if you are only exercising a subset + of the bits in your data-type, as is often true of integer data. +- You need both high compression ratios and high performance. + + +Comparing Bitshuffle to other compression algorithms and HDF5 filters: + +- Bitshuffle is less general than many other compression algorithms. + To achieve good compression ratios, consecutive elements of your data must + be highly correlated. +- For the right datasets, Bitshuffle is one of the few compression + algorithms that promises both high throughput and high compression ratios. +- Bitshuffle should have roughly the same throughput as Shuffle, but + may obtain higher compression ratios. +- The MAFISC_ filter actually includes something similar to Bitshuffle as one of + its prefilters. However, MAFISC's emphasis is on obtaining high compression + ratios at all costs, sacrificing throughput. + +.. _MAFISC: http://wr.informatik.uni-hamburg.de/research/projects/icomex/mafisc + + +Installation for Python +----------------------- + +Installation requires python 2.7+ or 3.3+, HDF5 1.8.4 or later, HDF5 for python +(h5py), Numpy and Cython. Bitshuffle is linked against HDF5. To use the dynamically +loaded HDF5 filter requires HDF5 1.8.11 or later. If ZSTD support is enabled the ZSTD +repo needs to be pulled into bitshuffle before installation with:: + + git submodule update --init + +To install bitshuffle:: + + python setup.py install [--h5plugin [--h5plugin-dir=spam] --zstd] + +To get finer control of installation options, including whether to compile +with OpenMP multi-threading, copy the ``setup.cfg.example`` to ``setup.cfg`` +and edit the values therein. 
+ +If using the dynamically loaded HDF5 filter (which gives you access to the +Bitshuffle and LZF filters outside of python), set the environment variable +``HDF5_PLUGIN_PATH`` to the value of ``--h5plugin-dir`` or use HDF5's default +search location of ``/usr/local/hdf5/lib/plugin``. + +ZSTD support is enabled with ``--zstd``. + +If you get an error about missing source files when building the extensions, +try upgrading setuptools. There is a weird bug where setuptools prior to 0.7 +doesn't work properly with Cython in some cases. + +.. _source: http://docs.h5py.org/en/latest/build.html#source-installation + + +Usage from Python +----------------- + +The `bitshuffle` module contains routines for shuffling and unshuffling +Numpy arrays. + +If installed with the dynamically loaded filter plugins, Bitshuffle can be used +in conjunction with HDF5 both inside and outside of python, in the same way as +any other filter; simply by specifying the filter number ``32008``. Otherwise +the filter will be available only within python and only after importing +`bitshuffle.h5`. Reading Bitshuffle encoded datasets will be transparent. +The filter can be added to new datasets either through the `h5py` low level +interface or through the convenience functions provided in +`bitshuffle.h5`. See the docstrings and unit tests for examples. For `h5py` +version 2.5.0 and later Bitshuffle can be added to new datasets through the +high level interface, as in the example below. + +The compression algorithm can be configured using the `filter_opts` in +`bitshuffle.h5.create_dataset()`. LZ4 is chosen with: +`(BLOCK_SIZE, h5.H5_COMPRESS_LZ4)` and ZSTD with: +`(BLOCK_SIZE, h5.H5_COMPRESS_ZSTD, COMP_LVL)`. See `test_h5filter.py` for an example. 
+ +Example h5py +------------ +:: + + import h5py + import numpy + import bitshuffle.h5 + + print(h5py.__version__) # >= '2.5.0' + + f = h5py.File(filename, "w") + + # block_size = 0 lets Bitshuffle choose its value + block_size = 0 + + dataset = f.create_dataset( + "data", + (100, 100, 100), + compression=bitshuffle.h5.H5FILTER, + compression_opts=(block_size, bitshuffle.h5.H5_COMPRESS_LZ4), + dtype='float32', + ) + + # create some random data + array = numpy.random.rand(100, 100, 100) + array = array.astype('float32') + + dataset[:] = array + + f.close() + + +Usage from C +------------ + +If you wish to use Bitshuffle in your C program and would prefer not to use the +HDF5 dynamically loaded filter, the C library in the ``src/`` directory is +self-contained and complete. + + +Usage from Java +--------------- + +You can use Bitshuffle even in Java and the routines for shuffling and unshuffling +are ported into `snappy-java`_. To use the routines, you need to add the following +dependency to your pom.xml:: + + + org.xerial.snappy + snappy-java + 1.1.3-M1 + + +First, import org.xerial.snappy.BitShuffle in your Java code:: + + import org.xerial.snappy.BitShuffle; + +Then, you use them like this:: + + int[] data = new int[] {1, 3, 34, 43, 34}; + byte[] shuffledData = BitShuffle.bitShuffle(data); + int[] result = BitShuffle.bitUnShuffleIntArray(shuffledData); + +.. _`snappy-java`: https://github.com/xerial/snappy-java + + +Anaconda +-------- + +The conda package can be built via:: + + conda build conda-recipe + + +For Best Results +---------------- + +Here are a few tips to help you get the most out of Bitshuffle: + +- For multi-dimensional datasets, order your data such that the fastest varying + dimension is the one over which your data is most correlated (have + values that change the least), or fake this using chunks. +- To achieve the highest throughput, use a data type that is 64 *bytes* or + smaller. 
If you have a very large compound data type, consider adding a + dimension to your datasets instead. +- To make full use of the SSE2 instruction set, use a data type whose size + is a multiple of 2 bytes. For the AVX2 instruction set, use a data type whose + size is a multiple of 4 bytes. + + +Citing Bitshuffle +----------------- + +Bitshuffle was initially described in +http://dx.doi.org/10.1016/j.ascom.2015.07.002, pre-print available at +http://arxiv.org/abs/1503.00638. diff --git a/bitshuffle/__init__.py b/bitshuffle/__init__.py new file mode 100644 index 00000000..3f7c0380 --- /dev/null +++ b/bitshuffle/__init__.py @@ -0,0 +1,54 @@ +# flake8: noqa +""" +Filter for improving compression of typed binary data. + +Functions +========= + + using_NEON + using_SSE2 + using_AVX2 + bitshuffle + bitunshuffle + compress_lz4 + decompress_lz4 + compress_zstd + decompress_zstd + +""" + +from __future__ import absolute_import + + +from bitshuffle.ext import ( + __version__, + __zstd__, + bitshuffle, + bitunshuffle, + using_NEON, + using_SSE2, + using_AVX2, + compress_lz4, + decompress_lz4, +) + +# Import ZSTD API if enabled +zstd_api = [] +if __zstd__: + from bitshuffle.ext import ( + compress_zstd, + decompress_zstd, + ) + + zstd_api += ["compress_zstd", "decompress_zstd"] + +__all__ = [ + "__version__", + "bitshuffle", + "bitunshuffle", + "using_NEON", + "using_SSE2", + "using_AVX2", + "compress_lz4", + "decompress_lz4", +] + zstd_api diff --git a/bitshuffle/ext.pyx b/bitshuffle/ext.pyx new file mode 100644 index 00000000..edc9c588 --- /dev/null +++ b/bitshuffle/ext.pyx @@ -0,0 +1,569 @@ +""" +Wrappers for public and private bitshuffle routines + +""" + +from __future__ import absolute_import, division, print_function, unicode_literals + +import numpy as np + +cimport numpy as np +cimport cython + + +np.import_array() + + +# Repeat each calculation this many times. For timing. 
+cdef int REPEATC = 1 +# cdef int REPEATC = 32 + +REPEAT = REPEATC + +cdef extern from b"bitshuffle.h": + int bshuf_using_NEON() + int bshuf_using_SSE2() + int bshuf_using_AVX2() + int bshuf_bitshuffle(void *A, void *B, int size, int elem_size, + int block_size) nogil + int bshuf_bitunshuffle(void *A, void *B, int size, int elem_size, + int block_size) nogil + int bshuf_compress_lz4_bound(int size, int elem_size, int block_size) + int bshuf_compress_lz4(void *A, void *B, int size, int elem_size, + int block_size) nogil + int bshuf_decompress_lz4(void *A, void *B, int size, int elem_size, + int block_size) nogil + IF ZSTD_SUPPORT: + int bshuf_compress_zstd_bound(int size, int elem_size, int block_size) + int bshuf_compress_zstd(void *A, void *B, int size, int elem_size, + int block_size, const int comp_lvl) nogil + int bshuf_decompress_zstd(void *A, void *B, int size, int elem_size, + int block_size) nogil + int BSHUF_VERSION_MAJOR + int BSHUF_VERSION_MINOR + int BSHUF_VERSION_POINT + +__version__ = "%d.%d.%d" % (BSHUF_VERSION_MAJOR, BSHUF_VERSION_MINOR, + BSHUF_VERSION_POINT) + +IF ZSTD_SUPPORT: + __zstd__ = True +ELSE: + __zstd__ = False + +# Prototypes from bitshuffle.c +cdef extern int bshuf_copy(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_byte_elem_scal(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_byte_elem_SSE(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_byte_elem_NEON(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_bit_byte_scal(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_bit_byte_SSE(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_bit_byte_NEON(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_bit_byte_AVX(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_bitrow_eight(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_bit_elem_AVX(void *A, 
void *B, int size, int elem_size) +cdef extern int bshuf_trans_bit_elem_SSE(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_bit_elem_NEON(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_bit_elem_scal(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_byte_bitrow_SSE(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_byte_bitrow_NEON(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_byte_bitrow_AVX(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_byte_bitrow_scal(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_shuffle_bit_eightelem_scal(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_shuffle_bit_eightelem_SSE(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_shuffle_bit_eightelem_NEON(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_shuffle_bit_eightelem_AVX(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_untrans_bit_elem_SSE(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_untrans_bit_elem_NEON(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_untrans_bit_elem_AVX(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_untrans_bit_elem_scal(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_trans_bit_elem(void *A, void *B, int size, int elem_size) +cdef extern int bshuf_untrans_bit_elem(void *A, void *B, int size, int elem_size) + + +ctypedef int (*Cfptr) (void *A, void *B, int size, int elem_size) + + +def using_NEON(): + """Whether compiled using Arm NEON instructions.""" + if bshuf_using_NEON(): + return True + else: + return False + + +def using_SSE2(): + """Whether compiled using SSE2 instructions.""" + if bshuf_using_SSE2(): + return True + else: + return False + + +def using_AVX2(): + """Whether compiled using AVX2 instructions.""" + if bshuf_using_AVX2(): + return True + else: + 
return False + + +def _setup_arr(arr): + shape = tuple(arr.shape) + if not arr.flags['C_CONTIGUOUS']: + msg = "Input array must be C-contiguous." + raise ValueError(msg) + size = arr.size + dtype = arr.dtype + itemsize = dtype.itemsize + out = np.empty(shape, dtype=dtype) + return out, size, itemsize + + +@cython.boundscheck(False) +@cython.wraparound(False) +cdef _wrap_C_fun(Cfptr fun, np.ndarray arr): + """Wrap a C function with standard call signature.""" + + cdef int ii, size, itemsize, count=0 + cdef np.ndarray out + out, size, itemsize = _setup_arr(arr) + + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat + arr_flat = arr.view(np.uint8).ravel() + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat + out_flat = out.view(np.uint8).ravel() + cdef void* arr_ptr = &arr_flat[0] + cdef void* out_ptr = &out_flat[0] + + for ii in range(REPEATC): + count = fun(arr_ptr, out_ptr, size, itemsize) + if count < 0: + msg = "Failed. Error code %d." + excp = RuntimeError(msg % count, count) + raise excp + return out + + +def copy(np.ndarray arr not None): + """Copies the data. + + For testing and profiling purposes. + + """ + return _wrap_C_fun(&bshuf_copy, arr) + + +def trans_byte_elem_scal(np.ndarray arr not None): + """Transpose bytes within words but not bits. + + """ + return _wrap_C_fun(&bshuf_trans_byte_elem_scal, arr) + + +def trans_byte_elem_SSE(np.ndarray arr not None): + """Transpose bytes within array elements. 
+ + """ + return _wrap_C_fun(&bshuf_trans_byte_elem_SSE, arr) + + +def trans_byte_elem_NEON(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_byte_elem_NEON, arr) + + +def trans_bit_byte_scal(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_bit_byte_scal, arr) + + +def trans_bit_byte_SSE(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_bit_byte_SSE, arr) + + +def trans_bit_byte_NEON(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_bit_byte_NEON, arr) + + +def trans_bit_byte_AVX(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_bit_byte_AVX, arr) + + +def trans_bitrow_eight(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_bitrow_eight, arr) + + +def trans_bit_elem_AVX(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_bit_elem_AVX, arr) + + +def trans_bit_elem_scal(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_bit_elem_scal, arr) + + +def trans_bit_elem_SSE(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_bit_elem_SSE, arr) + + +def trans_bit_elem_NEON(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_bit_elem_NEON, arr) + + +def trans_byte_bitrow_SSE(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_byte_bitrow_SSE, arr) + + +def trans_byte_bitrow_NEON(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_byte_bitrow_NEON, arr) + + +def trans_byte_bitrow_AVX(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_byte_bitrow_AVX, arr) + + +def trans_byte_bitrow_scal(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_byte_bitrow_scal, arr) + + +def shuffle_bit_eightelem_scal(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_scal, arr) + + +def shuffle_bit_eightelem_SSE(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_SSE, arr) + + +def shuffle_bit_eightelem_NEON(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_NEON, arr) + + +def 
shuffle_bit_eightelem_AVX(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_shuffle_bit_eightelem_AVX, arr) + + +def untrans_bit_elem_SSE(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_untrans_bit_elem_SSE, arr) + + +def untrans_bit_elem_NEON(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_untrans_bit_elem_NEON, arr) + + +def untrans_bit_elem_AVX(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_untrans_bit_elem_AVX, arr) + + +def untrans_bit_elem_scal(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_untrans_bit_elem_scal, arr) + + +def trans_bit_elem(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_trans_bit_elem, arr) + + +def untrans_bit_elem(np.ndarray arr not None): + return _wrap_C_fun(&bshuf_untrans_bit_elem, arr) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def bitshuffle(np.ndarray arr not None, int block_size=0): + """Bitshuffle an array. + + Output array is the same shape and data type as input array but underlying + buffer has been bitshuffled. + + Parameters + ---------- + arr : numpy array + Data to be processed. + block_size : positive integer + Block size in number of elements. By default, block size is chosen + automatically. + + Returns + ------- + out : numpy array + Array with the same shape as input but underlying data has been + bitshuffled. + + """ + + cdef int ii, size, itemsize, count=0 + cdef np.ndarray out + out, size, itemsize = _setup_arr(arr) + + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat + arr_flat = arr.view(np.uint8).ravel() + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat + out_flat = out.view(np.uint8).ravel() + cdef void* arr_ptr = &arr_flat[0] + cdef void* out_ptr = &out_flat[0] + + with nogil: + for ii in range(REPEATC): + count = bshuf_bitshuffle(arr_ptr, out_ptr, size, itemsize, block_size) + if count < 0: + msg = "Failed. Error code %d." 
+ excp = RuntimeError(msg % count, count) + raise excp + return out + + +@cython.boundscheck(False) +@cython.wraparound(False) +def bitunshuffle(np.ndarray arr not None, int block_size=0): + """Bitunshuffle an array. + + Output array is the same shape and data type as input array but underlying + buffer has been un-bitshuffled. + + Parameters + ---------- + arr : numpy array + Data to be processed. + block_size : positive integer + Block size in number of elements. Must match value used for shuffling. + + Returns + ------- + out : numpy array + Array with the same shape as input but underlying data has been + un-bitshuffled. + + """ + + cdef int ii, size, itemsize, count=0 + cdef np.ndarray out + out, size, itemsize = _setup_arr(arr) + + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat + arr_flat = arr.view(np.uint8).ravel() + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat + out_flat = out.view(np.uint8).ravel() + cdef void* arr_ptr = &arr_flat[0] + cdef void* out_ptr = &out_flat[0] + + with nogil: + for ii in range(REPEATC): + count = bshuf_bitunshuffle(arr_ptr, out_ptr, size, itemsize, block_size) + if count < 0: + msg = "Failed. Error code %d." + excp = RuntimeError(msg % count, count) + raise excp + return out + + +@cython.boundscheck(False) +@cython.wraparound(False) +def compress_lz4(np.ndarray arr not None, int block_size=0): + """Bitshuffle then compress an array using LZ4. + + Parameters + ---------- + arr : numpy array + Data to be processed. + block_size : positive integer + Block size in number of elements. By default, block size is chosen + automatically. + + Returns + ------- + out : array with np.uint8 data type + Buffer holding compressed data. + + """ + + cdef int ii, size, itemsize, count=0 + shape = (arr.shape[i] for i in range(arr.ndim)) + if not arr.flags['C_CONTIGUOUS']: + msg = "Input array must be C-contiguous." 
+ raise ValueError(msg) + size = arr.size + dtype = arr.dtype + itemsize = dtype.itemsize + + max_out_size = bshuf_compress_lz4_bound(size, itemsize, block_size) + + cdef np.ndarray out + out = np.empty(max_out_size, dtype=np.uint8) + + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat + arr_flat = arr.view(np.uint8).ravel() + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat + out_flat = out.view(np.uint8).ravel() + cdef void* arr_ptr = &arr_flat[0] + cdef void* out_ptr = &out_flat[0] + with nogil: + for ii in range(REPEATC): + count = bshuf_compress_lz4(arr_ptr, out_ptr, size, itemsize, block_size) + if count < 0: + msg = "Failed. Error code %d." + excp = RuntimeError(msg % count, count) + raise excp + return out[:count] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def decompress_lz4(np.ndarray arr not None, shape, dtype, int block_size=0): + """Decompress a buffer using LZ4 then bitunshuffle it yielding an array. + + Parameters + ---------- + arr : numpy array + Input data to be decompressed. + shape : tuple of integers + Shape of the output (decompressed array). Must match the shape of the + original data array before compression. + dtype : numpy dtype + Datatype of the output array. Must match the data type of the original + data array before compression. + block_size : positive integer + Block size in number of elements. Must match value used for + compression. + + Returns + ------- + out : numpy array with shape *shape* and data type *dtype* + Decompressed data. + + """ + + cdef int ii, size, itemsize, count=0 + if not arr.flags['C_CONTIGUOUS']: + msg = "Input array must be C-contiguous." 
+ raise ValueError(msg) + size = np.prod(shape) + itemsize = dtype.itemsize + + cdef np.ndarray out + out = np.empty(tuple(shape), dtype=dtype) + + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat + arr_flat = arr.view(np.uint8).ravel() + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat + out_flat = out.view(np.uint8).ravel() + cdef void* arr_ptr = &arr_flat[0] + cdef void* out_ptr = &out_flat[0] + with nogil: + for ii in range(REPEATC): + count = bshuf_decompress_lz4(arr_ptr, out_ptr, size, itemsize, + block_size) + if count < 0: + msg = "Failed. Error code %d." + excp = RuntimeError(msg % count, count) + raise excp + if count != arr.size: + msg = "Decompressed different number of bytes than input buffer size." + msg += "Input buffer %d, decompressed %d." % (arr.size, count) + raise RuntimeError(msg, count) + return out + + +IF ZSTD_SUPPORT: + @cython.boundscheck(False) + @cython.wraparound(False) + def compress_zstd(np.ndarray arr not None, int block_size=0, int comp_lvl=1): + """Bitshuffle then compress an array using ZSTD. + + Parameters + ---------- + arr : numpy array + Data to be processed. + block_size : positive integer + Block size in number of elements. By default, block size is chosen + automatically. + comp_lvl : positive integer + Compression level applied by ZSTD + + Returns + ------- + out : array with np.uint8 data type + Buffer holding compressed data. + + """ + + cdef int ii, size, itemsize, count=0 + shape = (arr.shape[i] for i in range(arr.ndim)) + if not arr.flags['C_CONTIGUOUS']: + msg = "Input array must be C-contiguous." 
+ raise ValueError(msg) + size = arr.size + dtype = arr.dtype + itemsize = dtype.itemsize + + max_out_size = bshuf_compress_zstd_bound(size, itemsize, block_size) + + cdef np.ndarray out + out = np.empty(max_out_size, dtype=np.uint8) + + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat + arr_flat = arr.view(np.uint8).ravel() + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat + out_flat = out.view(np.uint8).ravel() + cdef void* arr_ptr = &arr_flat[0] + cdef void* out_ptr = &out_flat[0] + with nogil: + for ii in range(REPEATC): + count = bshuf_compress_zstd(arr_ptr, out_ptr, size, itemsize, block_size, comp_lvl) + if count < 0: + msg = "Failed. Error code %d." + excp = RuntimeError(msg % count, count) + raise excp + return out[:count] + + @cython.boundscheck(False) + @cython.wraparound(False) + def decompress_zstd(np.ndarray arr not None, shape, dtype, int block_size=0): + """Decompress a buffer using ZSTD then bitunshuffle it yielding an array. + + Parameters + ---------- + arr : numpy array + Input data to be decompressed. + shape : tuple of integers + Shape of the output (decompressed array). Must match the shape of the + original data array before compression. + dtype : numpy dtype + Datatype of the output array. Must match the data type of the original + data array before compression. + block_size : positive integer + Block size in number of elements. Must match value used for + compression. + + Returns + ------- + out : numpy array with shape *shape* and data type *dtype* + Decompressed data. + + """ + + cdef int ii, size, itemsize, count=0 + if not arr.flags['C_CONTIGUOUS']: + msg = "Input array must be C-contiguous." 
+ raise ValueError(msg) + size = np.prod(shape) + itemsize = dtype.itemsize + + cdef np.ndarray out + out = np.empty(tuple(shape), dtype=dtype) + + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat + arr_flat = arr.view(np.uint8).ravel() + cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat + out_flat = out.view(np.uint8).ravel() + cdef void* arr_ptr = &arr_flat[0] + cdef void* out_ptr = &out_flat[0] + with nogil: + for ii in range(REPEATC): + count = bshuf_decompress_zstd(arr_ptr, out_ptr, size, itemsize, + block_size) + if count < 0: + msg = "Failed. Error code %d." + excp = RuntimeError(msg % count, count) + raise excp + if count != arr.size: + msg = "Decompressed different number of bytes than input buffer size." + msg += "Input buffer %d, decompressed %d." % (arr.size, count) + raise RuntimeError(msg, count) + return out diff --git a/bitshuffle/h5.pyx b/bitshuffle/h5.pyx new file mode 100644 index 00000000..c92e24c8 --- /dev/null +++ b/bitshuffle/h5.pyx @@ -0,0 +1,235 @@ +""" +HDF5 support for Bitshuffle. + +To read a dataset that uses the Bitshuffle filter using h5py, simply import +this module (unless you have installed the Bitshuffle dynamically loaded +filter, in which case importing this module is unnecessary). + +To create a new dataset that includes the Bitshuffle filter, use one of the +convenience functions provided. + + +Constants +========= + + H5FILTER : The Bitshuffle HDF5 filter integer identifier. + H5_COMPRESS_LZ4 : Filter option flag for LZ4 compression. + H5_COMPRESS_ZSTD : Filter option flag for ZSTD compression. 
+ +Functions +========= + + create_dataset + create_bitshuffle_lzf_dataset + create_bitshuffle_compressed_dataset + +Examples +======== + + >>> import numpy as np + >>> import h5py + >>> import bitshuffle.h5 + + >>> shape = (123, 456) + >>> chunks = (10, 456) + >>> dtype = np.float64 + + >>> f = h5py.File("tmp_test.h5") + >>> bitshuffle.h5.create_bitshuffle_compressed_dataset( + f, "some_data", shape, dtype, chunks) + >>> f["some_data"][:] = 42 + +""" + +from __future__ import absolute_import, division, print_function, unicode_literals + +import sys +import numpy +import h5py +from h5py import h5d, h5fd, h5s, h5t, h5p, h5z, defs, filters + +cimport cython + + +cdef extern from b"bshuf_h5filter.h": + int bshuf_register_h5filter() + int BSHUF_H5FILTER + int BSHUF_H5_COMPRESS_LZ4 + int BSHUF_H5_COMPRESS_ZSTD + +cdef extern int init_filter(const char* libname) + +cdef int LZF_FILTER = 32000 + +H5FILTER = BSHUF_H5FILTER +H5_COMPRESS_LZ4 = BSHUF_H5_COMPRESS_LZ4 +H5_COMPRESS_ZSTD = BSHUF_H5_COMPRESS_ZSTD + +# Init HDF5 dynamic loading with HDF5 library used by h5py +if not sys.platform.startswith('win'): + if sys.version_info[0] >= 3: + libs = [bytes(h5d.__file__, encoding='utf-8'), + bytes(h5fd.__file__, encoding='utf-8'), + bytes(h5s.__file__, encoding='utf-8'), + bytes(h5t.__file__, encoding='utf-8'), + bytes(h5p.__file__, encoding='utf-8'), + bytes(h5z.__file__, encoding='utf-8'), + bytes(defs.__file__, encoding='utf-8')] + else: + libs = [h5d.__file__, h5fd.__file__, h5s.__file__, h5t.__file__, + h5p.__file__, h5z.__file__, defs.__file__] + + # Ensure all symbols are loaded + success = -1 + for lib in libs: + success = init_filter(lib) + if success == 0: + break + + if success == -1: + raise RuntimeError("Failed to load all HDF5 symbols using these libs: {}".format(libs)) + + +def register_h5_filter(): + ret = bshuf_register_h5filter() + if ret < 0: + raise RuntimeError("Failed to register bitshuffle HDF5 filter.", ret) + + +register_h5_filter() + + +def 
create_dataset(parent, name, shape, dtype, chunks=None, maxshape=None, + fillvalue=None, track_times=None, + filter_pipeline=(), filter_flags=None, filter_opts=None): + """Create a dataset with an arbitrary filter pipeline. + + Return a new low-level dataset identifier. + + Much of this code is copied from h5py, but couldn't reuse much code due to + unstable API. + + """ + + if hasattr(filter_pipeline, "__getitem__"): + filter_pipeline = list(filter_pipeline) + else: + filter_pipeline = [filter_pipeline] + filter_flags = [filter_flags] + filter_opts = [filter_opts] + nfilters = len(filter_pipeline) + if filter_flags is None: + filter_flags = [None] * nfilters + if filter_opts is None: + filter_opts = [None] * nfilters + if not len(filter_flags) == nfilters or not len(filter_opts) == nfilters: + msg = "Supplied incompatible number of filters, flags, and options." + raise ValueError(msg) + + shape = tuple(shape) + + tmp_shape = maxshape if maxshape is not None else shape + # Validate chunk shape + chunks_larger = (numpy.array([ not i>=j + for i, j in zip(tmp_shape, chunks) if i is not None])).any() + if isinstance(chunks, tuple) and chunks_larger: + errmsg = ("Chunk shape must not be greater than data shape in any " + "dimension. 
{} is not compatible with {}".format(chunks, shape)) + raise ValueError(errmsg) + + if isinstance(dtype, h5py.Datatype): + # Named types are used as-is + tid = dtype.id + dtype = tid.dtype # Following code needs this + else: + # Validate dtype + dtype = numpy.dtype(dtype) + tid = h5t.py_create(dtype, logical=1) + + if shape == (): + if any((chunks, filter_pipeline)): + raise TypeError("Scalar datasets don't support chunk/filter options") + if maxshape and maxshape != (): + raise TypeError("Scalar datasets cannot be extended") + return h5p.create(h5p.DATASET_CREATE) + + def rq_tuple(tpl, name): + """Check if chunks/maxshape match dataset rank""" + if tpl in (None, True): + return + try: + tpl = tuple(tpl) + except TypeError: + raise TypeError('"%s" argument must be None or a sequence object' % name) + if len(tpl) != len(shape): + raise ValueError('"%s" must have same rank as dataset shape' % name) + + rq_tuple(chunks, 'chunks') + rq_tuple(maxshape, 'maxshape') + + if (chunks is True) or (chunks is None and filter_pipeline): + chunks = filters.guess_chunk(shape, maxshape, dtype.itemsize) + + if maxshape is True: + maxshape = (None,)*len(shape) + + dcpl = h5p.create(h5p.DATASET_CREATE) + if chunks is not None: + dcpl.set_chunk(chunks) + dcpl.set_fill_time(h5d.FILL_TIME_ALLOC) # prevent resize glitch + + if fillvalue is not None: + fillvalue = numpy.array(fillvalue) + dcpl.set_fill_value(fillvalue) + + if track_times in (True, False): + dcpl.set_obj_track_times(track_times) + elif track_times is not None: + raise TypeError("track_times must be either True or False") + + for ii in range(nfilters): + this_filter = filter_pipeline[ii] + this_flags = filter_flags[ii] + this_opts = filter_opts[ii] + if this_flags is None: + this_flags = 0 + if this_opts is None: + this_opts = () + dcpl.set_filter(this_filter, this_flags, this_opts) + + if maxshape is not None: + maxshape = tuple(m if m is not None else h5s.UNLIMITED + for m in maxshape) + sid = h5s.create_simple(shape, 
maxshape) + + dset_id = h5d.create(parent.id, name, tid, sid, dcpl=dcpl) + + return dset_id + + +def create_bitshuffle_lzf_dataset(parent, name, shape, dtype, chunks=None, + maxshape=None, fillvalue=None, + track_times=None): + """Create dataset with a filter pipeline including bitshuffle and LZF""" + + filter_pipeline = [H5FILTER, LZF_FILTER] + dset_id = create_dataset(parent, name, shape, dtype, chunks=chunks, + filter_pipeline=filter_pipeline, maxshape=maxshape, + fillvalue=fillvalue, track_times=track_times) + return dset_id + + +def create_bitshuffle_compressed_dataset(parent, name, shape, dtype, + chunks=None, maxshape=None, + fillvalue=None, track_times=None): + """Create dataset with bitshuffle+internal LZ4 compression.""" + + filter_pipeline = [H5FILTER, ] + filter_opts = [(0, H5_COMPRESS_LZ4)] + dset_id = create_dataset(parent, name, shape, dtype, chunks=chunks, + filter_pipeline=filter_pipeline, + filter_opts=filter_opts, maxshape=maxshape, + fillvalue=fillvalue, track_times=track_times) + return dset_id + + diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat new file mode 100644 index 00000000..ccbb10f9 --- /dev/null +++ b/conda-recipe/bld.bat @@ -0,0 +1,3 @@ +SET CONDA_HOME=%PREFIX% +"%PYTHON%" setup.py install +if errorlevel 1 exit 1 diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh new file mode 100644 index 00000000..34c3a689 --- /dev/null +++ b/conda-recipe/build.sh @@ -0,0 +1,2 @@ +export CONDA_HOME=$PREFIX +$PYTHON setup.py install # Python command to install the script diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml new file mode 100644 index 00000000..ac227e2b --- /dev/null +++ b/conda-recipe/meta.yaml @@ -0,0 +1,27 @@ +package: + name: bitshuffle + version: 0.2.1 +source: + # git_url: https://github.com/kiyo-masui/bitshuffle.git + # git_rev: 0.2.1 + path: .. 
+ patches: + - setup.py.patch + +requirements: + build: + - python + - setuptools + - cython + - numpy + - h5py + - hdf5 + run: + - python + - numpy + - h5py + - cython + +about: + home: https://github.com/kiyo-masui/bitshuffle/blob/master/setup.py + summary: "bitshuffle library." diff --git a/conda-recipe/setup.py.patch b/conda-recipe/setup.py.patch new file mode 100644 index 00000000..437a5ffa --- /dev/null +++ b/conda-recipe/setup.py.patch @@ -0,0 +1,13 @@ +--- setup.py 2016-01-19 16:56:12.954563000 +0100 ++++ xxx.py 2016-01-19 16:56:00.817087000 +0100 +@@ -40,8 +40,8 @@ + + # Copied from h5py. + # TODO, figure out what the canonacal way to do this should be. +-INCLUDE_DIRS = [] +-LIBRARY_DIRS = [] ++INCLUDE_DIRS = [os.environ['CONDA_HOME'] + '/include'] ++LIBRARY_DIRS = [os.environ['CONDA_HOME'] + '/lib'] + if sys.platform == 'darwin': + # putting here both macports and homebrew paths will generate + # "ld: warning: dir not found" at the linking phase diff --git a/lz4/LICENSE b/lz4/LICENSE new file mode 100644 index 00000000..74c2cdd7 --- /dev/null +++ b/lz4/LICENSE @@ -0,0 +1,24 @@ +LZ4 Library +Copyright (c) 2011-2016, Yann Collet +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/lz4/README.md b/lz4/README.md new file mode 100644 index 00000000..f6ebf5e1 --- /dev/null +++ b/lz4/README.md @@ -0,0 +1,21 @@ +LZ4 - Library Files +================================ + +The __lib__ directory contains several files, but you don't necessarily need them all. + +To integrate fast LZ4 compression/decompression into your program, you basically just need "**lz4.c**" and "**lz4.h**". + +For more compression at the cost of compression speed (while preserving decompression speed), use **lz4hc** on top of regular lz4. `lz4hc` only provides compression functions. It also needs `lz4` to compile properly. + +If you want to produce files or data streams compatible with `lz4` command line utility, use **lz4frame**. This library encapsulates lz4-compressed blocks into the [official interoperable frame format]. In order to work properly, lz4frame needs lz4 and lz4hc, and also **xxhash**, which provides error detection algorithm. +(_Advanced stuff_ : It's possible to hide xxhash symbols into a local namespace. This is what `liblz4` does, to avoid symbol duplication in case a user program would link to several libraries containing xxhash symbols.) + +A more complex "lz4frame_static.h" is also provided, although its usage is not recommended. It contains definitions which are not guaranteed to remain stable within future versions. Use for static linking ***only***. + +The other files are not source code. 
There are : + + - LICENSE : contains the BSD license text + - Makefile : script to compile or install lz4 library (static or dynamic) + - liblz4.pc.in : for pkg-config (make install) + +[official interoperable frame format]: ../lz4_Frame_format.md diff --git a/lz4/lz4.c b/lz4/lz4.c new file mode 100644 index 00000000..9f5e9bfa --- /dev/null +++ b/lz4/lz4.c @@ -0,0 +1,2495 @@ +/* + LZ4 - Fast LZ compression algorithm + Copyright (C) 2011-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - LZ4 homepage : http://www.lz4.org + - LZ4 source repository : https://github.com/lz4/lz4 +*/ + +/*-************************************ +* Tuning parameters +**************************************/ +/* + * LZ4_HEAPMODE : + * Select how default compression functions will allocate memory for their hash table, + * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()). + */ +#ifndef LZ4_HEAPMODE +# define LZ4_HEAPMODE 0 +#endif + +/* + * LZ4_ACCELERATION_DEFAULT : + * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0 + */ +#define LZ4_ACCELERATION_DEFAULT 1 +/* + * LZ4_ACCELERATION_MAX : + * Any "acceleration" value higher than this threshold + * get treated as LZ4_ACCELERATION_MAX instead (fix #876) + */ +#define LZ4_ACCELERATION_MAX 65537 + + +/*-************************************ +* CPU Feature Detection +**************************************/ +/* LZ4_FORCE_MEMORY_ACCESS + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets which assembly generation depends on alignment. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */ +# if defined(__GNUC__) && \ + ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \ + || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define LZ4_FORCE_MEMORY_ACCESS 2 +# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) +# define LZ4_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/* + * LZ4_FORCE_SW_BITCOUNT + * Define this parameter if your target system or compiler does not support hardware bit count + */ +#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for WinCE doesn't support Hardware bit count */ +# undef LZ4_FORCE_SW_BITCOUNT /* avoid double def */ +# define LZ4_FORCE_SW_BITCOUNT +#endif + + + +/*-************************************ +* Dependency +**************************************/ +/* + * LZ4_SRC_INCLUDED: + * Amalgamation flag, whether lz4.c is included + */ +#ifndef LZ4_SRC_INCLUDED +# define LZ4_SRC_INCLUDED 1 +#endif + +#ifndef LZ4_STATIC_LINKING_ONLY +#define LZ4_STATIC_LINKING_ONLY +#endif + +#ifndef LZ4_DISABLE_DEPRECATE_WARNINGS +#define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */ +#endif + +#define LZ4_STATIC_LINKING_ONLY /* LZ4_DISTANCE_MAX */ +#include "lz4.h" +/* see also "memory routines" below */ + + +/*-************************************ +* Compiler Options +**************************************/ +#if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Visual Studio 2005+ */ +# include /* only present in VS2005+ */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif /* _MSC_VER */ + +#ifndef LZ4_FORCE_INLINE +# ifdef _MSC_VER /* Visual Studio */ +# define LZ4_FORCE_INLINE static __forceinline +# else +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# ifdef __GNUC__ +# define 
LZ4_FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define LZ4_FORCE_INLINE static inline +# endif +# else +# define LZ4_FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +# endif /* _MSC_VER */ +#endif /* LZ4_FORCE_INLINE */ + +/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE + * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8, + * together with a simple 8-byte copy loop as a fall-back path. + * However, this optimization hurts the decompression speed by >30%, + * because the execution does not go to the optimized loop + * for typical compressible data, and all of the preamble checks + * before going to the fall-back path become useless overhead. + * This optimization happens only with the -O3 flag, and -O2 generates + * a simple 8-byte copy loop. + * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8 + * functions are annotated with __attribute__((optimize("O2"))), + * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute + * of LZ4_wildCopy8 does not affect the compression speed. 
+ */ +#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__) +# define LZ4_FORCE_O2 __attribute__((optimize("O2"))) +# undef LZ4_FORCE_INLINE +# define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline)) +#else +# define LZ4_FORCE_O2 +#endif + +#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__) +# define expect(expr,value) (__builtin_expect ((expr),(value)) ) +#else +# define expect(expr,value) (expr) +#endif + +#ifndef likely +#define likely(expr) expect((expr) != 0, 1) +#endif +#ifndef unlikely +#define unlikely(expr) expect((expr) != 0, 0) +#endif + +/* Should the alignment test prove unreliable, for some reason, + * it can be disabled by setting LZ4_ALIGN_TEST to 0 */ +#ifndef LZ4_ALIGN_TEST /* can be externally provided */ +# define LZ4_ALIGN_TEST 1 +#endif + + +/*-************************************ +* Memory routines +**************************************/ +#ifdef LZ4_USER_MEMORY_FUNCTIONS +/* memory management functions can be customized by user project. 
+ * Below functions must exist somewhere in the Project + * and be available at link time */ +void* LZ4_malloc(size_t s); +void* LZ4_calloc(size_t n, size_t s); +void LZ4_free(void* p); +# define ALLOC(s) LZ4_malloc(s) +# define ALLOC_AND_ZERO(s) LZ4_calloc(1,s) +# define FREEMEM(p) LZ4_free(p) +#else +# include /* malloc, calloc, free */ +# define ALLOC(s) malloc(s) +# define ALLOC_AND_ZERO(s) calloc(1,s) +# define FREEMEM(p) free(p) +#endif + +#include /* memset, memcpy */ +#define MEM_INIT(p,v,s) memset((p),(v),(s)) + + +/*-************************************ +* Common Constants +**************************************/ +#define MINMATCH 4 + +#define WILDCOPYLENGTH 8 +#define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */ +#define FASTLOOP_SAFE_DISTANCE 64 +static const int LZ4_minLength = (MFLIMIT+1); + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define LZ4_DISTANCE_ABSOLUTE_MAX 65535 +#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX) /* max supported by LZ4 format */ +# error "LZ4_DISTANCE_MAX is too big : must be <= 65535" +#endif + +#define ML_BITS 4 +#define ML_MASK ((1U<=1) +# include +#else +# ifndef assert +# define assert(condition) ((void)0) +# endif +#endif + +#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use after variable declarations */ + +#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) +# include + static int g_debuglog_enable = 1; +# define DEBUGLOG(l, ...) { \ + if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \ + fprintf(stderr, __FILE__ ": "); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, " \n"); \ + } } +#else +# define DEBUGLOG(l, ...) 
{} /* disabled */ +#endif + +static int LZ4_isAligned(const void* ptr, size_t alignment) +{ + return ((size_t)ptr & (alignment -1)) == 0; +} + + +/*-************************************ +* Types +**************************************/ +#include +#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef uintptr_t uptrval; +#else +# if UINT_MAX != 4294967295UL +# error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4" +# endif + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; + typedef size_t uptrval; /* generally true, except OpenVMS-64 */ +#endif + +#if defined(__x86_64__) + typedef U64 reg_t; /* 64-bits in x32 mode */ +#else + typedef size_t reg_t; /* 32-bits in x32 mode */ +#endif + +typedef enum { + notLimited = 0, + limitedOutput = 1, + fillOutput = 2 +} limitedOutput_directive; + + +/*-************************************ +* Reading and writing into memory +**************************************/ + +/** + * LZ4 relies on memcpy with a constant size being inlined. In freestanding + * environments, the compiler can't assume the implementation of memcpy() is + * standard compliant, so it can't apply its specialized memcpy() inlining + * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze + * memcpy() as if it were standard compliant, so it can inline it in freestanding + * environments. This is needed when decompressing the Linux Kernel, for example. 
+ */ +#if defined(__GNUC__) && (__GNUC__ >= 4) +#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) +#else +#define LZ4_memcpy(dst, src, size) memcpy(dst, src, size) +#endif + +static unsigned LZ4_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + + +#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2) +/* lie to the compiler about data alignment; use with caution */ + +static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; } +static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; } +static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; } + +static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } + +#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) unalign; + +static U16 LZ4_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } +static U32 LZ4_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; } + +static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } + +#else /* safe and portable access using memcpy() */ + +static U16 LZ4_read16(const void* memPtr) +{ + U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; +} + +static U32 LZ4_read32(const void* memPtr) +{ + U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; +} + +static reg_t LZ4_read_ARCH(const void* memPtr) +{ + reg_t val; LZ4_memcpy(&val, memPtr, 
sizeof(val)); return val; +} + +static void LZ4_write16(void* memPtr, U16 value) +{ + LZ4_memcpy(memPtr, &value, sizeof(value)); +} + +static void LZ4_write32(void* memPtr, U32 value) +{ + LZ4_memcpy(memPtr, &value, sizeof(value)); +} + +#endif /* LZ4_FORCE_MEMORY_ACCESS */ + + +static U16 LZ4_readLE16(const void* memPtr) +{ + if (LZ4_isLittleEndian()) { + return LZ4_read16(memPtr); + } else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)((U16)p[0] + (p[1]<<8)); + } +} + +static void LZ4_writeLE16(void* memPtr, U16 value) +{ + if (LZ4_isLittleEndian()) { + LZ4_write16(memPtr, value); + } else { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE) value; + p[1] = (BYTE)(value>>8); + } +} + +/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */ +LZ4_FORCE_INLINE +void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd) +{ + BYTE* d = (BYTE*)dstPtr; + const BYTE* s = (const BYTE*)srcPtr; + BYTE* const e = (BYTE*)dstEnd; + + do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d= 16. 
*/ +LZ4_FORCE_INLINE void +LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd) +{ + BYTE* d = (BYTE*)dstPtr; + const BYTE* s = (const BYTE*)srcPtr; + BYTE* const e = (BYTE*)dstEnd; + + do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d= dstPtr + MINMATCH + * - there is at least 8 bytes available to write after dstEnd */ +LZ4_FORCE_INLINE void +LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset) +{ + BYTE v[8]; + + assert(dstEnd >= dstPtr + MINMATCH); + + switch(offset) { + case 1: + MEM_INIT(v, *srcPtr, 8); + break; + case 2: + LZ4_memcpy(v, srcPtr, 2); + LZ4_memcpy(&v[2], srcPtr, 2); + LZ4_memcpy(&v[4], v, 4); + break; + case 4: + LZ4_memcpy(v, srcPtr, 4); + LZ4_memcpy(&v[4], srcPtr, 4); + break; + default: + LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset); + return; + } + + LZ4_memcpy(dstPtr, v, 8); + dstPtr += 8; + while (dstPtr < dstEnd) { + LZ4_memcpy(dstPtr, v, 8); + dstPtr += 8; + } +} +#endif + + +/*-************************************ +* Common functions +**************************************/ +static unsigned LZ4_NbCommonBytes (reg_t val) +{ + assert(val != 0); + if (LZ4_isLittleEndian()) { + if (sizeof(val) == 8) { +# if defined(_MSC_VER) && (_MSC_VER >= 1800) && defined(_M_AMD64) && !defined(LZ4_FORCE_SW_BITCOUNT) + /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */ + return (unsigned)_tzcnt_u64(val) >> 3; +# elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanForward64(&r, (U64)val); + return (unsigned)r >> 3; +# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_ctzll((U64)val) >> 3; +# else + const U64 m = 0x0101010101010101ULL; + val ^= val - 1; + return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56); +# endif + } else /* 32 bits */ { +# if 
defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r; + _BitScanForward(&r, (U32)val); + return (unsigned)r >> 3; +# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_ctz((U32)val) >> 3; +# else + const U32 m = 0x01010101; + return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24; +# endif + } + } else /* Big Endian CPU */ { + if (sizeof(val)==8) { +# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_clzll((U64)val) >> 3; +# else +#if 1 + /* this method is probably faster, + * but adds a 128 bytes lookup table */ + static const unsigned char ctz7_tab[128] = { + 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + }; + U64 const mask = 0x0101010101010101ULL; + U64 const t = (((val >> 8) - mask) | val) & mask; + return ctz7_tab[(t * 0x0080402010080402ULL) >> 57]; +#else + /* this method doesn't consume memory space like the previous one, + * but it contains several branches, + * that may end up slowing execution */ + static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 16 on 32 bits. + Just to avoid some static analyzer complaining about shift by 32 on 32-bits target. + Note that this code path is never triggered in 32-bits mode. 
*/ + unsigned r; + if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +#endif +# endif + } else /* 32 bits */ { +# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_clz((U32)val) >> 3; +# else + val >>= 8; + val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) | + (val + 0x00FF0000)) >> 24; + return (unsigned)val ^ 3; +# endif + } + } +} + + +#define STEPSIZE sizeof(reg_t) +LZ4_FORCE_INLINE +unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) +{ + const BYTE* const pStart = pIn; + + if (likely(pIn < pInLimit-(STEPSIZE-1))) { + reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); + if (!diff) { + pIn+=STEPSIZE; pMatch+=STEPSIZE; + } else { + return LZ4_NbCommonBytes(diff); + } } + + while (likely(pIn < pInLimit-(STEPSIZE-1))) { + reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); + if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; } + pIn += LZ4_NbCommonBytes(diff); + return (unsigned)(pIn - pStart); + } + + if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; } + if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; } + if ((pIn compression run slower on incompressible data */ + + +/*-************************************ +* Local Structures and types +**************************************/ +typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t; + +/** + * This enum distinguishes several different modes of accessing previous + * content in the stream. + * + * - noDict : There is no preceding content. 
+ * - withPrefix64k : Table entries up to ctx->dictSize before the current blob + * blob being compressed are valid and refer to the preceding + * content (of length ctx->dictSize), which is available + * contiguously preceding in memory the content currently + * being compressed. + * - usingExtDict : Like withPrefix64k, but the preceding content is somewhere + * else in memory, starting at ctx->dictionary with length + * ctx->dictSize. + * - usingDictCtx : Like usingExtDict, but everything concerning the preceding + * content is in a separate context, pointed to by + * ctx->dictCtx. ctx->dictionary, ctx->dictSize, and table + * entries in the current context that refer to positions + * preceding the beginning of the current compression are + * ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx + * ->dictSize describe the location and size of the preceding + * content, and matches are found by looking in the ctx + * ->dictCtx->hashTable. + */ +typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive; +typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; + + +/*-************************************ +* Local Utils +**************************************/ +int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; } +const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; } +int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } +int LZ4_sizeofState(void) { return LZ4_STREAMSIZE; } + + +/*-************************************ +* Internal Definitions used in Tests +**************************************/ +#if defined (__cplusplus) +extern "C" { +#endif + +int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize); + +int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, + int compressedSize, int maxOutputSize, + const void* dictStart, size_t dictSize); + +#if defined (__cplusplus) +} +#endif + +/*-****************************** +* Compression 
functions +********************************/ +LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType) +{ + if (tableType == byU16) + return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); + else + return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); +} + +LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType) +{ + const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG; + if (LZ4_isLittleEndian()) { + const U64 prime5bytes = 889523592379ULL; + return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog)); + } else { + const U64 prime8bytes = 11400714785074694791ULL; + return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog)); + } +} + +LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType) +{ + if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType); + return LZ4_hash4(LZ4_read32(p), tableType); +} + +LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType) +{ + switch (tableType) + { + default: /* fallthrough */ + case clearedTable: { /* illegal! */ assert(0); return; } + case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; } + case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; } + } +} + +LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType) +{ + switch (tableType) + { + default: /* fallthrough */ + case clearedTable: /* fallthrough */ + case byPtr: { /* illegal! 
*/ assert(0); return; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; } + case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; } + } +} + +LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h, + void* tableBase, tableType_t const tableType, + const BYTE* srcBase) +{ + switch (tableType) + { + case clearedTable: { /* illegal! */ assert(0); return; } + case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; } + case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; } + } +} + +LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + U32 const h = LZ4_hashPosition(p, tableType); + LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); +} + +/* LZ4_getIndexOnHash() : + * Index of match position registered in hash table. + * hash position must be calculated by using base+index, or dictBase+index. + * Assumption 1 : only valid if tableType == byU32 or byU16. 
+ * Assumption 2 : h is presumed valid (within limits of hash table) + */ +LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType) +{ + LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2); + if (tableType == byU32) { + const U32* const hashTable = (const U32*) tableBase; + assert(h < (1U << (LZ4_MEMORY_USAGE-2))); + return hashTable[h]; + } + if (tableType == byU16) { + const U16* const hashTable = (const U16*) tableBase; + assert(h < (1U << (LZ4_MEMORY_USAGE-1))); + return hashTable[h]; + } + assert(0); return 0; /* forbidden case */ +} + +static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + if (tableType == byPtr) { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; } + if (tableType == byU32) { const U32* const hashTable = (const U32*) tableBase; return hashTable[h] + srcBase; } + { const U16* const hashTable = (const U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */ +} + +LZ4_FORCE_INLINE const BYTE* +LZ4_getPosition(const BYTE* p, + const void* tableBase, tableType_t tableType, + const BYTE* srcBase) +{ + U32 const h = LZ4_hashPosition(p, tableType); + return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); +} + +LZ4_FORCE_INLINE void +LZ4_prepareTable(LZ4_stream_t_internal* const cctx, + const int inputSize, + const tableType_t tableType) { + /* If the table hasn't been used, it's guaranteed to be zeroed out, and is + * therefore safe to use no matter what mode we're in. Otherwise, we figure + * out if it's safe to leave as is or whether it needs to be reset. 
+ */ + if ((tableType_t)cctx->tableType != clearedTable) { + assert(inputSize >= 0); + if ((tableType_t)cctx->tableType != tableType + || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU) + || ((tableType == byU32) && cctx->currentOffset > 1 GB) + || tableType == byPtr + || inputSize >= 4 KB) + { + DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx); + MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE); + cctx->currentOffset = 0; + cctx->tableType = (U32)clearedTable; + } else { + DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)"); + } + } + + /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back, is faster + * than compressing without a gap. However, compressing with + * currentOffset == 0 is faster still, so we preserve that case. + */ + if (cctx->currentOffset != 0 && tableType == byU32) { + DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset"); + cctx->currentOffset += 64 KB; + } + + /* Finally, clear history */ + cctx->dictCtx = NULL; + cctx->dictionary = NULL; + cctx->dictSize = 0; +} + +/** LZ4_compress_generic() : + * inlined, to ensure branches are decided at compilation time. 
+ * Presumed already validated at this stage: + * - source != NULL + * - inputSize > 0 + */ +LZ4_FORCE_INLINE int LZ4_compress_generic_validated( + LZ4_stream_t_internal* const cctx, + const char* const source, + char* const dest, + const int inputSize, + int *inputConsumed, /* only written when outputDirective == fillOutput */ + const int maxOutputSize, + const limitedOutput_directive outputDirective, + const tableType_t tableType, + const dict_directive dictDirective, + const dictIssue_directive dictIssue, + const int acceleration) +{ + int result; + const BYTE* ip = (const BYTE*) source; + + U32 const startIndex = cctx->currentOffset; + const BYTE* base = (const BYTE*) source - startIndex; + const BYTE* lowLimit; + + const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx; + const BYTE* const dictionary = + dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary; + const U32 dictSize = + dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize; + const U32 dictDelta = (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0; /* make indexes in dictCtx comparable with index in current context */ + + int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx); + U32 const prefixIdxLimit = startIndex - dictSize; /* used when dictDirective == dictSmall */ + const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary; + const BYTE* anchor = (const BYTE*) source; + const BYTE* const iend = ip + inputSize; + const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1; + const BYTE* const matchlimit = iend - LASTLITERALS; + + /* the dictCtx currentOffset is indexed on the start of the dictionary, + * while a dictionary in the current context precedes the currentOffset */ + const BYTE* dictBase = !dictionary ? NULL : (dictDirective == usingDictCtx) ? 
+ dictionary + dictSize - dictCtx->currentOffset : + dictionary + dictSize - startIndex; + + BYTE* op = (BYTE*) dest; + BYTE* const olimit = op + maxOutputSize; + + U32 offset = 0; + U32 forwardH; + + DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType); + assert(ip != NULL); + /* If init conditions are not met, we don't have to mark stream + * as having dirty context, since no action was taken yet */ + if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */ + if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; } /* Size too large (not within 64K limit) */ + if (tableType==byPtr) assert(dictDirective==noDict); /* only supported use case with byPtr */ + assert(acceleration >= 1); + + lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0); + + /* Update context state */ + if (dictDirective == usingDictCtx) { + /* Subsequent linked blocks can't use the dictionary. */ + /* Instead, they use the block we just compressed. 
*/ + cctx->dictCtx = NULL; + cctx->dictSize = (U32)inputSize; + } else { + cctx->dictSize += (U32)inputSize; + } + cctx->currentOffset += (U32)inputSize; + cctx->tableType = (U32)tableType; + + if (inputSizehashTable, tableType, base); + ip++; forwardH = LZ4_hashPosition(ip, tableType); + + /* Main Loop */ + for ( ; ; ) { + const BYTE* match; + BYTE* token; + const BYTE* filledIp; + + /* Find a match */ + if (tableType == byPtr) { + const BYTE* forwardIp = ip; + int step = 1; + int searchMatchNb = acceleration << LZ4_skipTrigger; + do { + U32 const h = forwardH; + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_skipTrigger); + + if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals; + assert(ip < mflimitPlusOne); + + match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base); + forwardH = LZ4_hashPosition(forwardIp, tableType); + LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base); + + } while ( (match+LZ4_DISTANCE_MAX < ip) + || (LZ4_read32(match) != LZ4_read32(ip)) ); + + } else { /* byU32, byU16 */ + + const BYTE* forwardIp = ip; + int step = 1; + int searchMatchNb = acceleration << LZ4_skipTrigger; + do { + U32 const h = forwardH; + U32 const current = (U32)(forwardIp - base); + U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType); + assert(matchIndex <= current); + assert(forwardIp - base < (ptrdiff_t)(2 GB - 1)); + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_skipTrigger); + + if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals; + assert(ip < mflimitPlusOne); + + if (dictDirective == usingDictCtx) { + if (matchIndex < startIndex) { + /* there was no match, try the dictionary */ + assert(tableType == byU32); + matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32); + match = dictBase + matchIndex; + matchIndex += dictDelta; /* make dictCtx index comparable with current context */ + lowLimit = dictionary; + } else { + match = base + matchIndex; + lowLimit = 
(const BYTE*)source; + } + } else if (dictDirective==usingExtDict) { + if (matchIndex < startIndex) { + DEBUGLOG(7, "extDict candidate: matchIndex=%5u < startIndex=%5u", matchIndex, startIndex); + assert(startIndex - matchIndex >= MINMATCH); + match = dictBase + matchIndex; + lowLimit = dictionary; + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; + } + } else { /* single continuous memory segment */ + match = base + matchIndex; + } + forwardH = LZ4_hashPosition(forwardIp, tableType); + LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType); + + DEBUGLOG(7, "candidate at pos=%u (offset=%u \n", matchIndex, current - matchIndex); + if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; } /* match outside of valid area */ + assert(matchIndex < current); + if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX)) + && (matchIndex+LZ4_DISTANCE_MAX < current)) { + continue; + } /* too far */ + assert((current - matchIndex) <= LZ4_DISTANCE_MAX); /* match now expected within distance */ + + if (LZ4_read32(match) == LZ4_read32(ip)) { + if (maybe_extMem) offset = current - matchIndex; + break; /* match found */ + } + + } while(1); + } + + /* Catch up */ + filledIp = ip; + while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; } + + /* Encode Literals */ + { unsigned const litLength = (unsigned)(ip - anchor); + token = op++; + if ((outputDirective == limitedOutput) && /* Check output buffer overflow */ + (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) { + return 0; /* cannot compress within `dst` budget. 
Stored indexes in hash table are nonetheless fine */ + } + if ((outputDirective == fillOutput) && + (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) { + op--; + goto _last_literals; + } + if (litLength >= RUN_MASK) { + int len = (int)(litLength - RUN_MASK); + *token = (RUN_MASK<= 255 ; len-=255) *op++ = 255; + *op++ = (BYTE)len; + } + else *token = (BYTE)(litLength< olimit)) { + /* the match was too close to the end, rewind and go to last literals */ + op = token; + goto _last_literals; + } + + /* Encode Offset */ + if (maybe_extMem) { /* static test */ + DEBUGLOG(6, " with offset=%u (ext if > %i)", offset, (int)(ip - (const BYTE*)source)); + assert(offset <= LZ4_DISTANCE_MAX && offset > 0); + LZ4_writeLE16(op, (U16)offset); op+=2; + } else { + DEBUGLOG(6, " with offset=%u (same segment)", (U32)(ip - match)); + assert(ip-match <= LZ4_DISTANCE_MAX); + LZ4_writeLE16(op, (U16)(ip - match)); op+=2; + } + + /* Encode MatchLength */ + { unsigned matchCode; + + if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx) + && (lowLimit==dictionary) /* match within extDict */ ) { + const BYTE* limit = ip + (dictEnd-match); + assert(dictEnd > match); + if (limit > matchlimit) limit = matchlimit; + matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit); + ip += (size_t)matchCode + MINMATCH; + if (ip==limit) { + unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit); + matchCode += more; + ip += more; + } + DEBUGLOG(6, " with matchLength=%u starting in extDict", matchCode+MINMATCH); + } else { + matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit); + ip += (size_t)matchCode + MINMATCH; + DEBUGLOG(6, " with matchLength=%u", matchCode+MINMATCH); + } + + if ((outputDirective) && /* Check output buffer overflow */ + (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) { + if 
(outputDirective == fillOutput) { + /* Match description too long : reduce it */ + U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255; + ip -= matchCode - newMatchCode; + assert(newMatchCode < matchCode); + matchCode = newMatchCode; + if (unlikely(ip <= filledIp)) { + /* We have already filled up to filledIp so if ip ends up less than filledIp + * we have positions in the hash table beyond the current position. This is + * a problem if we reuse the hash table. So we have to remove these positions + * from the hash table. + */ + const BYTE* ptr; + DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip)); + for (ptr = ip; ptr <= filledIp; ++ptr) { + U32 const h = LZ4_hashPosition(ptr, tableType); + LZ4_clearHash(h, cctx->hashTable, tableType); + } + } + } else { + assert(outputDirective == limitedOutput); + return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ + } + } + if (matchCode >= ML_MASK) { + *token += ML_MASK; + matchCode -= ML_MASK; + LZ4_write32(op, 0xFFFFFFFF); + while (matchCode >= 4*255) { + op+=4; + LZ4_write32(op, 0xFFFFFFFF); + matchCode -= 4*255; + } + op += matchCode / 255; + *op++ = (BYTE)(matchCode % 255); + } else + *token += (BYTE)(matchCode); + } + /* Ensure we have enough space for the last literals. 
*/ + assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit)); + + anchor = ip; + + /* Test end of chunk */ + if (ip >= mflimitPlusOne) break; + + /* Fill table */ + LZ4_putPosition(ip-2, cctx->hashTable, tableType, base); + + /* Test next position */ + if (tableType == byPtr) { + + match = LZ4_getPosition(ip, cctx->hashTable, tableType, base); + LZ4_putPosition(ip, cctx->hashTable, tableType, base); + if ( (match+LZ4_DISTANCE_MAX >= ip) + && (LZ4_read32(match) == LZ4_read32(ip)) ) + { token=op++; *token=0; goto _next_match; } + + } else { /* byU32, byU16 */ + + U32 const h = LZ4_hashPosition(ip, tableType); + U32 const current = (U32)(ip-base); + U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType); + assert(matchIndex < current); + if (dictDirective == usingDictCtx) { + if (matchIndex < startIndex) { + /* there was no match, try the dictionary */ + matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32); + match = dictBase + matchIndex; + lowLimit = dictionary; /* required for match length counter */ + matchIndex += dictDelta; + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; /* required for match length counter */ + } + } else if (dictDirective==usingExtDict) { + if (matchIndex < startIndex) { + match = dictBase + matchIndex; + lowLimit = dictionary; /* required for match length counter */ + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; /* required for match length counter */ + } + } else { /* single memory segment */ + match = base + matchIndex; + } + LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType); + assert(matchIndex < current); + if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1) + && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 
1 : (matchIndex+LZ4_DISTANCE_MAX >= current)) + && (LZ4_read32(match) == LZ4_read32(ip)) ) { + token=op++; + *token=0; + if (maybe_extMem) offset = current - matchIndex; + DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i", + (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source)); + goto _next_match; + } + } + + /* Prepare next loop */ + forwardH = LZ4_hashPosition(++ip, tableType); + + } + +_last_literals: + /* Encode Last Literals */ + { size_t lastRun = (size_t)(iend - anchor); + if ( (outputDirective) && /* Check output buffer overflow */ + (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) { + if (outputDirective == fillOutput) { + /* adapt lastRun to fill 'dst' */ + assert(olimit >= op); + lastRun = (size_t)(olimit-op) - 1/*token*/; + lastRun -= (lastRun + 256 - RUN_MASK) / 256; /*additional length tokens*/ + } else { + assert(outputDirective == limitedOutput); + return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ + } + } + DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun); + if (lastRun >= RUN_MASK) { + size_t accumulator = lastRun - RUN_MASK; + *op++ = RUN_MASK << ML_BITS; + for(; accumulator >= 255 ; accumulator-=255) *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRun< 0); + DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result); + return result; +} + +/** LZ4_compress_generic() : + * inlined, to ensure branches are decided at compilation time; + * takes care of src == (NULL, 0) + * and forward the rest to LZ4_compress_generic_validated */ +LZ4_FORCE_INLINE int LZ4_compress_generic( + LZ4_stream_t_internal* const cctx, + const char* const src, + char* const dst, + const int srcSize, + int *inputConsumed, /* only written when outputDirective == fillOutput */ + const int dstCapacity, + const limitedOutput_directive outputDirective, + const tableType_t tableType, + const dict_directive dictDirective, + const 
dictIssue_directive dictIssue, + const int acceleration) +{ + DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i", + srcSize, dstCapacity); + + if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; } /* Unsupported srcSize, too large (or negative) */ + if (srcSize == 0) { /* src == NULL supported if srcSize == 0 */ + if (outputDirective != notLimited && dstCapacity <= 0) return 0; /* no output, can't write anything */ + DEBUGLOG(5, "Generating an empty block"); + assert(outputDirective == notLimited || dstCapacity >= 1); + assert(dst != NULL); + dst[0] = 0; + if (outputDirective == fillOutput) { + assert (inputConsumed != NULL); + *inputConsumed = 0; + } + return 1; + } + assert(src != NULL); + + return LZ4_compress_generic_validated(cctx, src, dst, srcSize, + inputConsumed, /* only written into if outputDirective == fillOutput */ + dstCapacity, outputDirective, + tableType, dictDirective, dictIssue, acceleration); +} + + +int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) +{ + LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse; + assert(ctx != NULL); + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; + if (maxOutputSize >= LZ4_compressBound(inputSize)) { + if (inputSize < LZ4_64Klimit) { + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration); + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? 
byPtr : byU32; + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); + } + } else { + if (inputSize < LZ4_64Klimit) { + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration); + } + } +} + +/** + * LZ4_compress_fast_extState_fastReset() : + * A variant of LZ4_compress_fast_extState(). + * + * Using this variant avoids an expensive initialization step. It is only safe + * to call if the state buffer is known to be correctly initialized already + * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of + * "correctly initialized"). + */ +int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration) +{ + LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse; + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; + + if (dstCapacity >= LZ4_compressBound(srcSize)) { + if (srcSize < LZ4_64Klimit) { + const tableType_t tableType = byU16; + LZ4_prepareTable(ctx, srcSize, tableType); + if (ctx->currentOffset) { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration); + } else { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); + } + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? 
byPtr : byU32; + LZ4_prepareTable(ctx, srcSize, tableType); + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); + } + } else { + if (srcSize < LZ4_64Klimit) { + const tableType_t tableType = byU16; + LZ4_prepareTable(ctx, srcSize, tableType); + if (ctx->currentOffset) { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration); + } else { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); + } + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + LZ4_prepareTable(ctx, srcSize, tableType); + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); + } + } +} + + +int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) +{ + int result; +#if (LZ4_HEAPMODE) + LZ4_stream_t* ctxPtr = ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ + if (ctxPtr == NULL) return 0; +#else + LZ4_stream_t ctx; + LZ4_stream_t* const ctxPtr = &ctx; +#endif + result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration); + +#if (LZ4_HEAPMODE) + FREEMEM(ctxPtr); +#endif + return result; +} + + +int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputSize) +{ + return LZ4_compress_fast(src, dst, srcSize, maxOutputSize, 1); +} + + +/* Note!: This function leaves the stream in an unclean/broken state! + * It is not safe to subsequently use the same state with a _fastReset() or + * _continue() call without resetting it. 
*/ +static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize) +{ + void* const s = LZ4_initStream(state, sizeof (*state)); + assert(s != NULL); (void)s; + + if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) { /* compression success is guaranteed */ + return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1); + } else { + if (*srcSizePtr < LZ4_64Klimit) { + return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1); + } else { + tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, 1); + } } +} + + +int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize) +{ +#if (LZ4_HEAPMODE) + LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ + if (ctx == NULL) return 0; +#else + LZ4_stream_t ctxBody; + LZ4_stream_t* ctx = &ctxBody; +#endif + + int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize); + +#if (LZ4_HEAPMODE) + FREEMEM(ctx); +#endif + return result; +} + + + +/*-****************************** +* Streaming functions +********************************/ + +LZ4_stream_t* LZ4_createStream(void) +{ + LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); + LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal)); /* A compilation error here means LZ4_STREAMSIZE is not large enough */ + DEBUGLOG(4, "LZ4_createStream %p", lz4s); + if (lz4s == NULL) return NULL; + LZ4_initStream(lz4s, sizeof(*lz4s)); + return lz4s; +} + +static size_t LZ4_stream_t_alignment(void) +{ +#if LZ4_ALIGN_TEST + typedef struct { char c; LZ4_stream_t t; } t_a; + return 
sizeof(t_a) - sizeof(LZ4_stream_t); +#else + return 1; /* effectively disabled */ +#endif +} + +LZ4_stream_t* LZ4_initStream (void* buffer, size_t size) +{ + DEBUGLOG(5, "LZ4_initStream"); + if (buffer == NULL) { return NULL; } + if (size < sizeof(LZ4_stream_t)) { return NULL; } + if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) return NULL; + MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal)); + return (LZ4_stream_t*)buffer; +} + +/* resetStream is now deprecated, + * prefer initStream() which is more general */ +void LZ4_resetStream (LZ4_stream_t* LZ4_stream) +{ + DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream); + MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal)); +} + +void LZ4_resetStream_fast(LZ4_stream_t* ctx) { + LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32); +} + +int LZ4_freeStream (LZ4_stream_t* LZ4_stream) +{ + if (!LZ4_stream) return 0; /* support free on NULL */ + DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream); + FREEMEM(LZ4_stream); + return (0); +} + + +#define HASH_UNIT sizeof(reg_t) +int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) +{ + LZ4_stream_t_internal* dict = &LZ4_dict->internal_donotuse; + const tableType_t tableType = byU32; + const BYTE* p = (const BYTE*)dictionary; + const BYTE* const dictEnd = p + dictSize; + const BYTE* base; + + DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict); + + /* It's necessary to reset the context, + * and not just continue it with prepareTable() + * to avoid any risk of generating overflowing matchIndex + * when compressing using this dictionary */ + LZ4_resetStream(LZ4_dict); + + /* We always increment the offset by 64 KB, since, if the dict is longer, + * we truncate it to the last 64k, and if it's shorter, we still want to + * advance by a whole window length so we can provide the guarantee that + * there are only valid offsets in the window, which allows an optimization + * in LZ4_compress_fast_continue() where it uses 
noDictIssue even when the + * dictionary isn't a full 64k. */ + dict->currentOffset += 64 KB; + + if (dictSize < (int)HASH_UNIT) { + return 0; + } + + if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB; + base = dictEnd - dict->currentOffset; + dict->dictionary = p; + dict->dictSize = (U32)(dictEnd - p); + dict->tableType = (U32)tableType; + + while (p <= dictEnd-HASH_UNIT) { + LZ4_putPosition(p, dict->hashTable, tableType, base); + p+=3; + } + + return (int)dict->dictSize; +} + +void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream) { + const LZ4_stream_t_internal* dictCtx = dictionaryStream == NULL ? NULL : + &(dictionaryStream->internal_donotuse); + + DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)", + workingStream, dictionaryStream, + dictCtx != NULL ? dictCtx->dictSize : 0); + + if (dictCtx != NULL) { + /* If the current offset is zero, we will never look in the + * external dictionary context, since there is no value a table + * entry can take that indicate a miss. In that case, we need + * to bump the offset to something non-zero. + */ + if (workingStream->internal_donotuse.currentOffset == 0) { + workingStream->internal_donotuse.currentOffset = 64 KB; + } + + /* Don't actually attach an empty dictionary. 
+ */ + if (dictCtx->dictSize == 0) { + dictCtx = NULL; + } + } + workingStream->internal_donotuse.dictCtx = dictCtx; +} + + +static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize) +{ + assert(nextSize >= 0); + if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) { /* potential ptrdiff_t overflow (32-bits mode) */ + /* rescale hash table */ + U32 const delta = LZ4_dict->currentOffset - 64 KB; + const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize; + int i; + DEBUGLOG(4, "LZ4_renormDictT"); + for (i=0; ihashTable[i] < delta) LZ4_dict->hashTable[i]=0; + else LZ4_dict->hashTable[i] -= delta; + } + LZ4_dict->currentOffset = 64 KB; + if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB; + LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize; + } +} + + +int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, + const char* source, char* dest, + int inputSize, int maxOutputSize, + int acceleration) +{ + const tableType_t tableType = byU32; + LZ4_stream_t_internal* streamPtr = &LZ4_stream->internal_donotuse; + const BYTE* dictEnd = streamPtr->dictionary + streamPtr->dictSize; + + DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize); + + LZ4_renormDictT(streamPtr, inputSize); /* avoid index overflow */ + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; + + /* invalidate tiny dictionaries */ + if ( (streamPtr->dictSize-1 < 4-1) /* intentional underflow */ + && (dictEnd != (const BYTE*)source) ) { + DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary); + streamPtr->dictSize = 0; + streamPtr->dictionary = (const BYTE*)source; + dictEnd = (const BYTE*)source; + } + + /* Check overlapping input/dictionary space */ + { const BYTE* sourceEnd = (const BYTE*) source + inputSize; + if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) { + 
streamPtr->dictSize = (U32)(dictEnd - sourceEnd); + if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB; + if (streamPtr->dictSize < 4) streamPtr->dictSize = 0; + streamPtr->dictionary = dictEnd - streamPtr->dictSize; + } + } + + /* prefix mode : source data follows dictionary */ + if (dictEnd == (const BYTE*)source) { + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) + return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration); + else + return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration); + } + + /* external dictionary mode */ + { int result; + if (streamPtr->dictCtx) { + /* We depend here on the fact that dictCtx'es (produced by + * LZ4_loadDict) guarantee that their tables contain no references + * to offsets between dictCtx->currentOffset - 64 KB and + * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe + * to use noDictIssue even when the dict isn't a full 64 KB. + */ + if (inputSize > 4 KB) { + /* For compressing large blobs, it is faster to pay the setup + * cost to copy the dictionary's tables into the active context, + * so that the compression loop is only looking into one table. 
+ */ + LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr)); + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration); + } + } else { + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); + } + } + streamPtr->dictionary = (const BYTE*)source; + streamPtr->dictSize = (U32)inputSize; + return result; + } +} + + +/* Hidden debug function, to force-test external dictionary mode */ +int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize) +{ + LZ4_stream_t_internal* streamPtr = &LZ4_dict->internal_donotuse; + int result; + + LZ4_renormDictT(streamPtr, srcSize); + + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { + result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1); + } + + streamPtr->dictionary = (const BYTE*)source; + streamPtr->dictSize = (U32)srcSize; + + return result; +} + + +/*! LZ4_saveDict() : + * If previously compressed data block is not guaranteed to remain available at its memory location, + * save it into a safer place (char* safeBuffer). 
+ * Note : you don't need to call LZ4_loadDict() afterwards, + * dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue(). + * Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error. + */ +int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) +{ + LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse; + const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize; + + if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */ + if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; } + + if (safeBuffer == NULL) assert(dictSize == 0); + if (dictSize > 0) + memmove(safeBuffer, previousDictEnd - dictSize, dictSize); + + dict->dictionary = (const BYTE*)safeBuffer; + dict->dictSize = (U32)dictSize; + + return dictSize; +} + + + +/*-******************************* + * Decompression functions + ********************************/ + +typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; +typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; + +#undef MIN +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +/* Read the variable-length literal or match length. + * + * ip - pointer to use as input. + * lencheck - end ip. Return an error if ip advances >= lencheck. + * loop_check - check ip >= lencheck in body of loop. Returns loop_error if so. + * initial_check - check ip >= lencheck before start of loop. Returns initial_error if so. + * error (output) - error code. Should be set to 0 before call. 
+ */ +typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error; +LZ4_FORCE_INLINE unsigned +read_variable_length(const BYTE**ip, const BYTE* lencheck, + int loop_check, int initial_check, + variable_length_error* error) +{ + U32 length = 0; + U32 s; + if (initial_check && unlikely((*ip) >= lencheck)) { /* overflow detection */ + *error = initial_error; + return length; + } + do { + s = **ip; + (*ip)++; + length += s; + if (loop_check && unlikely((*ip) >= lencheck)) { /* overflow detection */ + *error = loop_error; + return length; + } + } while (s==255); + + return length; +} + +/*! LZ4_decompress_generic() : + * This generic decompression function covers all use cases. + * It shall be instantiated several times, using different sets of directives. + * Note that it is important for performance that this function really get inlined, + * in order to remove useless branches during compilation optimization. + */ +LZ4_FORCE_INLINE int +LZ4_decompress_generic( + const char* const src, + char* const dst, + int srcSize, + int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */ + + endCondition_directive endOnInput, /* endOnOutputSize, endOnInputSize */ + earlyEnd_directive partialDecoding, /* full, partial */ + dict_directive dict, /* noDict, withPrefix64k, usingExtDict */ + const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */ + const BYTE* const dictStart, /* only if dict==usingExtDict */ + const size_t dictSize /* note : = 0 if noDict */ + ) +{ + if (src == NULL) { return -1; } + + { const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + + BYTE* op = (BYTE*) dst; + BYTE* const oend = op + outputSize; + BYTE* cpy; + + const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize; + + const int safeDecode = (endOnInput==endOnInputSize); + const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB))); + + + /* Set up the "end" pointers for the shortcut. 
*/ + const BYTE* const shortiend = iend - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/; + const BYTE* const shortoend = oend - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/; + + const BYTE* match; + size_t offset; + unsigned token; + size_t length; + + + DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize); + + /* Special cases */ + assert(lowPrefix <= op); + if ((endOnInput) && (unlikely(outputSize==0))) { + /* Empty output buffer */ + if (partialDecoding) return 0; + return ((srcSize==1) && (*ip==0)) ? 0 : -1; + } + if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); } + if ((endOnInput) && unlikely(srcSize==0)) { return -1; } + + /* Currently the fast loop shows a regression on qualcomm arm chips. */ +#if LZ4_FAST_DEC_LOOP + if ((oend - op) < FASTLOOP_SAFE_DISTANCE) { + DEBUGLOG(6, "skip fast decode loop"); + goto safe_decode; + } + + /* Fast loop : decode sequences as long as output < iend-FASTLOOP_SAFE_DISTANCE */ + while (1) { + /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */ + assert(oend - op >= FASTLOOP_SAFE_DISTANCE); + if (endOnInput) { assert(ip < iend); } + token = *ip++; + length = token >> ML_BITS; /* literal length */ + + assert(!endOnInput || ip <= iend); /* ip < iend before the increment */ + + /* decode literal length */ + if (length == RUN_MASK) { + variable_length_error error = ok; + length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error); + if (error == initial_error) { goto _output_error; } + if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ + if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ + + /* copy literals */ + cpy = op+length; + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + if (endOnInput) { /* LZ4_decompress_safe() */ + if ((cpy>oend-32) || (ip+length>iend-32)) { goto 
safe_literal_copy; } + LZ4_wildCopy32(op, ip, cpy); + } else { /* LZ4_decompress_fast() */ + if (cpy>oend-8) { goto safe_literal_copy; } + LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time : + * it doesn't know input length, and only relies on end-of-block properties */ + } + ip += length; op = cpy; + } else { + cpy = op+length; + if (endOnInput) { /* LZ4_decompress_safe() */ + DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length); + /* We don't need to check oend, since we check it once for each loop below */ + if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; } + /* Literals can only be 14, but hope compilers optimize if we copy by a register size */ + LZ4_memcpy(op, ip, 16); + } else { /* LZ4_decompress_fast() */ + /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time : + * it doesn't know input length, and relies on end-of-block properties */ + LZ4_memcpy(op, ip, 8); + if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); } + } + ip += length; op = cpy; + } + + /* get offset */ + offset = LZ4_readLE16(ip); ip+=2; + match = op - offset; + assert(match <= op); + + /* get matchlength */ + length = token & ML_MASK; + + if (length == ML_MASK) { + variable_length_error error = ok; + if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ + length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error); + if (error != ok) { goto _output_error; } + if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */ + length += MINMATCH; + if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { + goto safe_match_copy; + } + } else { + length += MINMATCH; + if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { + goto safe_match_copy; + } + + /* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */ + if ((dict == withPrefix64k) || (match >= 
lowPrefix)) { + if (offset >= 8) { + assert(match >= lowPrefix); + assert(match <= op); + assert(op + 18 <= oend); + + LZ4_memcpy(op, match, 8); + LZ4_memcpy(op+8, match+8, 8); + LZ4_memcpy(op+16, match+16, 2); + op += length; + continue; + } } } + + if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ + /* match starting within external dictionary */ + if ((dict==usingExtDict) && (match < lowPrefix)) { + if (unlikely(op+length > oend-LASTLITERALS)) { + if (partialDecoding) { + DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd"); + length = MIN(length, (size_t)(oend-op)); + } else { + goto _output_error; /* end-of-block condition violated */ + } } + + if (length <= (size_t)(lowPrefix-match)) { + /* match fits entirely within external dictionary : just copy */ + memmove(op, dictEnd - (lowPrefix-match), length); + op += length; + } else { + /* match stretches into both external dictionary and current block */ + size_t const copySize = (size_t)(lowPrefix - match); + size_t const restSize = length - copySize; + LZ4_memcpy(op, dictEnd - copySize, copySize); + op += copySize; + if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ + BYTE* const endOfMatch = op + restSize; + const BYTE* copyFrom = lowPrefix; + while (op < endOfMatch) { *op++ = *copyFrom++; } + } else { + LZ4_memcpy(op, lowPrefix, restSize); + op += restSize; + } } + continue; + } + + /* copy match within block */ + cpy = op + length; + + assert((op <= oend) && (oend-op >= 32)); + if (unlikely(offset<16)) { + LZ4_memcpy_using_offset(op, match, cpy, offset); + } else { + LZ4_wildCopy32(op, match, cpy); + } + + op = cpy; /* wildcopy correction */ + } + safe_decode: +#endif + + /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */ + while (1) { + token = *ip++; + length = token >> ML_BITS; /* literal length */ + + assert(!endOnInput || ip <= iend); /* ip < iend before the increment */ + + /* 
A two-stage shortcut for the most common case: + * 1) If the literal length is 0..14, and there is enough space, + * enter the shortcut and copy 16 bytes on behalf of the literals + * (in the fast mode, only 8 bytes can be safely copied this way). + * 2) Further if the match length is 4..18, copy 18 bytes in a similar + * manner; but we ensure that there's enough space in the output for + * those 18 bytes earlier, upon entering the shortcut (in other words, + * there is a combined check for both stages). + */ + if ( (endOnInput ? length != RUN_MASK : length <= 8) + /* strictly "less than" on input, to re-enter the loop with at least one byte */ + && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) { + /* Copy the literals */ + LZ4_memcpy(op, ip, endOnInput ? 16 : 8); + op += length; ip += length; + + /* The second stage: prepare for match copying, decode full info. + * If it doesn't work out, the info won't be wasted. */ + length = token & ML_MASK; /* match length */ + offset = LZ4_readLE16(ip); ip += 2; + match = op - offset; + assert(match <= op); /* check overflow */ + + /* Do not deal with overlapping matches. */ + if ( (length != ML_MASK) + && (offset >= 8) + && (dict==withPrefix64k || match >= lowPrefix) ) { + /* Copy the match. */ + LZ4_memcpy(op + 0, match + 0, 8); + LZ4_memcpy(op + 8, match + 8, 8); + LZ4_memcpy(op +16, match +16, 2); + op += length + MINMATCH; + /* Both stages worked, load the next token. */ + continue; + } + + /* The second stage didn't work out, but the info is ready. + * Propel it right to the point of match copying. 
*/ + goto _copy_match; + } + + /* decode literal length */ + if (length == RUN_MASK) { + variable_length_error error = ok; + length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error); + if (error == initial_error) { goto _output_error; } + if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ + if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ + } + + /* copy literals */ + cpy = op+length; +#if LZ4_FAST_DEC_LOOP + safe_literal_copy: +#endif + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) ) + || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) ) + { + /* We've either hit the input parsing restriction or the output parsing restriction. + * In the normal scenario, decoding a full block, it must be the last sequence, + * otherwise it's an error (invalid input or dimensions). + * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow. + */ + if (partialDecoding) { + /* Since we are partial decoding we may be in this block because of the output parsing + * restriction, which is not valid since the output buffer is allowed to be undersized. + */ + assert(endOnInput); + DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end") + DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length); + DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op)); + DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip)); + /* Finishing in the middle of a literals segment, + * due to lack of input. + */ + if (ip+length > iend) { + length = (size_t)(iend-ip); + cpy = op + length; + } + /* Finishing in the middle of a literals segment, + * due to lack of output space. 
+ */ + if (cpy > oend) { + cpy = oend; + assert(op<=oend); + length = (size_t)(oend-op); + } + } else { + /* We must be on the last sequence because of the parsing limitations so check + * that we exactly regenerate the original size (must be exact when !endOnInput). + */ + if ((!endOnInput) && (cpy != oend)) { goto _output_error; } + /* We must be on the last sequence (or invalid) because of the parsing limitations + * so check that we exactly consume the input and don't overrun the output buffer. + */ + if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { + DEBUGLOG(6, "should have been last run of literals") + DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend); + DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend); + goto _output_error; + } + } + memmove(op, ip, length); /* supports overlapping memory regions; only matters for in-place decompression scenarios */ + ip += length; + op += length; + /* Necessarily EOF when !partialDecoding. + * When partialDecoding, it is EOF if we've either + * filled the output buffer or + * can't proceed with reading an offset for following match. 
+ */ + if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) { + break; + } + } else { + LZ4_wildCopy8(op, ip, cpy); /* may overwrite up to WILDCOPYLENGTH beyond cpy */ + ip += length; op = cpy; + } + + /* get offset */ + offset = LZ4_readLE16(ip); ip+=2; + match = op - offset; + + /* get matchlength */ + length = token & ML_MASK; + + _copy_match: + if (length == ML_MASK) { + variable_length_error error = ok; + length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error); + if (error != ok) goto _output_error; + if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */ + } + length += MINMATCH; + +#if LZ4_FAST_DEC_LOOP + safe_match_copy: +#endif + if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */ + /* match starting within external dictionary */ + if ((dict==usingExtDict) && (match < lowPrefix)) { + if (unlikely(op+length > oend-LASTLITERALS)) { + if (partialDecoding) length = MIN(length, (size_t)(oend-op)); + else goto _output_error; /* doesn't respect parsing restriction */ + } + + if (length <= (size_t)(lowPrefix-match)) { + /* match fits entirely within external dictionary : just copy */ + memmove(op, dictEnd - (lowPrefix-match), length); + op += length; + } else { + /* match stretches into both external dictionary and current block */ + size_t const copySize = (size_t)(lowPrefix - match); + size_t const restSize = length - copySize; + LZ4_memcpy(op, dictEnd - copySize, copySize); + op += copySize; + if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ + BYTE* const endOfMatch = op + restSize; + const BYTE* copyFrom = lowPrefix; + while (op < endOfMatch) *op++ = *copyFrom++; + } else { + LZ4_memcpy(op, lowPrefix, restSize); + op += restSize; + } } + continue; + } + assert(match >= lowPrefix); + + /* copy match within block */ + cpy = op + length; + + /* partialDecoding : may end anywhere within the 
block */ + assert(op<=oend); + if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { + size_t const mlen = MIN(length, (size_t)(oend-op)); + const BYTE* const matchEnd = match + mlen; + BYTE* const copyEnd = op + mlen; + if (matchEnd > op) { /* overlap copy */ + while (op < copyEnd) { *op++ = *match++; } + } else { + LZ4_memcpy(op, match, mlen); + } + op = copyEnd; + if (op == oend) { break; } + continue; + } + + if (unlikely(offset<8)) { + LZ4_write32(op, 0); /* silence msan warning when offset==0 */ + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += inc32table[offset]; + LZ4_memcpy(op+4, match, 4); + match -= dec64table[offset]; + } else { + LZ4_memcpy(op, match, 8); + match += 8; + } + op += 8; + + if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { + BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1); + if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */ + if (op < oCopyLimit) { + LZ4_wildCopy8(op, match, oCopyLimit); + match += oCopyLimit - op; + op = oCopyLimit; + } + while (op < cpy) { *op++ = *match++; } + } else { + LZ4_memcpy(op, match, 8); + if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); } + } + op = cpy; /* wildcopy correction */ + } + + /* end of decoding */ + if (endOnInput) { + DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst)); + return (int) (((char*)op)-dst); /* Nb of output bytes decoded */ + } else { + return (int) (((const char*)ip)-src); /* Nb of input bytes read */ + } + + /* Overflow error detected */ + _output_error: + return (int) (-(((const char*)ip)-src))-1; + } +} + + +/*===== Instantiate the API decoding functions. 
=====*/ + +LZ4_FORCE_O2 +int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, + endOnInputSize, decode_full_block, noDict, + (BYTE*)dest, NULL, 0); +} + +LZ4_FORCE_O2 +int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity) +{ + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity, + endOnInputSize, partial_decode, + noDict, (BYTE*)dst, NULL, 0); +} + +LZ4_FORCE_O2 +int LZ4_decompress_fast(const char* source, char* dest, int originalSize) +{ + return LZ4_decompress_generic(source, dest, 0, originalSize, + endOnOutputSize, decode_full_block, withPrefix64k, + (BYTE*)dest - 64 KB, NULL, 0); +} + +/*===== Instantiate a few more decoding cases, used more than once. =====*/ + +LZ4_FORCE_O2 /* Exported, an obsolete API function. */ +int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, withPrefix64k, + (BYTE*)dest - 64 KB, NULL, 0); +} + +/* Another obsolete API function, paired with the previous one. */ +int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize) +{ + /* LZ4_decompress_fast doesn't validate match offsets, + * and thus serves well with any prefixed dictionary. 
*/ + return LZ4_decompress_fast(source, dest, originalSize); +} + +LZ4_FORCE_O2 +static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize, + size_t prefixSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, noDict, + (BYTE*)dest-prefixSize, NULL, 0); +} + +LZ4_FORCE_O2 +int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, + int compressedSize, int maxOutputSize, + const void* dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, usingExtDict, + (BYTE*)dest, (const BYTE*)dictStart, dictSize); +} + +LZ4_FORCE_O2 +static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize, + const void* dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, 0, originalSize, + endOnOutputSize, decode_full_block, usingExtDict, + (BYTE*)dest, (const BYTE*)dictStart, dictSize); +} + +/* The "double dictionary" mode, for use with e.g. ring buffers: the first part + * of the dictionary is passed as prefix, and the second via dictStart + dictSize. + * These routines are used only once, in LZ4_decompress_*_continue(). 
+ */ +LZ4_FORCE_INLINE +int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize, + size_t prefixSize, const void* dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, usingExtDict, + (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); +} + +LZ4_FORCE_INLINE +int LZ4_decompress_fast_doubleDict(const char* source, char* dest, int originalSize, + size_t prefixSize, const void* dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, 0, originalSize, + endOnOutputSize, decode_full_block, usingExtDict, + (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); +} + +/*===== streaming decompression functions =====*/ + +LZ4_streamDecode_t* LZ4_createStreamDecode(void) +{ + LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t)); + LZ4_STATIC_ASSERT(LZ4_STREAMDECODESIZE >= sizeof(LZ4_streamDecode_t_internal)); /* A compilation error here means LZ4_STREAMDECODESIZE is not large enough */ + return lz4s; +} + +int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream) +{ + if (LZ4_stream == NULL) { return 0; } /* support free on NULL */ + FREEMEM(LZ4_stream); + return 0; +} + +/*! LZ4_setStreamDecode() : + * Use this function to instruct where to find the dictionary. + * This function is not necessary if previous data is still available where it was decoded. + * Loading a size of 0 is allowed (same effect as no dictionary). + * @return : 1 if OK, 0 if error + */ +int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize) +{ + LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; + lz4sd->prefixSize = (size_t) dictSize; + lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize; + lz4sd->externalDict = NULL; + lz4sd->extDictSize = 0; + return 1; +} + +/*! 
LZ4_decoderRingBufferSize() : + * when setting a ring buffer for streaming decompression (optional scenario), + * provides the minimum size of this ring buffer + * to be compatible with any source respecting maxBlockSize condition. + * Note : in a ring buffer scenario, + * blocks are presumed decompressed next to each other. + * When not enough space remains for next block (remainingSize < maxBlockSize), + * decoding resumes from beginning of ring buffer. + * @return : minimum ring buffer size, + * or 0 if there is an error (invalid maxBlockSize). + */ +int LZ4_decoderRingBufferSize(int maxBlockSize) +{ + if (maxBlockSize < 0) return 0; + if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0; + if (maxBlockSize < 16) maxBlockSize = 16; + return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize); +} + +/* +*_continue() : + These decoding functions allow decompression of multiple blocks in "streaming" mode. + Previously decoded blocks must still be available at the memory position where they were decoded. + If it's not possible, save the relevant part of decoded data into a safe buffer, + and indicate where it stands using LZ4_setStreamDecode() +*/ +LZ4_FORCE_O2 +int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) +{ + LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; + int result; + + if (lz4sd->prefixSize == 0) { + /* The first call, no dictionary yet. */ + assert(lz4sd->extDictSize == 0); + result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)result; + lz4sd->prefixEnd = (BYTE*)dest + result; + } else if (lz4sd->prefixEnd == (BYTE*)dest) { + /* They're rolling the current segment. 
*/ + if (lz4sd->prefixSize >= 64 KB - 1) + result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); + else if (lz4sd->extDictSize == 0) + result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, + lz4sd->prefixSize); + else + result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize, + lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize += (size_t)result; + lz4sd->prefixEnd += result; + } else { + /* The buffer wraps around, or they're switching to another buffer. */ + lz4sd->extDictSize = lz4sd->prefixSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; + result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, + lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)result; + lz4sd->prefixEnd = (BYTE*)dest + result; + } + + return result; +} + +LZ4_FORCE_O2 +int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize) +{ + LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; + int result; + assert(originalSize >= 0); + + if (lz4sd->prefixSize == 0) { + assert(lz4sd->extDictSize == 0); + result = LZ4_decompress_fast(source, dest, originalSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)originalSize; + lz4sd->prefixEnd = (BYTE*)dest + originalSize; + } else if (lz4sd->prefixEnd == (BYTE*)dest) { + if (lz4sd->prefixSize >= 64 KB - 1 || lz4sd->extDictSize == 0) + result = LZ4_decompress_fast(source, dest, originalSize); + else + result = LZ4_decompress_fast_doubleDict(source, dest, originalSize, + lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize += (size_t)originalSize; + lz4sd->prefixEnd += originalSize; + } else { + lz4sd->extDictSize = 
lz4sd->prefixSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; + result = LZ4_decompress_fast_extDict(source, dest, originalSize, + lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)originalSize; + lz4sd->prefixEnd = (BYTE*)dest + originalSize; + } + + return result; +} + + +/* +Advanced decoding functions : +*_usingDict() : + These decoding functions work the same as "_continue" ones, + the dictionary must be explicitly provided within parameters +*/ + +int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) +{ + if (dictSize==0) + return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); + if (dictStart+dictSize == dest) { + if (dictSize >= 64 KB - 1) { + return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize); +} + +int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) +{ + if (dictSize==0 || dictStart+dictSize == dest) + return LZ4_decompress_fast(source, dest, originalSize); + assert(dictSize >= 0); + return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize); +} + + +/*=************************************************* +* Obsolete Functions +***************************************************/ +/* obsolete compression functions */ +int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) +{ + return LZ4_compress_default(source, dest, inputSize, maxOutputSize); +} +int LZ4_compress(const char* src, char* dest, int srcSize) +{ + return 
LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize)); +} +int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) +{ + return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); +} +int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) +{ + return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1); +} +int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity) +{ + return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1); +} +int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) +{ + return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1); +} + +/* +These decompression functions are deprecated and should no longer be used. +They are only provided here for compatibility with older user programs. 
+- LZ4_uncompress is totally equivalent to LZ4_decompress_fast +- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe +*/ +int LZ4_uncompress (const char* source, char* dest, int outputSize) +{ + return LZ4_decompress_fast(source, dest, outputSize); +} +int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) +{ + return LZ4_decompress_safe(source, dest, isize, maxOutputSize); +} + +/* Obsolete Streaming functions */ + +int LZ4_sizeofStreamState(void) { return LZ4_STREAMSIZE; } + +int LZ4_resetStreamState(void* state, char* inputBuffer) +{ + (void)inputBuffer; + LZ4_resetStream((LZ4_stream_t*)state); + return 0; +} + +void* LZ4_create (char* inputBuffer) +{ + (void)inputBuffer; + return LZ4_createStream(); +} + +char* LZ4_slideInputBuffer (void* state) +{ + /* avoid const char * -> char * conversion warning */ + return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary; +} + +#endif /* LZ4_COMMONDEFS_ONLY */ diff --git a/lz4/lz4.h b/lz4/lz4.h new file mode 100644 index 00000000..7ab1e483 --- /dev/null +++ b/lz4/lz4.h @@ -0,0 +1,774 @@ +/* + * LZ4 - Fast LZ compression algorithm + * Header File + * Copyright (C) 2011-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 homepage : http://www.lz4.org + - LZ4 source repository : https://github.com/lz4/lz4 +*/ +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef LZ4_H_2983827168210 +#define LZ4_H_2983827168210 + +/* --- Dependency --- */ +#include /* size_t */ + + +/** + Introduction + + LZ4 is lossless compression algorithm, providing compression speed >500 MB/s per core, + scalable with multi-cores CPU. It features an extremely fast decoder, with speed in + multiple GB/s per core, typically reaching RAM speed limits on multi-core systems. + + The LZ4 compression library provides in-memory compression and decompression functions. + It gives full buffer control to user. + Compression can be done in: + - a single step (described as Simple Functions) + - a single step, reusing a context (described in Advanced Functions) + - unbounded multiple steps (described as Streaming compression) + + lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md). + Decompressing such a compressed block requires additional metadata. + Exact metadata depends on exact decompression function. 
+ For the typical case of LZ4_decompress_safe(), + metadata includes block's compressed size, and maximum bound of decompressed size. + Each application is free to encode and pass such metadata in whichever way it wants. + + lz4.h only handle blocks, it can not generate Frames. + + Blocks are different from Frames (doc/lz4_Frame_format.md). + Frames bundle both blocks and metadata in a specified manner. + Embedding metadata is required for compressed data to be self-contained and portable. + Frame format is delivered through a companion API, declared in lz4frame.h. + The `lz4` CLI can only manage frames. +*/ + +/*^*************************************************************** +* Export parameters +*****************************************************************/ +/* +* LZ4_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +* LZ4LIB_VISIBILITY : +* Control library symbols visibility. +*/ +#ifndef LZ4LIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define LZ4LIB_VISIBILITY +# endif +#endif +#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1) +# define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY +#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1) +# define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define LZ4LIB_API LZ4LIB_VISIBILITY +#endif + +/*------ Version ------*/ +#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */ +#define LZ4_VERSION_MINOR 9 /* for new (non-breaking) interface capabilities */ +#define LZ4_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */ + +#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE) + +#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE +#define LZ4_QUOTE(str) #str 
+#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str) +#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION) + +LZ4LIB_API int LZ4_versionNumber (void); /**< library version number; useful to check dll version */ +LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; useful to check dll version */ + + +/*-************************************ +* Tuning parameter +**************************************/ +/*! + * LZ4_MEMORY_USAGE : + * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) + * Increasing memory usage improves compression ratio. + * Reduced memory usage may improve speed, thanks to better cache locality. + * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache + */ +#ifndef LZ4_MEMORY_USAGE +# define LZ4_MEMORY_USAGE 14 +#endif + + +/*-************************************ +* Simple Functions +**************************************/ +/*! LZ4_compress_default() : + * Compresses 'srcSize' bytes from buffer 'src' + * into already allocated 'dst' buffer of size 'dstCapacity'. + * Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize). + * It also runs faster, so it's a recommended setting. + * If the function cannot compress 'src' into a more limited 'dst' budget, + * compression stops *immediately*, and the function result is zero. + * In which case, 'dst' content is undefined (invalid). + * srcSize : max supported value is LZ4_MAX_INPUT_SIZE. + * dstCapacity : size of buffer 'dst' (which must be already allocated) + * @return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity) + * or 0 if compression fails + * Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer). + */ +LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity); + +/*! 
LZ4_decompress_safe() : + * compressedSize : is the exact complete size of the compressed block. + * dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size. + * @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity) + * If destination buffer is not large enough, decoding will stop and output an error code (negative value). + * If the source stream is detected malformed, the function will stop decoding and return a negative result. + * Note 1 : This function is protected against malicious data packets : + * it will never writes outside 'dst' buffer, nor read outside 'source' buffer, + * even if the compressed block is maliciously modified to order the decoder to do these actions. + * In such case, the decoder stops immediately, and considers the compressed block malformed. + * Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them. + * The implementation is free to send / store / derive this information in whichever way is most beneficial. + * If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead. + */ +LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity); + + +/*-************************************ +* Advanced Functions +**************************************/ +#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */ +#define LZ4_COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16) + +/*! LZ4_compressBound() : + Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible) + This function is primarily useful for memory allocation purposes (destination buffer size). 
+ Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example). + Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize) + inputSize : max supported value is LZ4_MAX_INPUT_SIZE + return : maximum output size in a "worst case" scenario + or 0, if input size is incorrect (too large or negative) +*/ +LZ4LIB_API int LZ4_compressBound(int inputSize); + +/*! LZ4_compress_fast() : + Same as LZ4_compress_default(), but allows selection of "acceleration" factor. + The larger the acceleration value, the faster the algorithm, but also the lesser the compression. + It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed. + An acceleration value of "1" is the same as regular LZ4_compress_default() + Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c). + Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c). +*/ +LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + + +/*! LZ4_compress_fast_extState() : + * Same as LZ4_compress_fast(), using an externally allocated memory space for its state. + * Use LZ4_sizeofState() to know how much memory must be allocated, + * and allocate it on 8-bytes boundaries (using `malloc()` typically). + * Then, provide this buffer as `void* state` to compression function. + */ +LZ4LIB_API int LZ4_sizeofState(void); +LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + + +/*! LZ4_compress_destSize() : + * Reverse the logic : compresses as much data as possible from 'src' buffer + * into already allocated buffer 'dst', of size >= 'targetDestSize'. 
+ * This function either compresses the entire 'src' content into 'dst' if it's large enough, + * or fill 'dst' buffer completely with as much data as possible from 'src'. + * note: acceleration parameter is fixed to "default". + * + * *srcSizePtr : will be modified to indicate how many bytes where read from 'src' to fill 'dst'. + * New value is necessarily <= input value. + * @return : Nb bytes written into 'dst' (necessarily <= targetDestSize) + * or 0 if compression fails. + * + * Note : from v1.8.2 to v1.9.1, this function had a bug (fixed un v1.9.2+): + * the produced compressed content could, in specific circumstances, + * require to be decompressed into a destination buffer larger + * by at least 1 byte than the content to decompress. + * If an application uses `LZ4_compress_destSize()`, + * it's highly recommended to update liblz4 to v1.9.2 or better. + * If this can't be done or ensured, + * the receiving decompression function should provide + * a dstCapacity which is > decompressedSize, by at least 1 byte. + * See https://github.com/lz4/lz4/issues/859 for details + */ +LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize); + + +/*! LZ4_decompress_safe_partial() : + * Decompress an LZ4 compressed block, of size 'srcSize' at position 'src', + * into destination buffer 'dst' of size 'dstCapacity'. + * Up to 'targetOutputSize' bytes will be decoded. + * The function stops decoding on reaching this objective. + * This can be useful to boost performance + * whenever only the beginning of a block is required. + * + * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize) + * If source stream is detected malformed, function returns a negative result. + * + * Note 1 : @return can be < targetOutputSize, if compressed block contains less data. 
+ * + * Note 2 : targetOutputSize must be <= dstCapacity + * + * Note 3 : this function effectively stops decoding on reaching targetOutputSize, + * so dstCapacity is kind of redundant. + * This is because in older versions of this function, + * decoding operation would still write complete sequences. + * Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize, + * it could write more bytes, though only up to dstCapacity. + * Some "margin" used to be required for this operation to work properly. + * Thankfully, this is no longer necessary. + * The function nonetheless keeps the same signature, in an effort to preserve API compatibility. + * + * Note 4 : If srcSize is the exact size of the block, + * then targetOutputSize can be any value, + * including larger than the block's decompressed size. + * The function will, at most, generate block's decompressed size. + * + * Note 5 : If srcSize is _larger_ than block's compressed size, + * then targetOutputSize **MUST** be <= block's decompressed size. + * Otherwise, *silent corruption will occur*. + */ +LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity); + + +/*-********************************************* +* Streaming Compression Functions +***********************************************/ +typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */ + +LZ4LIB_API LZ4_stream_t* LZ4_createStream(void); +LZ4LIB_API int LZ4_freeStream (LZ4_stream_t* streamPtr); + +/*! LZ4_resetStream_fast() : v1.9.0+ + * Use this to prepare an LZ4_stream_t for a new chain of dependent blocks + * (e.g., LZ4_compress_fast_continue()). + * + * An LZ4_stream_t must be initialized once before usage. + * This is automatically done when created by LZ4_createStream(). + * However, should the LZ4_stream_t be simply declared on stack (for example), + * it's necessary to initialize it first, using LZ4_initStream(). 
+ * + * After init, start any new stream with LZ4_resetStream_fast(). + * A same LZ4_stream_t can be re-used multiple times consecutively + * and compress multiple streams, + * provided that it starts each new stream with LZ4_resetStream_fast(). + * + * LZ4_resetStream_fast() is much faster than LZ4_initStream(), + * but is not compatible with memory regions containing garbage data. + * + * Note: it's only useful to call LZ4_resetStream_fast() + * in the context of streaming compression. + * The *extState* functions perform their own resets. + * Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive. + */ +LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr); + +/*! LZ4_loadDict() : + * Use this function to reference a static dictionary into LZ4_stream_t. + * The dictionary must remain available during compression. + * LZ4_loadDict() triggers a reset, so any previous data will be forgotten. + * The same dictionary will have to be loaded on decompression side for successful decoding. + * Dictionary are useful for better compression of small data (KB range). + * While LZ4 accept any input as dictionary, + * results are generally better when using Zstandard's Dictionary Builder. + * Loading a size of 0 is allowed, and is the same as reset. + * @return : loaded dictionary size, in bytes (necessarily <= 64 KB) + */ +LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize); + +/*! LZ4_compress_fast_continue() : + * Compress 'src' content using data from previously compressed blocks, for better compression ratio. + * 'dst' buffer must be already allocated. + * If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster. + * + * @return : size of compressed block + * or 0 if there is an error (typically, cannot fit into 'dst'). + * + * Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block. + * Each block has precise boundaries. 
+ * Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata. + * It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together. + * + * Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory ! + * + * Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB. + * Make sure that buffers are separated, by at least one byte. + * This construction ensures that each block only depends on previous block. + * + * Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB. + * + * Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed. + */ +LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + +/*! LZ4_saveDict() : + * If last 64KB data cannot be guaranteed to remain available at its current memory location, + * save it into a safer place (char* safeBuffer). + * This is schematically equivalent to a memcpy() followed by LZ4_loadDict(), + * but is much faster, because LZ4_saveDict() doesn't need to rebuild tables. + * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error. + */ +LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize); + + +/*-********************************************** +* Streaming Decompression Functions +* Bufferless synchronous API +************************************************/ +typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* tracking context */ + +/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() : + * creation / destruction of streaming decompression tracking context. + * A tracking context can be re-used multiple times. 
+ */ +LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void); +LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream); + +/*! LZ4_setStreamDecode() : + * An LZ4_streamDecode_t context can be allocated once and re-used multiple times. + * Use this function to start decompression of a new stream of blocks. + * A dictionary can optionally be set. Use NULL or size 0 for a reset order. + * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression. + * @return : 1 if OK, 0 if error + */ +LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize); + +/*! LZ4_decoderRingBufferSize() : v1.8.2+ + * Note : in a ring buffer scenario (optional), + * blocks are presumed decompressed next to each other + * up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize), + * at which stage it resumes from beginning of ring buffer. + * When setting such a ring buffer for streaming decompression, + * provides the minimum size of this ring buffer + * to be compatible with any source respecting maxBlockSize condition. + * @return : minimum ring buffer size, + * or 0 if there is an error (invalid maxBlockSize). + */ +LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize); +#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize)) /* for static allocation; maxBlockSize presumed valid */ + +/*! LZ4_decompress_*_continue() : + * These decoding functions allow decompression of consecutive blocks in "streaming" mode. + * A block is an unsplittable entity, it must be presented entirely to a decompression function. + * Decompression functions only accepts one block at a time. + * The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded. + * If less than 64KB of data has been decoded, all the data must be present. 
+ * + * Special : if decompression side sets a ring buffer, it must respect one of the following conditions : + * - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize). + * maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes. + * In which case, encoding and decoding buffers do not need to be synchronized. + * Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize. + * - Synchronized mode : + * Decompression buffer size is _exactly_ the same as compression buffer size, + * and follows exactly same update rule (block boundaries at same positions), + * and decoding function is provided with exact decompressed size of each block (exception for last block of the stream), + * _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB). + * - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes. + * In which case, encoding and decoding buffers do not need to be synchronized, + * and encoding ring buffer can have any size, including small ones ( < 64 KB). + * + * Whenever these conditions are not possible, + * save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression, + * then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block. +*/ +LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int srcSize, int dstCapacity); + + +/*! LZ4_decompress_*_usingDict() : + * These decoding functions work the same as + * a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue() + * They are stand-alone, and don't need an LZ4_streamDecode_t structure. + * Dictionary is presumed stable : it must remain accessible and unmodified during decompression. 
+ * Performance tip : Decompression speed can be substantially increased + * when dst == dictStart + dictSize. + */ +LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize); + +#endif /* LZ4_H_2983827168210 */ + + +/*^************************************* + * !!!!!! STATIC LINKING ONLY !!!!!! + ***************************************/ + +/*-**************************************************************************** + * Experimental section + * + * Symbols declared in this section must be considered unstable. Their + * signatures or semantics may change, or they may be removed altogether in the + * future. They are therefore only safe to depend on when the caller is + * statically linked against the library. + * + * To protect against unsafe usage, not only are the declarations guarded, + * the definitions are hidden by default + * when building LZ4 as a shared/dynamic library. + * + * In order to access these declarations, + * define LZ4_STATIC_LINKING_ONLY in your application + * before including LZ4's headers. + * + * In order to make their implementations accessible dynamically, you must + * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library. + ******************************************************************************/ + +#ifdef LZ4_STATIC_LINKING_ONLY + +#ifndef LZ4_STATIC_3504398509 +#define LZ4_STATIC_3504398509 + +#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS +#define LZ4LIB_STATIC_API LZ4LIB_API +#else +#define LZ4LIB_STATIC_API +#endif + + +/*! LZ4_compress_fast_extState_fastReset() : + * A variant of LZ4_compress_fast_extState(). + * + * Using this variant avoids an expensive initialization step. + * It is only safe to call if the state buffer is known to be correctly initialized already + * (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized"). 
+ * From a high level, the difference is that + * this function initializes the provided state with a call to something like LZ4_resetStream_fast() + * while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream(). + */ +LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + +/*! LZ4_attach_dictionary() : + * This is an experimental API that allows + * efficient use of a static dictionary many times. + * + * Rather than re-loading the dictionary buffer into a working context before + * each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a + * working LZ4_stream_t, this function introduces a no-copy setup mechanism, + * in which the working stream references the dictionary stream in-place. + * + * Several assumptions are made about the state of the dictionary stream. + * Currently, only streams which have been prepared by LZ4_loadDict() should + * be expected to work. + * + * Alternatively, the provided dictionaryStream may be NULL, + * in which case any existing dictionary stream is unset. + * + * If a dictionary is provided, it replaces any pre-existing stream history. + * The dictionary contents are the only history that can be referenced and + * logically immediately precede the data compressed in the first subsequent + * compression call. + * + * The dictionary will only remain attached to the working stream through the + * first compression call, at the end of which it is cleared. The dictionary + * stream (and source buffer) must remain in-place / accessible / unchanged + * through the completion of the first compression call on the stream. + */ +LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream); + + +/*! In-place compression and decompression + * + * It's possible to have input and output sharing the same buffer, + * for highly contrained memory environments. 
+ * In both cases, it requires input to lay at the end of the buffer, + * and decompression to start at beginning of the buffer. + * Buffer size must feature some margin, hence be larger than final size. + * + * |<------------------------buffer--------------------------------->| + * |<-----------compressed data--------->| + * |<-----------decompressed size------------------>| + * |<----margin---->| + * + * This technique is more useful for decompression, + * since decompressed size is typically larger, + * and margin is short. + * + * In-place decompression will work inside any buffer + * which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize). + * This presumes that decompressedSize > compressedSize. + * Otherwise, it means compression actually expanded data, + * and it would be more efficient to store such data with a flag indicating it's not compressed. + * This can happen when data is not compressible (already compressed, or encrypted). + * + * For in-place compression, margin is larger, as it must be able to cope with both + * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX, + * and data expansion, which can happen when input is not compressible. + * As a consequence, buffer size requirements are much higher, + * and memory savings offered by in-place compression are more limited. + * + * There are ways to limit this cost for compression : + * - Reduce history size, by modifying LZ4_DISTANCE_MAX. + * Note that it is a compile-time constant, so all compressions will apply this limit. + * Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX, + * so it's a reasonable trick when inputs are known to be small. + * - Require the compressor to deliver a "maximum compressed size". + * This is the `dstCapacity` parameter in `LZ4_compress*()`. + * When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail, + * in which case, the return code will be 0 (zero). 
+ * The caller must be ready for these cases to happen, + * and typically design a backup scheme to send data uncompressed. + * The combination of both techniques can significantly reduce + * the amount of margin required for in-place compression. + * + * In-place compression can work in any buffer + * which size is >= (maxCompressedSize) + * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success. + * LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX, + * so it's possible to reduce memory requirements by playing with them. + */ + +#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32) +#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */ + +#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */ +# define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */ +#endif + +#define LZ4_COMPRESS_INPLACE_MARGIN (LZ4_DISTANCE_MAX + 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */ +#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */ + +#endif /* LZ4_STATIC_3504398509 */ +#endif /* LZ4_STATIC_LINKING_ONLY */ + + + +#ifndef LZ4_H_98237428734687 +#define LZ4_H_98237428734687 + +/*-************************************************************ + * Private Definitions + ************************************************************** + * Do not use these definitions directly. + * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`. 
+ * Accessing members will expose user code to API and/or ABI break in future versions of the library.
+ **************************************************************/
+#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */
+
+#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+ typedef int8_t LZ4_i8;
+ typedef uint8_t LZ4_byte;
+ typedef uint16_t LZ4_u16;
+ typedef uint32_t LZ4_u32;
+#else
+ typedef signed char LZ4_i8;
+ typedef unsigned char LZ4_byte;
+ typedef unsigned short LZ4_u16;
+ typedef unsigned int LZ4_u32;
+#endif
+
+typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
+struct LZ4_stream_t_internal {
+ LZ4_u32 hashTable[LZ4_HASH_SIZE_U32];
+ LZ4_u32 currentOffset;
+ LZ4_u32 tableType;
+ const LZ4_byte* dictionary;
+ const LZ4_stream_t_internal* dictCtx;
+ LZ4_u32 dictSize;
+};
+
+typedef struct {
+ const LZ4_byte* externalDict;
+ size_t extDictSize;
+ const LZ4_byte* prefixEnd;
+ size_t prefixSize;
+} LZ4_streamDecode_t_internal;
+
+
+/*! LZ4_stream_t :
+ * Do not use below internal definitions directly !
+ * Declare or allocate an LZ4_stream_t instead.
+ * LZ4_stream_t can also be created using LZ4_createStream(), which is recommended.
+ * The structure definition can be convenient for static allocation
+ * (on stack, or as part of larger structure).
+ * Init this structure with LZ4_initStream() before first use.
+ * note : only use this definition in association with static linking !
+ * this definition is not API/ABI safe, and may change in future versions. 
+ */ +#define LZ4_STREAMSIZE 16416 /* static size, for inter-version compatibility */ +#define LZ4_STREAMSIZE_VOIDP (LZ4_STREAMSIZE / sizeof(void*)) +union LZ4_stream_u { + void* table[LZ4_STREAMSIZE_VOIDP]; + LZ4_stream_t_internal internal_donotuse; +}; /* previously typedef'd to LZ4_stream_t */ + + +/*! LZ4_initStream() : v1.9.0+ + * An LZ4_stream_t structure must be initialized at least once. + * This is automatically done when invoking LZ4_createStream(), + * but it's not when the structure is simply declared on stack (for example). + * + * Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t. + * It can also initialize any arbitrary buffer of sufficient size, + * and will @return a pointer of proper type upon initialization. + * + * Note : initialization fails if size and alignment conditions are not respected. + * In which case, the function will @return NULL. + * Note2: An LZ4_stream_t structure guarantees correct alignment and size. + * Note3: Before v1.9.0, use LZ4_resetStream() instead + */ +LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* buffer, size_t size); + + +/*! LZ4_streamDecode_t : + * information structure to track an LZ4 stream during decompression. + * init this structure using LZ4_setStreamDecode() before first use. + * note : only use in association with static linking ! + * this definition is not API/ABI safe, + * and may change in a future version ! + */ +#define LZ4_STREAMDECODESIZE_U64 (4 + ((sizeof(void*)==16) ? 2 : 0) /*AS-400*/ ) +#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long)) +union LZ4_streamDecode_u { + unsigned long long table[LZ4_STREAMDECODESIZE_U64]; + LZ4_streamDecode_t_internal internal_donotuse; +} ; /* previously typedef'd to LZ4_streamDecode_t */ + + + +/*-************************************ +* Obsolete Functions +**************************************/ + +/*! Deprecation warnings + * + * Deprecated functions make the compiler generate a warning when invoked. 
+ * This is meant to invite users to update their source code. + * Should deprecation warnings be a problem, it is generally possible to disable them, + * typically with -Wno-deprecated-declarations for gcc + * or _CRT_SECURE_NO_WARNINGS in Visual. + * + * Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS + * before including the header file. + */ +#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS +# define LZ4_DEPRECATED(message) /* disable deprecation warnings */ +#else +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define LZ4_DEPRECATED(message) [[deprecated(message)]] +# elif defined(_MSC_VER) +# define LZ4_DEPRECATED(message) __declspec(deprecated(message)) +# elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45)) +# define LZ4_DEPRECATED(message) __attribute__((deprecated(message))) +# elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31) +# define LZ4_DEPRECATED(message) __attribute__((deprecated)) +# else +# pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler") +# define LZ4_DEPRECATED(message) /* disabled */ +# endif +#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */ + +/*! 
Obsolete compression functions (since v1.7.3) */ +LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize); +LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize); + +/*! Obsolete decompression functions (since v1.8.0) */ +LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize); +LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); + +/* Obsolete streaming functions (since v1.7.0) + * degraded functionality; do not use! + * + * In order to perform streaming compression, these functions depended on data + * that is no longer tracked in the state. They have been preserved as well as + * possible: using them will still produce a correct output. However, they don't + * actually retain any history between compression calls. The compression ratio + * achieved will therefore be no better than compressing each chunk + * independently. 
+ */ +LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer); +LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int LZ4_sizeofStreamState(void); +LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API int LZ4_resetStreamState(void* state, char* inputBuffer); +LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API char* LZ4_slideInputBuffer (void* state); + +/*! Obsolete streaming decoding functions (since v1.7.0) */ +LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize); +LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize); + +/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) : + * These functions used to be faster than LZ4_decompress_safe(), + * but this is no longer the case. They are now slower. + * This is because LZ4_decompress_fast() doesn't know the input size, + * and therefore must progress more cautiously into the input buffer to not read beyond the end of block. + * On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability. + * As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated. + * + * The last remaining LZ4_decompress_fast() specificity is that + * it can decompress a block without knowing its compressed size. + * Such functionality can be achieved in a more secure manner + * by employing LZ4_decompress_safe_partial(). + * + * Parameters: + * originalSize : is the uncompressed size to regenerate. + * `dst` must be already allocated, its size must be >= 'originalSize' bytes. + * @return : number of bytes read from source buffer (== compressed size). + * The function expects to finish at block's end exactly. 
+ * If the source stream is detected malformed, the function stops decoding and returns a negative result. + * note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer. + * However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds. + * Also, since match offsets are not validated, match reads from 'src' may underflow too. + * These issues never happen if input (compressed) data is correct. + * But they may happen if input data is invalid (error or intentional tampering). + * As a consequence, use these functions in trusted environments with trusted data **only**. + */ +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead") +LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize); +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead") +LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize); +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead") +LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize); + +/*! LZ4_resetStream() : + * An LZ4_stream_t structure must be initialized at least once. + * This is done with LZ4_initStream(), or LZ4_resetStream(). + * Consider switching to LZ4_initStream(), + * invoking LZ4_resetStream() will trigger deprecation warnings in the future. 
+ */ +LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr); + + +#endif /* LZ4_H_98237428734687 */ + + +#if defined (__cplusplus) +} +#endif diff --git a/lzf/LICENSE.txt b/lzf/LICENSE.txt new file mode 100644 index 00000000..3787a007 --- /dev/null +++ b/lzf/LICENSE.txt @@ -0,0 +1,34 @@ +Copyright Notice and Statement for LZF filter + +Copyright (c) 2008-2009 Andrew Collette +http://h5py.alfven.org +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +a. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the + distribution. + +c. Neither the name of the author nor the names of contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ diff --git a/lzf/README.txt b/lzf/README.txt new file mode 100644 index 00000000..c6ad62c3 --- /dev/null +++ b/lzf/README.txt @@ -0,0 +1,84 @@ +=============================== +LZF filter for HDF5, revision 3 +=============================== + +The LZF filter provides high-speed compression with acceptable compression +performance, resulting in much faster performance than DEFLATE, at the +cost of a slightly lower compression ratio. It's appropriate for large +datasets of low to moderate complexity, for which some compression is +much better than none, but for which the speed of DEFLATE is unacceptable. + +This filter has been tested against HDF5 versions 1.6.5 through 1.8.3. It +is released under the BSD license (see LICENSE.txt for details). + + +Using the filter from HDF5 +-------------------------- + +There is exactly one new public function declared in lzf_filter.h, with +the following signature: + + int register_lzf(void) + +Calling this will register the filter with the HDF5 library. A non-negative +return value indicates success. If the registration fails, an error is pushed +onto the current error stack and a negative value is returned. + +It's strongly recommended to use the SHUFFLE filter with LZF, as it's +cheap, supported by all current versions of HDF5, and can significantly +improve the compression ratio. An example C program ("example.c") is included +which demonstrates the proper use of the filter. + + +Compiling +--------- + +The filter consists of a single .c file and header, along with an embedded +version of the LZF compression library. 
Since the filter is stateless, it's +recommended to statically link the entire thing into your program; for +example: + + $ gcc -O2 -lhdf5 lzf/*.c lzf_filter.c myprog.c -o myprog + +It can also be built as a shared library, although you will have to install +the resulting library somewhere the runtime linker can find it: + + $ gcc -O2 -lhdf5 -fPIC -shared lzf/*.c lzf_filter.c -o liblzf_filter.so + +A similar procedure should be used for building C++ code. As in these +examples, using option -O1 or higher is strongly recommended for increased +performance. + + +Contact +------- + +This filter is maintained as part of the HDF5 for Python (h5py) project. The +goal of h5py is to provide access to the majority of the HDF5 C API and feature +set from Python. The most recent version of h5py (1.1) includes the LZF +filter by default. + +* Downloads and bug tracker: http://h5py.googlecode.com + +* Main web site and documentation: http://h5py.alfven.org + +* Contact email: h5py at alfven dot org + + +History of changes +------------------ + +Revision 3 (6/25/09) + Fix issue with changed filter struct definition under HDF5 1.8.3. + +Revision 2 + Minor speed enhancement. + +Revision 1 + Initial release. + + + + + + diff --git a/lzf/README_bitshuffle.txt b/lzf/README_bitshuffle.txt new file mode 100644 index 00000000..d620a925 --- /dev/null +++ b/lzf/README_bitshuffle.txt @@ -0,0 +1,3 @@ +The LZF filter for HDF5 is part of the h5py project (http://h5py.alfven.org). +The version included with bitshuffle is from version 2.3 of h5py with no +modifications other than the addition of this README. diff --git a/lzf/example.c b/lzf/example.c new file mode 100644 index 00000000..23dd776c --- /dev/null +++ b/lzf/example.c @@ -0,0 +1,106 @@ +/* + Copyright (C) 2009 Andrew Collette + http://h5py.alfven.org + License: BSD (see LICENSE.txt) + + Example program demonstrating use of the LZF filter from C code. 
+ + To compile this program: + + h5cc -DH5_USE_16_API lzf/*.c lzf_filter.c example.c -o example + + To run: + + $ ./example + Success! + $ h5ls -v test_lzf.hdf5 + Opened "test_lzf.hdf5" with sec2 driver. + dset Dataset {100/100, 100/100, 100/100} + Location: 0:1:0:976 + Links: 1 + Modified: 2009-02-15 16:35:11 PST + Chunks: {1, 100, 100} 40000 bytes + Storage: 4000000 logical bytes, 174288 allocated bytes, 2295.05% utilization + Filter-0: shuffle-2 OPT {4} + Filter-1: lzf-32000 OPT {1, 261, 40000} + Type: native float +*/ + +#include +#include "hdf5.h" +#include "lzf_filter.h" + +#define SIZE 100*100*100 +#define SHAPE {100,100,100} +#define CHUNKSHAPE {1,100,100} + +int main(){ + + static float data[SIZE]; + static float data_out[SIZE]; + const hsize_t shape[] = SHAPE; + const hsize_t chunkshape[] = CHUNKSHAPE; + int r, i; + int return_code = 1; + + hid_t fid, sid, dset, plist = 0; + + for(i=0; i0) H5Dclose(dset); + if(sid>0) H5Sclose(sid); + if(plist>0) H5Pclose(plist); + if(fid>0) H5Fclose(fid); + + return return_code; +} + diff --git a/lzf/lzf/lzf.h b/lzf/lzf/lzf.h new file mode 100644 index 00000000..919b6e6b --- /dev/null +++ b/lzf/lzf/lzf.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2000-2008 Marc Alexander Lehmann + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. + */ + +#ifndef LZF_H +#define LZF_H + +/*********************************************************************** +** +** lzf -- an extremely fast/free compression/decompression-method +** http://liblzf.plan9.de/ +** +** This algorithm is believed to be patent-free. +** +***********************************************************************/ + +#define LZF_VERSION 0x0105 /* 1.5, API version */ + +/* + * Compress in_len bytes stored at the memory block starting at + * in_data and write the result to out_data, up to a maximum length + * of out_len bytes. 
+ * + * If the output buffer is not large enough or any error occurs return 0, + * otherwise return the number of bytes used, which might be considerably + * more than in_len (but less than 104% of the original size), so it + * makes sense to always use out_len == in_len - 1), to ensure _some_ + * compression, and store the data uncompressed otherwise (with a flag, of + * course. + * + * lzf_compress might use different algorithms on different systems and + * even different runs, thus might result in different compressed strings + * depending on the phase of the moon or similar factors. However, all + * these strings are architecture-independent and will result in the + * original data when decompressed using lzf_decompress. + * + * The buffers must not be overlapping. + * + * If the option LZF_STATE_ARG is enabled, an extra argument must be + * supplied which is not reflected in this header file. Refer to lzfP.h + * and lzf_c.c. + * + */ +unsigned int +lzf_compress (const void *const in_data, unsigned int in_len, + void *out_data, unsigned int out_len); + +/* + * Decompress data compressed with some version of the lzf_compress + * function and stored at location in_data and length in_len. The result + * will be stored at out_data up to a maximum of out_len characters. + * + * If the output buffer is not large enough to hold the decompressed + * data, a 0 is returned and errno is set to E2BIG. Otherwise the number + * of decompressed bytes (i.e. the original length of the data) is + * returned. + * + * If an error in the compressed data is detected, a zero is returned and + * errno is set to EINVAL. + * + * This function is very fast, about as fast as a copying loop. 
+ */ +unsigned int +lzf_decompress (const void *const in_data, unsigned int in_len, + void *out_data, unsigned int out_len); + +#endif + diff --git a/lzf/lzf/lzfP.h b/lzf/lzf/lzfP.h new file mode 100644 index 00000000..8414da4d --- /dev/null +++ b/lzf/lzf/lzfP.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2000-2007 Marc Alexander Lehmann + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. 
If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. + */ + +#ifndef LZFP_h +#define LZFP_h + +#define STANDALONE 1 /* at the moment, this is ok. */ + +#ifndef STANDALONE +# include "lzf.h" +#endif + +/* + * Size of hashtable is (1 << HLOG) * sizeof (char *) + * decompression is independent of the hash table size + * the difference between 15 and 14 is very small + * for small blocks (and 14 is usually a bit faster). + * For a low-memory/faster configuration, use HLOG == 13; + * For best compression, use 15 or 16 (or more, up to 23). + */ +#ifndef HLOG +# define HLOG 17 /* Avoid pathological case at HLOG=16 A.C. 2/15/09 */ +#endif + +/* + * Sacrifice very little compression quality in favour of compression speed. + * This gives almost the same compression as the default code, and is + * (very roughly) 15% faster. This is the preferred mode of operation. + */ +#ifndef VERY_FAST +# define VERY_FAST 1 +#endif + +/* + * Sacrifice some more compression quality in favour of compression speed. + * (roughly 1-2% worse compression for large blocks and + * 9-10% for small, redundant, blocks and >>20% better speed in both cases) + * In short: when in need for speed, enable this for binary data, + * possibly disable this for text data. 
+ */ +#ifndef ULTRA_FAST +# define ULTRA_FAST 1 +#endif + +/* + * Unconditionally aligning does not cost very much, so do it if unsure + */ +#ifndef STRICT_ALIGN +# define STRICT_ALIGN !(defined(__i386) || defined (__amd64)) +#endif + +/* + * You may choose to pre-set the hash table (might be faster on some + * modern cpus and large (>>64k) blocks, and also makes compression + * deterministic/repeatable when the configuration otherwise is the same). + */ +#ifndef INIT_HTAB +# define INIT_HTAB 0 +#endif + +/* ======================================================================= + Changing things below this line may break the HDF5 LZF filter. + A.C. 2/15/09 + ======================================================================= +*/ + +/* + * Avoid assigning values to errno variable? for some embedding purposes + * (linux kernel for example), this is neccessary. NOTE: this breaks + * the documentation in lzf.h. + */ +#ifndef AVOID_ERRNO +# define AVOID_ERRNO 0 +#endif + +/* + * Wether to pass the LZF_STATE variable as argument, or allocate it + * on the stack. For small-stack environments, define this to 1. + * NOTE: this breaks the prototype in lzf.h. + */ +#ifndef LZF_STATE_ARG +# define LZF_STATE_ARG 0 +#endif + +/* + * Wether to add extra checks for input validity in lzf_decompress + * and return EINVAL if the input stream has been corrupted. This + * only shields against overflowing the input buffer and will not + * detect most corrupted streams. + * This check is not normally noticable on modern hardware + * (<1% slowdown), but might slow down older cpus considerably. + */ + +#ifndef CHECK_INPUT +# define CHECK_INPUT 1 +#endif + +/*****************************************************************************/ +/* nothing should be changed below */ + +typedef unsigned char u8; + +typedef const u8 *LZF_STATE[1 << (HLOG)]; + +#if !STRICT_ALIGN +/* for unaligned accesses we need a 16 bit datatype. 
*/ +# include +# if USHRT_MAX == 65535 + typedef unsigned short u16; +# elif UINT_MAX == 65535 + typedef unsigned int u16; +# else +# undef STRICT_ALIGN +# define STRICT_ALIGN 1 +# endif +#endif + +#if ULTRA_FAST +# if defined(VERY_FAST) +# undef VERY_FAST +# endif +#endif + +#if INIT_HTAB +# ifdef __cplusplus +# include +# else +# include +# endif +#endif + +#endif + diff --git a/lzf/lzf/lzf_c.c b/lzf/lzf/lzf_c.c new file mode 100644 index 00000000..fbfd4cce --- /dev/null +++ b/lzf/lzf/lzf_c.c @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2000-2008 Marc Alexander Lehmann + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. + */ + +#include "lzfP.h" + +#define HSIZE (1 << (HLOG)) + +/* + * don't play with this unless you benchmark! + * decompression is not dependent on the hash function + * the hashing function might seem strange, just believe me + * it works ;) + */ +#ifndef FRST +# define FRST(p) (((p[0]) << 8) | p[1]) +# define NEXT(v,p) (((v) << 8) | p[2]) +# if ULTRA_FAST +# define IDX(h) ((( h >> (3*8 - HLOG)) - h ) & (HSIZE - 1)) +# elif VERY_FAST +# define IDX(h) ((( h >> (3*8 - HLOG)) - h*5) & (HSIZE - 1)) +# else +# define IDX(h) ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1)) +# endif +#endif +/* + * IDX works because it is very similar to a multiplicative hash, e.g. + * ((h * 57321 >> (3*8 - HLOG)) & (HSIZE - 1)) + * the latter is also quite fast on newer CPUs, and compresses similarly. 
+ * + * the next one is also quite good, albeit slow ;) + * (int)(cos(h & 0xffffff) * 1e6) + */ + +#if 0 +/* original lzv-like hash function, much worse and thus slower */ +# define FRST(p) (p[0] << 5) ^ p[1] +# define NEXT(v,p) ((v) << 5) ^ p[2] +# define IDX(h) ((h) & (HSIZE - 1)) +#endif + +#define MAX_LIT (1 << 5) +#define MAX_OFF (1 << 13) +#define MAX_REF ((1 << 8) + (1 << 3)) + +#if __GNUC__ >= 3 +# define expect(expr,value) __builtin_expect ((expr),(value)) +# define inline inline +#else +# define expect(expr,value) (expr) +# define inline static +#endif + +#define expect_false(expr) expect ((expr) != 0, 0) +#define expect_true(expr) expect ((expr) != 0, 1) + +/* + * compressed format + * + * 000LLLLL ; literal + * LLLooooo oooooooo ; backref L + * 111ooooo LLLLLLLL oooooooo ; backref L+7 + * + */ + +unsigned int +lzf_compress (const void *const in_data, unsigned int in_len, + void *out_data, unsigned int out_len +#if LZF_STATE_ARG + , LZF_STATE htab +#endif + ) +{ +#if !LZF_STATE_ARG + LZF_STATE htab; +#endif + const u8 **hslot; + const u8 *ip = (const u8 *)in_data; + u8 *op = (u8 *)out_data; + const u8 *in_end = ip + in_len; + u8 *out_end = op + out_len; + const u8 *ref; + + /* off requires a type wide enough to hold a general pointer difference. + * ISO C doesn't have that (size_t might not be enough and ptrdiff_t only + * works for differences within a single object). We also assume that no + * no bit pattern traps. Since the only platform that is both non-POSIX + * and fails to support both assumptions is windows 64 bit, we make a + * special workaround for it. 
+ */ +#if ( defined (WIN32) && defined (_M_X64) ) || defined (_WIN64) + unsigned _int64 off; /* workaround for missing POSIX compliance */ +#else + unsigned long off; +#endif + unsigned int hval; + int lit; + + if (!in_len || !out_len) + return 0; + +#if INIT_HTAB + memset (htab, 0, sizeof (htab)); +# if 0 + for (hslot = htab; hslot < htab + HSIZE; hslot++) + *hslot++ = ip; +# endif +#endif + + lit = 0; op++; /* start run */ + + hval = FRST (ip); + while (ip < in_end - 2) + { + hval = NEXT (hval, ip); + hslot = htab + IDX (hval); + ref = *hslot; *hslot = ip; + + if (1 +#if INIT_HTAB + && ref < ip /* the next test will actually take care of this, but this is faster */ +#endif + && (off = ip - ref - 1) < MAX_OFF + && ip + 4 < in_end + && ref > (u8 *)in_data +#if STRICT_ALIGN + && ref[0] == ip[0] + && ref[1] == ip[1] + && ref[2] == ip[2] +#else + && *(u16 *)ref == *(u16 *)ip + && ref[2] == ip[2] +#endif + ) + { + /* match found at *ref++ */ + unsigned int len = 2; + unsigned int maxlen = in_end - ip - len; + maxlen = maxlen > MAX_REF ? 
MAX_REF : maxlen; + + if (expect_false (op + 3 + 1 >= out_end)) /* first a faster conservative test */ + if (op - !lit + 3 + 1 >= out_end) /* second the exact but rare test */ + return 0; + + op [- lit - 1] = lit - 1; /* stop run */ + op -= !lit; /* undo run if length is zero */ + + for (;;) + { + if (expect_true (maxlen > 16)) + { + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + len++; if (ref [len] != ip [len]) break; + } + + do + len++; + while (len < maxlen && ref[len] == ip[len]); + + break; + } + + len -= 2; /* len is now #octets - 1 */ + ip++; + + if (len < 7) + { + *op++ = (off >> 8) + (len << 5); + } + else + { + *op++ = (off >> 8) + ( 7 << 5); + *op++ = len - 7; + } + + *op++ = off; + lit = 0; op++; /* start run */ + + ip += len + 1; + + if (expect_false (ip >= in_end - 2)) + break; + +#if ULTRA_FAST || VERY_FAST + --ip; +# if VERY_FAST && !ULTRA_FAST + --ip; +# endif + hval = FRST (ip); + + hval = NEXT (hval, ip); + htab[IDX (hval)] = ip; + ip++; + +# if VERY_FAST && !ULTRA_FAST + hval = NEXT (hval, ip); + htab[IDX (hval)] = ip; + ip++; +# endif +#else + ip -= len + 1; + + do + { + hval = NEXT (hval, ip); + htab[IDX (hval)] = ip; + ip++; + } + while (len--); +#endif + } + else + { + /* one more literal byte we must copy */ + if (expect_false (op >= out_end)) + return 0; + + lit++; *op++ = *ip++; + + if (expect_false (lit == MAX_LIT)) + { + op [- lit - 1] = lit - 
1; /* stop run */ + lit = 0; op++; /* start run */ + } + } + } + + if (op + 3 > out_end) /* at most 3 bytes can be missing here */ + return 0; + + while (ip < in_end) + { + lit++; *op++ = *ip++; + + if (expect_false (lit == MAX_LIT)) + { + op [- lit - 1] = lit - 1; /* stop run */ + lit = 0; op++; /* start run */ + } + } + + op [- lit - 1] = lit - 1; /* end run */ + op -= !lit; /* undo run if length is zero */ + + return op - (u8 *)out_data; +} + diff --git a/lzf/lzf/lzf_d.c b/lzf/lzf/lzf_d.c new file mode 100644 index 00000000..2e2eedaa --- /dev/null +++ b/lzf/lzf/lzf_d.c @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2000-2007 Marc Alexander Lehmann + * + * Redistribution and use in source and binary forms, with or without modifica- + * tion, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- + * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO + * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- + * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- + * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Alternatively, the contents of this file may be used under the terms of + * the GNU General Public License ("GPL") version 2 or any later version, + * in which case the provisions of the GPL are applicable instead of + * the above. If you wish to allow the use of your version of this file + * only under the terms of the GPL and not to allow others to use your + * version of this file under the BSD license, indicate your decision + * by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL. If you do not delete the + * provisions above, a recipient may use your version of this file under + * either the BSD or the GPL. + */ + +#include "lzfP.h" + +#if AVOID_ERRNO +# define SET_ERRNO(n) +#else +# include +# define SET_ERRNO(n) errno = (n) +#endif + +/* ASM is slower than C in HDF5 tests -- A.C. 2/5/09 +#ifndef __STRICT_ANSI__ +#ifndef H5PY_DISABLE_LZF_ASM +#if (__i386 || __amd64) && __GNUC__ >= 3 +# define lzf_movsb(dst, src, len) \ + asm ("rep movsb" \ + : "=D" (dst), "=S" (src), "=c" (len) \ + : "0" (dst), "1" (src), "2" (len)); +#endif +#endif +#endif +*/ + +unsigned int +lzf_decompress (const void *const in_data, unsigned int in_len, + void *out_data, unsigned int out_len) +{ + u8 const *ip = (const u8 *)in_data; + u8 *op = (u8 *)out_data; + u8 const *const in_end = ip + in_len; + u8 *const out_end = op + out_len; + + do + { + unsigned int ctrl = *ip++; + + if (ctrl < (1 << 5)) /* literal run */ + { + ctrl++; + + if (op + ctrl > out_end) + { + SET_ERRNO (E2BIG); + return 0; + } + +#if CHECK_INPUT + if (ip + ctrl > in_end) + { + SET_ERRNO (EINVAL); + return 0; + } +#endif + +#ifdef lzf_movsb + lzf_movsb (op, ip, ctrl); +#else + do + *op++ = *ip++; + while (--ctrl); +#endif + } + else /* back reference */ + { + unsigned int len = ctrl >> 5; + + u8 *ref = op - ((ctrl & 0x1f) << 8) - 1; + +#if CHECK_INPUT + if (ip >= in_end) + { + SET_ERRNO (EINVAL); + return 0; + } +#endif + if (len == 7) + { + len += 
*ip++; +#if CHECK_INPUT + if (ip >= in_end) + { + SET_ERRNO (EINVAL); + return 0; + } +#endif + } + + ref -= *ip++; + + if (op + len + 2 > out_end) + { + SET_ERRNO (E2BIG); + return 0; + } + + if (ref < (u8 *)out_data) + { + SET_ERRNO (EINVAL); + return 0; + } + +#ifdef lzf_movsb + len += 2; + lzf_movsb (op, ref, len); +#else + *op++ = *ref++; + *op++ = *ref++; + + do + *op++ = *ref++; + while (--len); +#endif + } + } + while (ip < in_end); + + return op - (u8 *)out_data; +} + diff --git a/lzf/lzf_filter.c b/lzf/lzf_filter.c new file mode 100644 index 00000000..c6dd4b0e --- /dev/null +++ b/lzf/lzf_filter.c @@ -0,0 +1,261 @@ +/***** Preamble block ********************************************************* +* +* This file is part of h5py, a low-level Python interface to the HDF5 library. +* +* Copyright (C) 2008 Andrew Collette +* http://h5py.alfven.org +* License: BSD (See LICENSE.txt for full license) +* +* $Date$ +* +****** End preamble block ****************************************************/ + +/* + Implements an LZF filter module for HDF5, using the BSD-licensed library + by Marc Alexander Lehmann (http://www.goof.com/pcg/marc/liblzf.html). + + No Python-specific code is used. The filter behaves like the DEFLATE + filter, in that it is called for every type and space, and returns 0 + if the data cannot be compressed. + + The only public function is (int) register_lzf(void), which passes on + the result from H5Zregister. 
+*/ + +#include +#include +#include +#include "hdf5.h" +#include "lzf/lzf.h" +#include "lzf_filter.h" + +/* Our own versions of H5Epush_sim, as it changed in 1.8 */ +#if H5_VERS_MAJOR == 1 && H5_VERS_MINOR < 7 + +#define PUSH_ERR(func, minor, str) H5Epush(__FILE__, func, __LINE__, H5E_PLINE, minor, str) +#define H5PY_GET_FILTER H5Pget_filter_by_id + +#else + +#define PUSH_ERR(func, minor, str) H5Epush1(__FILE__, func, __LINE__, H5E_PLINE, minor, str) +#define H5PY_GET_FILTER(a,b,c,d,e,f,g) H5Pget_filter_by_id2(a,b,c,d,e,f,g,NULL) + +#endif + +/* Deal with the mutiple definitions for H5Z_class_t. + Note: Only HDF5 1.6 and 1.8 are supported. + + (1) The old class should always be used for HDF5 1.6 + (2) The new class should always be used for HDF5 1.8 < 1.8.3 + (3) The old class should be used for HDF5 1.8 >= 1.8.3 only if the + macro H5_USE_16_API is set +*/ + +#if H5_VERS_MAJOR == 1 && H5_VERS_MINOR == 8 && (H5_VERS_RELEASE < 3 || !H5_USE_16_API) +#define H5PY_H5Z_NEWCLS 1 +#else +#define H5PY_H5Z_NEWCLS 0 +#endif + +size_t lzf_filter(unsigned flags, size_t cd_nelmts, + const unsigned cd_values[], size_t nbytes, + size_t *buf_size, void **buf); + +herr_t lzf_set_local(hid_t dcpl, hid_t type, hid_t space); + + +/* Try to register the filter, passing on the HDF5 return value */ +int register_lzf(void){ + + int retval; + +#if H5PY_H5Z_NEWCLS + H5Z_class_t filter_class = { + H5Z_CLASS_T_VERS, + (H5Z_filter_t)(H5PY_FILTER_LZF), + 1, 1, + "lzf", + NULL, + (H5Z_set_local_func_t)(lzf_set_local), + (H5Z_func_t)(lzf_filter) + }; +#else + H5Z_class_t filter_class = { + (H5Z_filter_t)(H5PY_FILTER_LZF), + "lzf", + NULL, + (H5Z_set_local_func_t)(lzf_set_local), + (H5Z_func_t)(lzf_filter) + }; +#endif + + retval = H5Zregister(&filter_class); + if(retval<0){ + PUSH_ERR("register_lzf", H5E_CANTREGISTER, "Can't register LZF filter"); + } + return retval; +} + +/* Filter setup. Records the following inside the DCPL: + + 1. 
If version information is not present, set slots 0 and 1 to the filter + revision and LZF API version, respectively. + + 2. Compute the chunk size in bytes and store it in slot 2. +*/ +herr_t lzf_set_local(hid_t dcpl, hid_t type, hid_t space){ + + int ndims; + int i; + herr_t r; + + unsigned int bufsize; + hsize_t chunkdims[32]; + + unsigned int flags; + size_t nelements = 8; + unsigned values[] = {0,0,0,0,0,0,0,0}; + + r = H5PY_GET_FILTER(dcpl, H5PY_FILTER_LZF, &flags, &nelements, values, 0, NULL); + if(r<0) return -1; + + if(nelements < 3) nelements = 3; /* First 3 slots reserved. If any higher + slots are used, preserve the contents. */ + + /* It seems the H5Z_FLAG_REVERSE flag doesn't work here, so we have to be + careful not to clobber any existing version info */ + if(values[0]==0) values[0] = H5PY_FILTER_LZF_VERSION; + if(values[1]==0) values[1] = LZF_VERSION; + + ndims = H5Pget_chunk(dcpl, 32, chunkdims); + if(ndims<0) return -1; + if(ndims>32){ + PUSH_ERR("lzf_set_local", H5E_CALLBACK, "Chunk rank exceeds limit"); + return -1; + } + + bufsize = H5Tget_size(type); + if(bufsize==0) return -1; + + for(i=0;i=3)&&(cd_values[2]!=0)){ + outbuf_size = cd_values[2]; /* Precomputed buffer guess */ + }else{ + outbuf_size = (*buf_size); + } + +#ifdef H5PY_LZF_DEBUG + fprintf(stderr, "Decompress %d chunk w/buffer %d\n", nbytes, outbuf_size); +#endif + + while(!status){ + + free(outbuf); + outbuf = malloc(outbuf_size); + + if(outbuf == NULL){ + PUSH_ERR("lzf_filter", H5E_CALLBACK, "Can't allocate decompression buffer"); + goto failed; + } + + status = lzf_decompress(*buf, nbytes, outbuf, outbuf_size); + + if(!status){ /* compression failed */ + + if(errno == E2BIG){ + outbuf_size += (*buf_size); +#ifdef H5PY_LZF_DEBUG + fprintf(stderr, " Too small: %d\n", outbuf_size); +#endif + } else if(errno == EINVAL) { + + PUSH_ERR("lzf_filter", H5E_CALLBACK, "Invalid data for LZF decompression"); + goto failed; + + } else { + PUSH_ERR("lzf_filter", H5E_CALLBACK, "Unknown LZF 
decompression error"); + goto failed; + } + + } /* if !status */ + + } /* while !status */ + + } /* compressing vs decompressing */ + + if(status != 0){ + + free(*buf); + *buf = outbuf; + *buf_size = outbuf_size; + + return status; /* Size of compressed/decompressed data */ + } + + failed: + + free(outbuf); + return 0; + +} /* End filter function */ + + + + + + + + + + + + + diff --git a/lzf/lzf_filter.h b/lzf/lzf_filter.h new file mode 100644 index 00000000..27dff83a --- /dev/null +++ b/lzf/lzf_filter.h @@ -0,0 +1,38 @@ +/***** Preamble block ********************************************************* +* +* This file is part of h5py, a low-level Python interface to the HDF5 library. +* +* Copyright (C) 2008 Andrew Collette +* http://h5py.alfven.org +* License: BSD (See LICENSE.txt for full license) +* +* $Date$ +* +****** End preamble block ****************************************************/ + + +#ifndef H5PY_LZF_H +#define H5PY_LZF_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* Filter revision number, starting at 1 */ +#define H5PY_FILTER_LZF_VERSION 4 + +/* Filter ID registered with the HDF Group as of 2/6/09. For maintenance + requests, contact the filter author directly. */ +#define H5PY_FILTER_LZF 32000 + +/* Register the filter with the library. Returns a negative value on failure, + and a non-negative value on success. 
+*/ +int register_lzf(void); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..8b2a6860 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +# Include dependencies when building wheels on cibuildwheel +[build-system] +requires = [ + "setuptools>=0.7", + "Cython>=0.19", + "oldest-supported-numpy", + "h5py>=2.4.0", +] + +build-backend = "setuptools.build_meta" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..34c51ec6 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +# Order matters +setuptools>=0.7 +Cython>=0.19 +numpy>=1.6.1 +h5py>=2.4.0 diff --git a/setup.cfg.example b/setup.cfg.example new file mode 100644 index 00000000..6bd2ccfb --- /dev/null +++ b/setup.cfg.example @@ -0,0 +1,10 @@ +[install] +# These control the installation of the hdf5 dynamically loaded filter plugin. +h5plugin = 0 +h5plugin-dir = /usr/local/hdf5/lib/plugin + +[build_ext] +# Whether to compile with OpenMP multi-threading. Default is system dependant: +# False on OSX (since the clang compiler does not yet support OpenMP) and True +# otherwise. +omp = 1 diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..ff99b8ef --- /dev/null +++ b/setup.py @@ -0,0 +1,419 @@ +from __future__ import absolute_import, division, print_function + +# I didn't import unicode_literals. They break setuptools or Cython in python +# 2.7, but python 3 seems to be happy with them. 
import glob
import os
from os import path
from setuptools import setup, Extension
from setuptools.command.build_ext import build_ext as build_ext_
from setuptools.command.develop import develop as develop_
from setuptools.command.install import install as install_
from Cython.Compiler.Main import default_options
import shutil
import subprocess
import sys
import platform


# Package version, also exported to the C sources through MACROS below.
VERSION_MAJOR = 0
VERSION_MINOR = 4
VERSION_POINT = 2
# Define ZSTD macro for cython compilation
# (disabled by default; flipped to True by the ENABLE_ZSTD env var or the
# --zstd install option elsewhere in this file).
default_options["compile_time_env"] = {"ZSTD_SUPPORT": False}

# Only unset in the 'release' branch and in tags.
VERSION_DEV = None

VERSION = "%d.%d.%d" % (VERSION_MAJOR, VERSION_MINOR, VERSION_POINT)
if VERSION_DEV:
    VERSION = VERSION + ".dev%d" % VERSION_DEV


# Base C compiler flags for GCC/Clang; MSVC gets its own list below.
COMPILE_FLAGS = ["-O3", "-ffast-math", "-std=c99"]
# Cython breaks strict aliasing rules.
COMPILE_FLAGS += ["-fno-strict-aliasing"]
COMPILE_FLAGS += ["-fPIC"]
COMPILE_FLAGS_MSVC = ["/Ox", "/fp:fast"]

# Preprocessor defines handed to every extension so the C code can report
# the library version it was built from.
MACROS = [
    ("BSHUF_VERSION_MAJOR", VERSION_MAJOR),
    ("BSHUF_VERSION_MINOR", VERSION_MINOR),
    ("BSHUF_VERSION_POINT", VERSION_POINT),
]


# Default install location for HDF5 dynamically loaded filter plugins.
H5PLUGINS_DEFAULT = "/usr/local/hdf5/lib/plugin"
# Default -march/-mcpu target for the C compiler.
MARCH_DEFAULT = "native"

# OSX's clang compiler does not support OpenMP.
+if sys.platform == "darwin": + OMP_DEFAULT = False +else: + OMP_DEFAULT = True + +FALLBACK_CONFIG = { + "include_dirs": [], + "library_dirs": [], + "libraries": [], + "extra_compile_args": [], + "extra_link_args": [], +} + +if "HDF5_DIR" in os.environ: + FALLBACK_CONFIG["include_dirs"] += [os.environ["HDF5_DIR"] + "/include"] # macports + FALLBACK_CONFIG["library_dirs"] += [os.environ["HDF5_DIR"] + "/lib"] # macports +elif sys.platform == "darwin": + # putting here both macports and homebrew paths will generate + # "ld: warning: dir not found" at the linking phase + FALLBACK_CONFIG["include_dirs"] += ["/opt/local/include"] # macports + FALLBACK_CONFIG["library_dirs"] += ["/opt/local/lib"] # macports + FALLBACK_CONFIG["include_dirs"] += ["/usr/local/include"] # homebrew + FALLBACK_CONFIG["library_dirs"] += ["/usr/local/lib"] # homebrew +elif sys.platform.startswith("freebsd"): + FALLBACK_CONFIG["include_dirs"] += ["/usr/local/include"] # homebrew + FALLBACK_CONFIG["library_dirs"] += ["/usr/local/lib"] # homebrew + +FALLBACK_CONFIG["include_dirs"] = [ + d for d in FALLBACK_CONFIG["include_dirs"] if path.isdir(d) +] +FALLBACK_CONFIG["library_dirs"] = [ + d for d in FALLBACK_CONFIG["library_dirs"] if path.isdir(d) +] + +FALLBACK_CONFIG["extra_compile_args"] = ["-DH5_BUILT_AS_DYNAMIC_LIB"] + + +def pkgconfig(*packages, **kw): + config = kw.setdefault("config", {}) + optional_args = kw.setdefault("optional", "") + flag_map = { + "include_dirs": ["--cflags-only-I", 2], + "library_dirs": ["--libs-only-L", 2], + "libraries": ["--libs-only-l", 2], + "extra_compile_args": ["--cflags-only-other", 0], + "extra_link_args": ["--libs-only-other", 0], + } + for package in packages: + try: + subprocess.check_output(["pkg-config", package]) + except (subprocess.CalledProcessError, OSError): + print( + "Can't find %s with pkg-config fallback to " "static config" % package + ) + for distutils_key in flag_map: + config.setdefault(distutils_key, []).extend( + 
FALLBACK_CONFIG[distutils_key] + ) + config["libraries"].append(package) + else: + for distutils_key, (pkg_option, n) in flag_map.items(): + items = ( + subprocess.check_output( + ["pkg-config", optional_args, pkg_option, package] + ) + .decode("utf8") + .split() + ) + opt = config.setdefault(distutils_key, []) + opt.extend([i[n:] for i in items]) + return config + + +zstd_headers = ["zstd/lib/zstd.h"] +zstd_lib = ["zstd/lib/"] +zstd_sources = glob.glob("zstd/lib/common/*.c") +zstd_sources += glob.glob("zstd/lib/compress/*.c") +zstd_sources += glob.glob("zstd/lib/decompress/*.c") + +ext_bshuf = Extension( + "bitshuffle.ext", + sources=[ + "bitshuffle/ext.pyx", + "src/bitshuffle.c", + "src/bitshuffle_core.c", + "src/iochain.c", + "lz4/lz4.c", + ], + include_dirs=["src/", "lz4/"], + depends=["src/bitshuffle.h", "src/bitshuffle_core.h", "src/iochain.h", "lz4/lz4.h"], + libraries=[], + define_macros=MACROS, +) + +h5filter = Extension( + "bitshuffle.h5", + sources=[ + "bitshuffle/h5.pyx", + "src/bshuf_h5filter.c", + "src/bitshuffle.c", + "src/bitshuffle_core.c", + "src/iochain.c", + "lz4/lz4.c", + ], + depends=[ + "src/bitshuffle.h", + "src/bitshuffle_core.h", + "src/iochain.h", + "src/bshuf_h5filter.h", + "lz4/lz4.h", + ], + define_macros=MACROS + [("H5_USE_18_API", None)], + **pkgconfig("hdf5", config=dict(include_dirs=["src/", "lz4/"])) +) + +if not sys.platform.startswith("win"): + h5filter.sources.append("src/hdf5_dl.c") + h5filter.libraries.remove("hdf5") + +filter_plugin = Extension( + "bitshuffle.plugin.libh5bshuf", + sources=[ + "src/bshuf_h5plugin.c", + "src/bshuf_h5filter.c", + "src/bitshuffle.c", + "src/bitshuffle_core.c", + "src/iochain.c", + "lz4/lz4.c", + ], + depends=[ + "src/bitshuffle.h", + "src/bitshuffle_core.h", + "src/iochain.h", + "src/bshuf_h5filter.h", + "lz4/lz4.h", + ], + define_macros=MACROS, + **pkgconfig("hdf5", config=dict(include_dirs=["src/", "lz4/"])) +) + +lzf_plugin = Extension( + "bitshuffle.plugin.libh5LZF", + sources=[ + 
"src/lzf_h5plugin.c", + "lzf/lzf_filter.c", + "lzf/lzf/lzf_c.c", + "lzf/lzf/lzf_d.c", + ], + depends=["lzf/lzf_filter.h", "lzf/lzf/lzf.h", "lzf/lzf/lzfP.h"], + **pkgconfig("hdf5", config=dict(include_dirs=["lzf/", "lzf/lzf/"])) +) + + +EXTENSIONS = [ext_bshuf, h5filter] + +# For enabling ZSTD support when building wheels +if "ENABLE_ZSTD" in os.environ: + default_options["compile_time_env"] = {"ZSTD_SUPPORT": True} + for ext in EXTENSIONS: + if ext.name in [ + "bitshuffle.ext", + "bitshuffle.h5", + "bitshuffle.plugin.libh5bshuf", + ]: + ext.sources += zstd_sources + ext.include_dirs += zstd_lib + ext.depends += zstd_headers + ext.define_macros += [("ZSTD_SUPPORT", 1)] + +# Check for plugin hdf5 plugin support (hdf5 >= 1.8.11) +HDF5_PLUGIN_SUPPORT = False +CPATHS = os.environ["CPATH"].split(":") if "CPATH" in os.environ else [] +for p in ["/usr/include"] + pkgconfig("hdf5")["include_dirs"] + CPATHS: + if os.path.exists(os.path.join(p, "H5PLextern.h")): + HDF5_PLUGIN_SUPPORT = True + +if HDF5_PLUGIN_SUPPORT: + EXTENSIONS.extend([filter_plugin, lzf_plugin]) + + +class develop(develop_): + def run(self): + # Dummy directory for copying build plugins. + if not path.isdir("bitshuffle/plugin"): + os.mkdir("bitshuffle/plugin") + develop_.run(self) + + +# Custom installation to include installing dynamic filters. +class install(install_): + user_options = install_.user_options + [ + ("h5plugin", None, "Install HDF5 filter plugins for use outside of python."), + ( + "h5plugin-dir=", + None, + "Where to install filter plugins. Default %s." % H5PLUGINS_DEFAULT, + ), + ("zstd", None, "Install ZSTD support."), + ] + + def initialize_options(self): + install_.initialize_options(self) + self.h5plugin = False + self.zstd = False + self.h5plugin_dir = H5PLUGINS_DEFAULT + + def finalize_options(self): + install_.finalize_options(self) + if self.h5plugin not in ("0", "1", True, False): + raise ValueError("Invalid h5plugin argument. 
Must be '0' or '1'.") + self.h5plugin = int(self.h5plugin) + self.h5plugin_dir = path.abspath(self.h5plugin_dir) + self.zstd = self.zstd + + # Add ZSTD files and macro to extensions if ZSTD enabled + if self.zstd: + default_options["compile_time_env"] = {"ZSTD_SUPPORT": True} + for ext in EXTENSIONS: + if ext.name in [ + "bitshuffle.ext", + "bitshuffle.h5", + "bitshuffle.plugin.libh5bshuf", + ]: + ext.sources += zstd_sources + ext.include_dirs += zstd_lib + ext.depends += zstd_headers + ext.define_macros += [("ZSTD_SUPPORT", 1)] + + def run(self): + install_.run(self) + if self.h5plugin: + if not HDF5_PLUGIN_SUPPORT: + print("HDF5 < 1.8.11, not installing filter plugins.") + return + plugin_build = path.join(self.build_lib, "bitshuffle", "plugin") + try: + os.makedirs(self.h5plugin_dir) + except OSError as e: + if e.args[0] == 17: + # Directory already exists, this is fine. + pass + else: + raise + plugin_libs = glob.glob(path.join(plugin_build, "*")) + for plugin_lib in plugin_libs: + plugin_name = path.split(plugin_lib)[1] + shutil.copy2(plugin_lib, path.join(self.h5plugin_dir, plugin_name)) + print("Installed HDF5 filter plugins to %s" % self.h5plugin_dir) + + +# Command line or site.cfg specification of OpenMP. +class build_ext(build_ext_): + user_options = build_ext_.user_options + [ + ( + "omp=", + None, + "Whether to compile with OpenMP threading. Default" + " on current system is %s." % str(OMP_DEFAULT), + ), + ( + "march=", + None, + "Generate instructions for a specific machine type. Default is %s." + % MARCH_DEFAULT, + ), + ] + boolean_options = build_ext_.boolean_options + ["omp"] + + def initialize_options(self): + build_ext_.initialize_options(self) + self.omp = OMP_DEFAULT + self.march = MARCH_DEFAULT + + def finalize_options(self): + # For some reason this gets run twice. Careful to print messages and + # add arguments only one time. 
+ build_ext_.finalize_options(self) + + if self.omp not in ("0", "1", True, False): + raise ValueError("Invalid omp argument. Mut be '0' or '1'.") + self.omp = int(self.omp) + + import numpy as np + + ext_bshuf.include_dirs.append(np.get_include()) + + # Required only by old version of setuptools < 18.0 + from Cython.Build import cythonize + + self.extensions = cythonize(self.extensions) + for ext in self.extensions: + ext._needs_stub = False + + def build_extensions(self): + c = self.compiler.compiler_type + + if self.omp not in ("0", "1", True, False): + raise ValueError("Invalid omp argument. Mut be '0' or '1'.") + self.omp = int(self.omp) + + if self.omp: + if not hasattr(self, "_printed_omp_message"): + self._printed_omp_message = True + print("\n#################################") + print("# Compiling with OpenMP support #") + print("#################################\n") + # More portable to pass -fopenmp to linker. + # self.libraries += ['gomp'] + if self.compiler.compiler_type == "msvc": + openmpflag = "/openmp" + compileflags = COMPILE_FLAGS_MSVC + else: + openmpflag = "-fopenmp" + archi = platform.machine() + if archi in ("i386", "x86_64"): + compileflags = COMPILE_FLAGS + ["-march=%s" % self.march] + else: + compileflags = COMPILE_FLAGS + ["-mcpu=%s" % self.march] + if archi == "ppc64le": + compileflags = COMPILE_FLAGS + ["-DNO_WARN_X86_INTRINSICS"] + for e in self.extensions: + e.extra_compile_args = list( + set(e.extra_compile_args).union(compileflags) + ) + if openmpflag not in e.extra_compile_args: + e.extra_compile_args += [openmpflag] + if openmpflag not in e.extra_link_args: + e.extra_link_args += [openmpflag] + + build_ext_.build_extensions(self) + + +# Don't install numpy/cython/hdf5 if not needed +for cmd in ["sdist", "clean", "--help", "--help-commands", "--version"]: + if cmd in sys.argv: + setup_requires = [] + break +else: + setup_requires = ["Cython>=0.19", "numpy>=1.6.1"] + +with open("requirements.txt") as f: + requires = 
f.read().splitlines() + requires = [r.split()[0] for r in requires] + +with open("README.rst") as r: + long_description = r.read() + +# TODO hdf5 support should be an "extra". Figure out how to set this up. +setup( + name="bitshuffle", + version=VERSION, + packages=["bitshuffle", "bitshuffle"], + scripts=[], + ext_modules=EXTENSIONS, + cmdclass={"build_ext": build_ext, "install": install, "develop": develop}, + setup_requires=setup_requires, + install_requires=requires, + # extras_require={'H5': ["h5py"]}, + package_data={"": ["data/*"]}, + # metadata for upload to PyPI + author="Kiyoshi Wesley Masui", + author_email="kiyo@physics.ubc.ca", + description="Bitshuffle filter for improving typed data compression.", + long_description=long_description, + license="MIT", + url="https://github.com/kiyo-masui/bitshuffle", + download_url=("https://github.com/kiyo-masui/bitshuffle/tarball/%s" % VERSION), + keywords=["compression", "hdf5", "numpy"], +) diff --git a/src/bitshuffle.c b/src/bitshuffle.c new file mode 100644 index 00000000..a8ef0b5c --- /dev/null +++ b/src/bitshuffle.c @@ -0,0 +1,279 @@ +/* + * Bitshuffle - Filter for improving compression of typed binary data. + * + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for details about copyright and rights to use. + * + */ + +#include "bitshuffle.h" +#include "bitshuffle_core.h" +#include "bitshuffle_internals.h" +#include "lz4.h" + +#ifdef ZSTD_SUPPORT +#include "zstd.h" +#endif + +#include +#include + + +// Macros. +#define CHECK_ERR_FREE_LZ(count, buf) if (count < 0) { \ + free(buf); return count - 1000; } + + +/* Bitshuffle and compress a single block. 
 */
/* Bitshuffle one block of *size* elements of *elem_size* bytes pulled from
 * the I/O chain *C_ptr*, LZ4-compress it, and write it to the chain's output
 * slot prefixed by a 4-byte header holding the compressed size.
 *
 * *option* is unused in the LZ4 path (kept for signature parity with the
 * zstd variant, which uses it as the compression level).
 *
 * Returns bytes written to the output (compressed size + 4-byte header),
 * -1 on allocation failure, or a negative error code propagated from
 * bshuf_trans_bit_elem / LZ4 (LZ4 errors offset by -1000 via
 * CHECK_ERR_FREE_LZ).
 */
int64_t bshuf_compress_lz4_block(ioc_chain *C_ptr, \
        const size_t size, const size_t elem_size, const int option) {

    int64_t nbytes, count;
    void *tmp_buf_bshuf;
    void *tmp_buf_lz4;
    size_t this_iter;
    const void *in;
    void *out;

    /* Scratch buffer for the bitshuffled (but not yet compressed) block. */
    tmp_buf_bshuf = malloc(size * elem_size);
    if (tmp_buf_bshuf == NULL) return -1;

    /* Worst-case LZ4 output size for this block. */
    int dst_capacity = LZ4_compressBound(size * elem_size);
    tmp_buf_lz4 = malloc(dst_capacity);
    if (tmp_buf_lz4 == NULL){
        free(tmp_buf_bshuf);
        return -1;
    }


    /* Claim this block's input slice and advance the chain's input cursor
     * past it so the next worker gets the following block. */
    in = ioc_get_in(C_ptr, &this_iter);
    ioc_set_next_in(C_ptr, &this_iter, (void*) ((char*) in + size * elem_size));

    count = bshuf_trans_bit_elem(in, tmp_buf_bshuf, size, elem_size);
    if (count < 0) {
        free(tmp_buf_lz4);
        free(tmp_buf_bshuf);
        return count;
    }
    nbytes = LZ4_compress_default((const char*) tmp_buf_bshuf, (char*) tmp_buf_lz4, size * elem_size, dst_capacity);
    free(tmp_buf_bshuf);
    CHECK_ERR_FREE_LZ(nbytes, tmp_buf_lz4);

    /* Output slot: compressed payload plus the 4-byte size header. */
    out = ioc_get_out(C_ptr, &this_iter);
    ioc_set_next_out(C_ptr, &this_iter, (void *) ((char *) out + nbytes + 4));

    /* Header stores the compressed size (big-endian, per the helper's name —
     * must match what bshuf_read_uint32_BE expects on decompression). */
    bshuf_write_uint32_BE(out, nbytes);
    memcpy((char *) out + 4, tmp_buf_lz4, nbytes);

    free(tmp_buf_lz4);

    return nbytes + 4;
}


/* Decompress and bitunshuffle a single block.
 */
/* Inverse of bshuf_compress_lz4_block: read one block's 4-byte compressed
 * size header from the chain's input, LZ4-decompress the payload, then
 * bit-unshuffle into the chain's output slot (*size* elements of
 * *elem_size* bytes).
 *
 * *option* is unused here (signature parity with the zstd variant).
 *
 * Returns bytes consumed from the input (compressed size + 4-byte header),
 * -1 on allocation failure, -91 if the decompressed size does not match
 * size * elem_size, or a negative error code from LZ4 / untranspose.
 */
int64_t bshuf_decompress_lz4_block(ioc_chain *C_ptr,
        const size_t size, const size_t elem_size, const int option) {

    int64_t nbytes, count;
    void *out, *tmp_buf;
    const void *in;
    size_t this_iter;
    int32_t nbytes_from_header;

    /* Read the compressed size written by the compressor and advance the
     * input cursor past header + payload for the next worker. */
    in = ioc_get_in(C_ptr, &this_iter);
    nbytes_from_header = bshuf_read_uint32_BE(in);
    ioc_set_next_in(C_ptr, &this_iter,
            (void*) ((char*) in + nbytes_from_header + 4));

    out = ioc_get_out(C_ptr, &this_iter);
    ioc_set_next_out(C_ptr, &this_iter,
            (void *) ((char *) out + size * elem_size));

    /* Scratch buffer for the decompressed-but-still-shuffled data. */
    tmp_buf = malloc(size * elem_size);
    if (tmp_buf == NULL) return -1;

    nbytes = LZ4_decompress_safe((const char*) in + 4, (char *) tmp_buf, nbytes_from_header,
                                 size * elem_size);
    CHECK_ERR_FREE_LZ(nbytes, tmp_buf);
    /* A valid block must decompress to exactly the expected size. */
    if (nbytes != size * elem_size) {
        free(tmp_buf);
        return -91;
    }
    /* From here on, nbytes tracks input bytes consumed, not output bytes. */
    nbytes = nbytes_from_header;

    count = bshuf_untrans_bit_elem(tmp_buf, out, size, elem_size);
    CHECK_ERR_FREE(count, tmp_buf);
    nbytes += 4;

    free(tmp_buf);
    return nbytes;
}

#ifdef ZSTD_SUPPORT
/* Bitshuffle and compress a single block.
 */
/* Zstd twin of bshuf_compress_lz4_block: bitshuffle one block from the I/O
 * chain *C_ptr*, compress it with ZSTD at level *comp_lvl*, and write it to
 * the chain's output prefixed by a 4-byte compressed-size header.
 *
 * Returns bytes written (compressed size + 4), -1 on allocation failure, or
 * a negative error code from bshuf_trans_bit_elem / ZSTD (ZSTD errors offset
 * by -1000 via CHECK_ERR_FREE_LZ).
 */
int64_t bshuf_compress_zstd_block(ioc_chain *C_ptr, \
        const size_t size, const size_t elem_size, const int comp_lvl) {

    int64_t nbytes, count;
    void *tmp_buf_bshuf;
    void *tmp_buf_zstd;
    size_t this_iter;
    const void *in;
    void *out;

    /* Scratch buffer for the bitshuffled (but not yet compressed) block. */
    tmp_buf_bshuf = malloc(size * elem_size);
    if (tmp_buf_bshuf == NULL) return -1;

    /* Worst-case ZSTD output size for this block. */
    size_t tmp_buf_zstd_size = ZSTD_compressBound(size * elem_size);
    tmp_buf_zstd = malloc(tmp_buf_zstd_size);
    if (tmp_buf_zstd == NULL){
        free(tmp_buf_bshuf);
        return -1;
    }

    /* Claim this block's input slice and advance the chain's input cursor. */
    in = ioc_get_in(C_ptr, &this_iter);
    ioc_set_next_in(C_ptr, &this_iter, (void*) ((char*) in + size * elem_size));

    count = bshuf_trans_bit_elem(in, tmp_buf_bshuf, size, elem_size);
    if (count < 0) {
        free(tmp_buf_zstd);
        free(tmp_buf_bshuf);
        return count;
    }
    nbytes = ZSTD_compress(tmp_buf_zstd, tmp_buf_zstd_size, (const void*)tmp_buf_bshuf, size * elem_size, comp_lvl);
    free(tmp_buf_bshuf);
    CHECK_ERR_FREE_LZ(nbytes, tmp_buf_zstd);

    /* Output slot: compressed payload plus the 4-byte size header. */
    out = ioc_get_out(C_ptr, &this_iter);
    ioc_set_next_out(C_ptr, &this_iter, (void *) ((char *) out + nbytes + 4));

    /* Same big-endian size header format as the LZ4 path. */
    bshuf_write_uint32_BE(out, nbytes);
    memcpy((char *) out + 4, tmp_buf_zstd, nbytes);

    free(tmp_buf_zstd);

    return nbytes + 4;
}


/* Decompress and bitunshuffle a single block.
*/ +int64_t bshuf_decompress_zstd_block(ioc_chain *C_ptr, + const size_t size, const size_t elem_size, const int option) { + + int64_t nbytes, count; + void *out, *tmp_buf; + const void *in; + size_t this_iter; + int32_t nbytes_from_header; + + in = ioc_get_in(C_ptr, &this_iter); + nbytes_from_header = bshuf_read_uint32_BE(in); + ioc_set_next_in(C_ptr, &this_iter, + (void*) ((char*) in + nbytes_from_header + 4)); + + out = ioc_get_out(C_ptr, &this_iter); + ioc_set_next_out(C_ptr, &this_iter, + (void *) ((char *) out + size * elem_size)); + + tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + nbytes = ZSTD_decompress(tmp_buf, size * elem_size, in + 4, nbytes_from_header); + CHECK_ERR_FREE_LZ(nbytes, tmp_buf); + if (nbytes != size * elem_size) { + free(tmp_buf); + return -91; + } + + nbytes = nbytes_from_header; + count = bshuf_untrans_bit_elem(tmp_buf, out, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + nbytes += 4; + + free(tmp_buf); + return nbytes; +} +#endif // ZSTD_SUPPORT + + +/* ---- Public functions ---- + * + * See header file for description and usage. + * + */ + +size_t bshuf_compress_lz4_bound(const size_t size, + const size_t elem_size, size_t block_size) { + + size_t bound, leftover; + + if (block_size == 0) { + block_size = bshuf_default_block_size(elem_size); + } + if (block_size % BSHUF_BLOCKED_MULT) return -81; + + // Note that each block gets a 4 byte header. + // Size of full blocks. + bound = (LZ4_compressBound(block_size * elem_size) + 4) * (size / block_size); + // Size of partial blocks, if any. + leftover = ((size % block_size) / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT; + if (leftover) bound += LZ4_compressBound(leftover * elem_size) + 4; + // Size of uncompressed data not fitting into any blocks. 
+ bound += (size % BSHUF_BLOCKED_MULT) * elem_size; + return bound; +} + + +int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size, + const size_t elem_size, size_t block_size) { + return bshuf_blocked_wrap_fun(&bshuf_compress_lz4_block, in, out, size, + elem_size, block_size, 0/*option*/); +} + + +int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size, + const size_t elem_size, size_t block_size) { + return bshuf_blocked_wrap_fun(&bshuf_decompress_lz4_block, in, out, size, + elem_size, block_size, 0/*option*/); +} + +#ifdef ZSTD_SUPPORT +size_t bshuf_compress_zstd_bound(const size_t size, + const size_t elem_size, size_t block_size) { + + size_t bound, leftover; + + if (block_size == 0) { + block_size = bshuf_default_block_size(elem_size); + } + if (block_size % BSHUF_BLOCKED_MULT) return -81; + + // Note that each block gets a 4 byte header. + // Size of full blocks. + bound = (ZSTD_compressBound(block_size * elem_size) + 4) * (size / block_size); + // Size of partial blocks, if any. + leftover = ((size % block_size) / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT; + if (leftover) bound += ZSTD_compressBound(leftover * elem_size) + 4; + // Size of uncompressed data not fitting into any blocks. 
+ bound += (size % BSHUF_BLOCKED_MULT) * elem_size; + return bound; +} + + +int64_t bshuf_compress_zstd(const void* in, void* out, const size_t size, + const size_t elem_size, size_t block_size, const int comp_lvl) { + return bshuf_blocked_wrap_fun(&bshuf_compress_zstd_block, in, out, size, + elem_size, block_size, comp_lvl); +} + + +int64_t bshuf_decompress_zstd(const void* in, void* out, const size_t size, + const size_t elem_size, size_t block_size) { + return bshuf_blocked_wrap_fun(&bshuf_decompress_zstd_block, in, out, size, + elem_size, block_size, 0/*option*/); +} +#endif // ZSTD_SUPPORT diff --git a/src/bitshuffle.h b/src/bitshuffle.h new file mode 100644 index 00000000..1a13dd17 --- /dev/null +++ b/src/bitshuffle.h @@ -0,0 +1,205 @@ +/* + * Bitshuffle - Filter for improving compression of typed binary data. + * + * This file is part of Bitshuffle + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for details about copyright and rights to use. + * + * + * Header File + * + * Worker routines return an int64_t which is the number of bytes processed + * if positive or an error code if negative. + * + * Error codes: + * -1 : Failed to allocate memory. + * -11 : Missing SSE. + * -12 : Missing AVX. + * -80 : Input size not a multiple of 8. + * -81 : block_size not multiple of 8. + * -91 : Decompression error, wrong number of bytes processed. + * -1YYY : Error internal to compression routine with error code -YYY. + */ + + +#ifndef BITSHUFFLE_H +#define BITSHUFFLE_H + +#include +#include "bitshuffle_core.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * ---- LZ4 Interface ---- + */ + +/* ---- bshuf_compress_lz4_bound ---- + * + * Bound on size of data compressed with *bshuf_compress_lz4*. + * + * Parameters + * ---------- + * size : number of elements in input + * elem_size : element size of typed data + * block_size : Process in blocks of this many elements. 
Pass 0 to + * select automatically (recommended). + * + * Returns + * ------- + * Bound on compressed data size. + * + */ +size_t bshuf_compress_lz4_bound(const size_t size, + const size_t elem_size, size_t block_size); + + +/* ---- bshuf_compress_lz4 ---- + * + * Bitshuffled and compress the data using LZ4. + * + * Transpose within elements, in blocks of data of *block_size* elements then + * compress the blocks using LZ4. In the output buffer, each block is prefixed + * by a 4 byte integer giving the compressed size of that block. + * + * Output buffer must be large enough to hold the compressed data. This could + * be in principle substantially larger than the input buffer. Use the routine + * *bshuf_compress_lz4_bound* to get an upper limit. + * + * Parameters + * ---------- + * in : input buffer, must be of size * elem_size bytes + * out : output buffer, must be large enough to hold data. + * size : number of elements in input + * elem_size : element size of typed data + * block_size : Process in blocks of this many elements. Pass 0 to + * select automatically (recommended). + * + * Returns + * ------- + * number of bytes used in output buffer, negative error-code if failed. + * + */ +int64_t bshuf_compress_lz4(const void* in, void* out, const size_t size, const size_t + elem_size, size_t block_size); + + +/* ---- bshuf_decompress_lz4 ---- + * + * Undo compression and bitshuffling. + * + * Decompress data then un-bitshuffle it in blocks of *block_size* elements. + * + * To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size* + * must patch the parameters used to compress the data. + * + * Parameters + * ---------- + * in : input buffer + * out : output buffer, must be of size * elem_size bytes + * size : number of elements in input + * elem_size : element size of typed data + * block_size : Process in blocks of this many elements. Pass 0 to + * select automatically (recommended). 
+ * + * Returns + * ------- + * number of bytes consumed in *input* buffer, negative error-code if failed. + * + */ +int64_t bshuf_decompress_lz4(const void* in, void* out, const size_t size, + const size_t elem_size, size_t block_size); + +/* + * ---- ZSTD Interface ---- + */ + +#ifdef ZSTD_SUPPORT +/* ---- bshuf_compress_zstd_bound ---- + * + * Bound on size of data compressed with *bshuf_compress_zstd*. + * + * Parameters + * ---------- + * size : number of elements in input + * elem_size : element size of typed data + * block_size : Process in blocks of this many elements. Pass 0 to + * select automatically (recommended). + * + * Returns + * ------- + * Bound on compressed data size. + * + */ +size_t bshuf_compress_zstd_bound(const size_t size, + const size_t elem_size, size_t block_size); + +/* ---- bshuf_compress_zstd ---- + * + * Bitshuffled and compress the data using zstd. + * + * Transpose within elements, in blocks of data of *block_size* elements then + * compress the blocks using ZSTD. In the output buffer, each block is prefixed + * by a 4 byte integer giving the compressed size of that block. + * + * Output buffer must be large enough to hold the compressed data. This could + * be in principle substantially larger than the input buffer. Use the routine + * *bshuf_compress_zstd_bound* to get an upper limit. + * + * Parameters + * ---------- + * in : input buffer, must be of size * elem_size bytes + * out : output buffer, must be large enough to hold data. + * size : number of elements in input + * elem_size : element size of typed data + * block_size : Process in blocks of this many elements. Pass 0 to + * select automatically (recommended). + * comp_lvl : compression level applied + * + * Returns + * ------- + * number of bytes used in output buffer, negative error-code if failed. 
+ * + */ +int64_t bshuf_compress_zstd(const void* in, void* out, const size_t size, const size_t + elem_size, size_t block_size, const int comp_lvl); + + +/* ---- bshuf_decompress_zstd ---- + * + * Undo compression and bitshuffling. + * + * Decompress data then un-bitshuffle it in blocks of *block_size* elements. + * + * To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size* + * must patch the parameters used to compress the data. + * + * Parameters + * ---------- + * in : input buffer + * out : output buffer, must be of size * elem_size bytes + * size : number of elements in input + * elem_size : element size of typed data + * block_size : Process in blocks of this many elements. Pass 0 to + * select automatically (recommended). + * + * Returns + * ------- + * number of bytes consumed in *input* buffer, negative error-code if failed. + * + */ +int64_t bshuf_decompress_zstd(const void* in, void* out, const size_t size, + const size_t elem_size, size_t block_size); + +#endif // ZSTD_SUPPORT + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // BITSHUFFLE_H diff --git a/src/bitshuffle_core.c b/src/bitshuffle_core.c new file mode 100644 index 00000000..ef33bf55 --- /dev/null +++ b/src/bitshuffle_core.c @@ -0,0 +1,1864 @@ +/* + * Bitshuffle - Filter for improving compression of typed binary data. + * + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for details about copyright and rights to use. + * + */ + +#include "bitshuffle_core.h" +#include "bitshuffle_internals.h" + +#include +#include + + +#if defined(__AVX2__) && defined (__SSE2__) +#define USEAVX2 +#endif + +#if defined(__SSE2__) || defined(NO_WARN_X86_INTRINSICS) +#define USESSE2 +#endif + +#if defined(__ARM_NEON__) || (__ARM_NEON) +#ifdef __aarch64__ +#define USEARMNEON +#endif +#endif + +// Conditional includes for SSE2 and AVX2. 
+#ifdef USEAVX2 +#include +#elif defined USESSE2 +#include +#elif defined USEARMNEON +#include +#endif + +#if defined(_OPENMP) && defined(_MSC_VER) +typedef int64_t omp_size_t; +#else +typedef size_t omp_size_t; +#endif + +// Macros. +#define CHECK_MULT_EIGHT(n) if (n % 8) return -80; +#define MAX(X,Y) ((X) > (Y) ? (X) : (Y)) + + +/* ---- Functions indicating compile time instruction set. ---- */ + +int bshuf_using_NEON(void) { +#ifdef USEARMNEON + return 1; +#else + return 0; +#endif +} + + +int bshuf_using_SSE2(void) { +#ifdef USESSE2 + return 1; +#else + return 0; +#endif +} + + +int bshuf_using_AVX2(void) { +#ifdef USEAVX2 + return 1; +#else + return 0; +#endif +} + + +/* ---- Worker code not requiring special instruction sets. ---- + * + * The following code does not use any x86 specific vectorized instructions + * and should compile on any machine + * + */ + +/* Transpose 8x8 bit array packed into a single quadword *x*. + * *t* is workspace. */ +#define TRANS_BIT_8X8(x, t) { \ + t = (x ^ (x >> 7)) & 0x00AA00AA00AA00AALL; \ + x = x ^ t ^ (t << 7); \ + t = (x ^ (x >> 14)) & 0x0000CCCC0000CCCCLL; \ + x = x ^ t ^ (t << 14); \ + t = (x ^ (x >> 28)) & 0x00000000F0F0F0F0LL; \ + x = x ^ t ^ (t << 28); \ + } + +/* Transpose 8x8 bit array along the diagonal from upper right + to lower left */ +#define TRANS_BIT_8X8_BE(x, t) { \ + t = (x ^ (x >> 9)) & 0x0055005500550055LL; \ + x = x ^ t ^ (t << 9); \ + t = (x ^ (x >> 18)) & 0x0000333300003333LL; \ + x = x ^ t ^ (t << 18); \ + t = (x ^ (x >> 36)) & 0x000000000F0F0F0FLL; \ + x = x ^ t ^ (t << 36); \ + } + +/* Transpose of an array of arbitrarily typed elements. 
 */
/* Generic element-typed transpose: treats *in* as an lda x ldb matrix of
 * type_t and writes its transpose to *out*. The main loop handles rows in
 * groups of 8 so the compiler can unroll the innermost copy; the second
 * loop handles the remaining (lda % 8) rows one at a time. */
#define TRANS_ELEM_TYPE(in, out, lda, ldb, type_t) { \
        size_t ii, jj, kk; \
        const type_t* in_type = (const type_t*) in; \
        type_t* out_type = (type_t*) out; \
        for(ii = 0; ii + 7 < lda; ii += 8) { \
            for(jj = 0; jj < ldb; jj++) { \
                for(kk = 0; kk < 8; kk++) { \
                    out_type[jj*lda + ii + kk] = \
                        in_type[ii*ldb + kk * ldb + jj]; \
                } \
            } \
        } \
        for(ii = lda - lda % 8; ii < lda; ii ++) { \
            for(jj = 0; jj < ldb; jj++) { \
                out_type[jj*lda + ii] = in_type[ii*ldb + jj]; \
            } \
        } \
    }


/* Memory copy with bshuf call signature. For testing and profiling. */
int64_t bshuf_copy(const void* in, void* out, const size_t size,
        const size_t elem_size) {

    const char* in_b = (const char*) in;
    char* out_b = (char*) out;

    memcpy(out_b, in_b, size * elem_size);
    return size * elem_size;
}


/* Transpose bytes within elements, starting partway through input.
 *
 * Views *in* as *size* elements of *elem_size* bytes and writes element
 * bytes grouped by byte position: output row jj collects byte jj of every
 * element. Processing begins at element *start* (must be a multiple of 8;
 * returns -80 otherwise via CHECK_MULT_EIGHT). Returns bytes processed. */
int64_t bshuf_trans_byte_elem_remainder(const void* in, void* out, const size_t size,
        const size_t elem_size, const size_t start) {

    size_t ii, jj, kk;
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;

    CHECK_MULT_EIGHT(start);

    if (size > start) {
        // ii loop separated into 2 loops so the compiler can unroll
        // the inner one.
        for (ii = start; ii + 7 < size; ii += 8) {
            for (jj = 0; jj < elem_size; jj++) {
                for (kk = 0; kk < 8; kk++) {
                    out_b[jj * size + ii + kk]
                        = in_b[ii * elem_size + kk * elem_size + jj];
                }
            }
        }
        /* Tail: the last (size % 8) elements, copied one at a time. */
        for (ii = size - size % 8; ii < size; ii ++) {
            for (jj = 0; jj < elem_size; jj++) {
                out_b[jj * size + ii] = in_b[ii * elem_size + jj];
            }
        }
    }
    return size * elem_size;
}


/* Transpose bytes within elements. */
int64_t bshuf_trans_byte_elem_scal(const void* in, void* out, const size_t size,
        const size_t elem_size) {

    /* Full-range transpose: delegate with start = 0. */
    return bshuf_trans_byte_elem_remainder(in, out, size, elem_size, 0);
}


/* Transpose bits within bytes.
/* Transpose bits within bytes, starting partway through input.
 * *start_byte* and the total byte count must be multiples of 8. */
int64_t bshuf_trans_bit_byte_remainder(const void* in, void* out, const size_t size,
                                       const size_t elem_size, const size_t start_byte) {

    const uint64_t* in_b = (const uint64_t*) in;
    uint8_t* out_b = (uint8_t*) out;

    uint64_t x, t;

    size_t ii, kk;
    size_t nbyte = elem_size * size;
    size_t nbyte_bitrow = nbyte / 8;

    uint64_t e = 1;
    // Runtime endianness probe; selects transpose direction below.
    const int little_endian = *(uint8_t *) &e == 1;
    // On big-endian, walk bit rows backwards: unsigned wraparound of
    // bit_row_skip combined with bit_row_offset is intentional.
    const size_t bit_row_skip = little_endian ? nbyte_bitrow : -nbyte_bitrow;
    const int64_t bit_row_offset = little_endian ? 0 : 7 * nbyte_bitrow;

    CHECK_MULT_EIGHT(nbyte);
    CHECK_MULT_EIGHT(start_byte);

    for (ii = start_byte / 8; ii < nbyte_bitrow; ii ++) {
        x = in_b[ii];
        if (little_endian) {
            TRANS_BIT_8X8(x, t);
        } else {
            TRANS_BIT_8X8_BE(x, t);
        }
        for (kk = 0; kk < 8; kk ++) {
            out_b[bit_row_offset + kk * bit_row_skip + ii] = x;
            x = x >> 8;
        }
    }
    return size * elem_size;
}


/* Transpose bits within bytes (scalar fallback over the whole input). */
int64_t bshuf_trans_bit_byte_scal(const void* in, void* out, const size_t size,
                                  const size_t elem_size) {

    return bshuf_trans_bit_byte_remainder(in, out, size, elem_size, 0);
}


/* General transpose of an array, optimized for large element sizes. */
int64_t bshuf_trans_elem(const void* in, void* out, const size_t lda,
                         const size_t ldb, const size_t elem_size) {

    size_t ii, jj;
    const char* in_b = (const char*) in;
    char* out_b = (char*) out;
    for(ii = 0; ii < lda; ii++) {
        for(jj = 0; jj < ldb; jj++) {
            memcpy(&out_b[(jj*lda + ii) * elem_size],
                   &in_b[(ii*ldb + jj) * elem_size], elem_size);
        }
    }
    return lda * ldb * elem_size;
}


/* Transpose rows of shuffled bits (size / 8 bytes) within groups of 8. */
int64_t bshuf_trans_bitrow_eight(const void* in, void* out, const size_t size,
                                 const size_t elem_size) {

    size_t nbyte_bitrow = size / 8;

    CHECK_MULT_EIGHT(size);

    return bshuf_trans_elem(in, out, 8, elem_size, nbyte_bitrow);
}
*/ +int64_t bshuf_trans_bit_elem_scal(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; + void *tmp_buf; + + CHECK_MULT_EIGHT(size); + + tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + count = bshuf_trans_byte_elem_scal(in, out, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_trans_bit_byte_scal(out, tmp_buf, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); + + free(tmp_buf); + + return count; +} + + +/* For data organized into a row for each bit (8 * elem_size rows), transpose + * the bytes. */ +int64_t bshuf_trans_byte_bitrow_scal(const void* in, void* out, const size_t size, + const size_t elem_size) { + size_t ii, jj, kk, nbyte_row; + const char *in_b; + char *out_b; + + + in_b = (const char*) in; + out_b = (char*) out; + + nbyte_row = size / 8; + + CHECK_MULT_EIGHT(size); + + for (jj = 0; jj < elem_size; jj++) { + for (ii = 0; ii < nbyte_row; ii++) { + for (kk = 0; kk < 8; kk++) { + out_b[ii * 8 * elem_size + jj * 8 + kk] = \ + in_b[(jj * 8 + kk) * nbyte_row + ii]; + } + } + } + return size * elem_size; +} + + +/* Shuffle bits within the bytes of eight element blocks. */ +int64_t bshuf_shuffle_bit_eightelem_scal(const void* in, void* out, \ + const size_t size, const size_t elem_size) { + + const char *in_b; + char *out_b; + uint64_t x, t; + size_t ii, jj, kk; + size_t nbyte, out_index; + + uint64_t e=1; + const int little_endian = *(uint8_t *) &e == 1; + const size_t elem_skip = little_endian ? elem_size : -elem_size; + const uint64_t elem_offset = little_endian ? 
0 : 7 * elem_size; + + CHECK_MULT_EIGHT(size); + + in_b = (const char*) in; + out_b = (char*) out; + + nbyte = elem_size * size; + + for (jj = 0; jj < 8 * elem_size; jj += 8) { + for (ii = 0; ii + 8 * elem_size - 1 < nbyte; ii += 8 * elem_size) { + x = *((uint64_t*) &in_b[ii + jj]); + if (little_endian) { + TRANS_BIT_8X8(x, t); + } else { + TRANS_BIT_8X8_BE(x, t); + } + for (kk = 0; kk < 8; kk++) { + out_index = ii + jj / 8 + elem_offset + kk * elem_skip; + *((uint8_t*) &out_b[out_index]) = x; + x = x >> 8; + } + } + } + return size * elem_size; +} + + +/* Untranspose bits within elements. */ +int64_t bshuf_untrans_bit_elem_scal(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; + void *tmp_buf; + + CHECK_MULT_EIGHT(size); + + tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + count = bshuf_trans_byte_bitrow_scal(in, tmp_buf, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_shuffle_bit_eightelem_scal(tmp_buf, out, size, elem_size); + + free(tmp_buf); + + return count; +} + + +/* ---- Worker code that uses Arm NEON ---- + * + * The following code makes use of the Arm NEON instruction set. + * NEON technology is the implementation of the ARM Advanced Single + * Instruction Multiple Data (SIMD) extension. + * The NEON unit is the component of the processor that executes SIMD instructions. + * It is also called the NEON Media Processing Engine (MPE). + * + */ + +#ifdef USEARMNEON + +/* Transpose bytes within elements for 16 bit elements. 
*/ +int64_t bshuf_trans_byte_elem_NEON_16(const void* in, void* out, const size_t size) { + + size_t ii; + const char *in_b = (const char*) in; + char *out_b = (char*) out; + int8x16_t a0, b0, a1, b1; + + for (ii=0; ii + 15 < size; ii += 16) { + a0 = vld1q_s8(in_b + 2*ii + 0*16); + b0 = vld1q_s8(in_b + 2*ii + 1*16); + + a1 = vzip1q_s8(a0, b0); + b1 = vzip2q_s8(a0, b0); + + a0 = vzip1q_s8(a1, b1); + b0 = vzip2q_s8(a1, b1); + + a1 = vzip1q_s8(a0, b0); + b1 = vzip2q_s8(a0, b0); + + a0 = vzip1q_s8(a1, b1); + b0 = vzip2q_s8(a1, b1); + + vst1q_s8(out_b + 0*size + ii, a0); + vst1q_s8(out_b + 1*size + ii, b0); + } + + return bshuf_trans_byte_elem_remainder(in, out, size, 2, + size - size % 16); +} + + +/* Transpose bytes within elements for 32 bit elements. */ +int64_t bshuf_trans_byte_elem_NEON_32(const void* in, void* out, const size_t size) { + + size_t ii; + const char *in_b; + char *out_b; + in_b = (const char*) in; + out_b = (char*) out; + int8x16_t a0, b0, c0, d0, a1, b1, c1, d1; + int64x2_t a2, b2, c2, d2; + + for (ii=0; ii + 15 < size; ii += 16) { + a0 = vld1q_s8(in_b + 4*ii + 0*16); + b0 = vld1q_s8(in_b + 4*ii + 1*16); + c0 = vld1q_s8(in_b + 4*ii + 2*16); + d0 = vld1q_s8(in_b + 4*ii + 3*16); + + a1 = vzip1q_s8(a0, b0); + b1 = vzip2q_s8(a0, b0); + c1 = vzip1q_s8(c0, d0); + d1 = vzip2q_s8(c0, d0); + + a0 = vzip1q_s8(a1, b1); + b0 = vzip2q_s8(a1, b1); + c0 = vzip1q_s8(c1, d1); + d0 = vzip2q_s8(c1, d1); + + a1 = vzip1q_s8(a0, b0); + b1 = vzip2q_s8(a0, b0); + c1 = vzip1q_s8(c0, d0); + d1 = vzip2q_s8(c0, d0); + + a2 = vzip1q_s64(vreinterpretq_s64_s8(a1), vreinterpretq_s64_s8(c1)); + b2 = vzip2q_s64(vreinterpretq_s64_s8(a1), vreinterpretq_s64_s8(c1)); + c2 = vzip1q_s64(vreinterpretq_s64_s8(b1), vreinterpretq_s64_s8(d1)); + d2 = vzip2q_s64(vreinterpretq_s64_s8(b1), vreinterpretq_s64_s8(d1)); + + vst1q_s64((int64_t *) (out_b + 0*size + ii), a2); + vst1q_s64((int64_t *) (out_b + 1*size + ii), b2); + vst1q_s64((int64_t *) (out_b + 2*size + ii), c2); + vst1q_s64((int64_t *) 
(out_b + 3*size + ii), d2); + } + + return bshuf_trans_byte_elem_remainder(in, out, size, 4, + size - size % 16); +} + + +/* Transpose bytes within elements for 64 bit elements. */ +int64_t bshuf_trans_byte_elem_NEON_64(const void* in, void* out, const size_t size) { + + size_t ii; + const char* in_b = (const char*) in; + char* out_b = (char*) out; + int8x16_t a0, b0, c0, d0, e0, f0, g0, h0; + int8x16_t a1, b1, c1, d1, e1, f1, g1, h1; + + for (ii=0; ii + 15 < size; ii += 16) { + a0 = vld1q_s8(in_b + 8*ii + 0*16); + b0 = vld1q_s8(in_b + 8*ii + 1*16); + c0 = vld1q_s8(in_b + 8*ii + 2*16); + d0 = vld1q_s8(in_b + 8*ii + 3*16); + e0 = vld1q_s8(in_b + 8*ii + 4*16); + f0 = vld1q_s8(in_b + 8*ii + 5*16); + g0 = vld1q_s8(in_b + 8*ii + 6*16); + h0 = vld1q_s8(in_b + 8*ii + 7*16); + + a1 = vzip1q_s8 (a0, b0); + b1 = vzip2q_s8 (a0, b0); + c1 = vzip1q_s8 (c0, d0); + d1 = vzip2q_s8 (c0, d0); + e1 = vzip1q_s8 (e0, f0); + f1 = vzip2q_s8 (e0, f0); + g1 = vzip1q_s8 (g0, h0); + h1 = vzip2q_s8 (g0, h0); + + a0 = vzip1q_s8 (a1, b1); + b0 = vzip2q_s8 (a1, b1); + c0 = vzip1q_s8 (c1, d1); + d0 = vzip2q_s8 (c1, d1); + e0 = vzip1q_s8 (e1, f1); + f0 = vzip2q_s8 (e1, f1); + g0 = vzip1q_s8 (g1, h1); + h0 = vzip2q_s8 (g1, h1); + + a1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (c0)); + b1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (c0)); + c1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (b0), vreinterpretq_s32_s8 (d0)); + d1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (b0), vreinterpretq_s32_s8 (d0)); + e1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (g0)); + f1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (g0)); + g1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (f0), vreinterpretq_s32_s8 (h0)); + h1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (f0), vreinterpretq_s32_s8 (h0)); + + a0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (a1), vreinterpretq_s64_s8 (e1)); + b0 = 
(int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (a1), vreinterpretq_s64_s8 (e1)); + c0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (b1), vreinterpretq_s64_s8 (f1)); + d0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (b1), vreinterpretq_s64_s8 (f1)); + e0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (c1), vreinterpretq_s64_s8 (g1)); + f0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (c1), vreinterpretq_s64_s8 (g1)); + g0 = (int8x16_t) vzip1q_s64 (vreinterpretq_s64_s8 (d1), vreinterpretq_s64_s8 (h1)); + h0 = (int8x16_t) vzip2q_s64 (vreinterpretq_s64_s8 (d1), vreinterpretq_s64_s8 (h1)); + + vst1q_s8(out_b + 0*size + ii, a0); + vst1q_s8(out_b + 1*size + ii, b0); + vst1q_s8(out_b + 2*size + ii, c0); + vst1q_s8(out_b + 3*size + ii, d0); + vst1q_s8(out_b + 4*size + ii, e0); + vst1q_s8(out_b + 5*size + ii, f0); + vst1q_s8(out_b + 6*size + ii, g0); + vst1q_s8(out_b + 7*size + ii, h0); + } + + return bshuf_trans_byte_elem_remainder(in, out, size, 8, + size - size % 16); +} + + +/* Transpose bytes within elements using best NEON algorithm available. */ +int64_t bshuf_trans_byte_elem_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; + + // Trivial cases: power of 2 bytes. + switch (elem_size) { + case 1: + count = bshuf_copy(in, out, size, elem_size); + return count; + case 2: + count = bshuf_trans_byte_elem_NEON_16(in, out, size); + return count; + case 4: + count = bshuf_trans_byte_elem_NEON_32(in, out, size); + return count; + case 8: + count = bshuf_trans_byte_elem_NEON_64(in, out, size); + return count; + } + + // Worst case: odd number of bytes. Turns out that this is faster for + // (odd * 2) byte elements as well (hence % 4). + if (elem_size % 4) { + count = bshuf_trans_byte_elem_scal(in, out, size, elem_size); + return count; + } + + // Multiple of power of 2: transpose hierarchically. 
+ { + size_t nchunk_elem; + void* tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + if ((elem_size % 8) == 0) { + nchunk_elem = elem_size / 8; + TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int64_t); + count = bshuf_trans_byte_elem_NEON_64(out, tmp_buf, + size * nchunk_elem); + bshuf_trans_elem(tmp_buf, out, 8, nchunk_elem, size); + } else if ((elem_size % 4) == 0) { + nchunk_elem = elem_size / 4; + TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int32_t); + count = bshuf_trans_byte_elem_NEON_32(out, tmp_buf, + size * nchunk_elem); + bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size); + } else { + // Not used since scalar algorithm is faster. + nchunk_elem = elem_size / 2; + TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t); + count = bshuf_trans_byte_elem_NEON_16(out, tmp_buf, + size * nchunk_elem); + bshuf_trans_elem(tmp_buf, out, 2, nchunk_elem, size); + } + + free(tmp_buf); + return count; + } +} + + +/* Creates a mask made up of the most significant + * bit of each byte of 'input' + */ +int32_t move_byte_mask_neon(uint8x16_t input) { + + return ( ((input[0] & 0x80) >> 7) | (((input[1] & 0x80) >> 7) << 1) | (((input[2] & 0x80) >> 7) << 2) | (((input[3] & 0x80) >> 7) << 3) + | (((input[4] & 0x80) >> 7) << 4) | (((input[5] & 0x80) >> 7) << 5) | (((input[6] & 0x80) >> 7) << 6) | (((input[7] & 0x80) >> 7) << 7) + | (((input[8] & 0x80) >> 7) << 8) | (((input[9] & 0x80) >> 7) << 9) | (((input[10] & 0x80) >> 7) << 10) | (((input[11] & 0x80) >> 7) << 11) + | (((input[12] & 0x80) >> 7) << 12) | (((input[13] & 0x80) >> 7) << 13) | (((input[14] & 0x80) >> 7) << 14) | (((input[15] & 0x80) >> 7) << 15) + ); +} + +/* Transpose bits within bytes. 
*/ +int64_t bshuf_trans_bit_byte_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + + size_t ii, kk; + const char* in_b = (const char*) in; + char* out_b = (char*) out; + uint16_t* out_ui16; + + int64_t count; + + size_t nbyte = elem_size * size; + + CHECK_MULT_EIGHT(nbyte); + + int16x8_t xmm; + int32_t bt; + + for (ii = 0; ii + 15 < nbyte; ii += 16) { + xmm = vld1q_s16((int16_t *) (in_b + ii)); + for (kk = 0; kk < 8; kk++) { + bt = move_byte_mask_neon((uint8x16_t) xmm); + xmm = vshlq_n_s16(xmm, 1); + out_ui16 = (uint16_t*) &out_b[((7 - kk) * nbyte + ii) / 8]; + *out_ui16 = bt; + } + } + count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size, + nbyte - nbyte % 16); + return count; +} + + +/* Transpose bits within elements. */ +int64_t bshuf_trans_bit_elem_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; + + CHECK_MULT_EIGHT(size); + + void* tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + count = bshuf_trans_byte_elem_NEON(in, out, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_trans_bit_byte_NEON(out, tmp_buf, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); + + free(tmp_buf); + + return count; +} + + +/* For data organized into a row for each bit (8 * elem_size rows), transpose + * the bytes. 
*/ +int64_t bshuf_trans_byte_bitrow_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + + size_t ii, jj; + const char* in_b = (const char*) in; + char* out_b = (char*) out; + + CHECK_MULT_EIGHT(size); + + size_t nrows = 8 * elem_size; + size_t nbyte_row = size / 8; + + int8x16_t a0, b0, c0, d0, e0, f0, g0, h0; + int8x16_t a1, b1, c1, d1, e1, f1, g1, h1; + int64x1_t *as, *bs, *cs, *ds, *es, *fs, *gs, *hs; + + for (ii = 0; ii + 7 < nrows; ii += 8) { + for (jj = 0; jj + 15 < nbyte_row; jj += 16) { + a0 = vld1q_s8(in_b + (ii + 0)*nbyte_row + jj); + b0 = vld1q_s8(in_b + (ii + 1)*nbyte_row + jj); + c0 = vld1q_s8(in_b + (ii + 2)*nbyte_row + jj); + d0 = vld1q_s8(in_b + (ii + 3)*nbyte_row + jj); + e0 = vld1q_s8(in_b + (ii + 4)*nbyte_row + jj); + f0 = vld1q_s8(in_b + (ii + 5)*nbyte_row + jj); + g0 = vld1q_s8(in_b + (ii + 6)*nbyte_row + jj); + h0 = vld1q_s8(in_b + (ii + 7)*nbyte_row + jj); + + a1 = vzip1q_s8(a0, b0); + b1 = vzip1q_s8(c0, d0); + c1 = vzip1q_s8(e0, f0); + d1 = vzip1q_s8(g0, h0); + e1 = vzip2q_s8(a0, b0); + f1 = vzip2q_s8(c0, d0); + g1 = vzip2q_s8(e0, f0); + h1 = vzip2q_s8(g0, h0); + + a0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (a1), vreinterpretq_s16_s8 (b1)); + b0= (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (c1), vreinterpretq_s16_s8 (d1)); + c0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (a1), vreinterpretq_s16_s8 (b1)); + d0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (c1), vreinterpretq_s16_s8 (d1)); + e0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (e1), vreinterpretq_s16_s8 (f1)); + f0 = (int8x16_t) vzip1q_s16 (vreinterpretq_s16_s8 (g1), vreinterpretq_s16_s8 (h1)); + g0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (e1), vreinterpretq_s16_s8 (f1)); + h0 = (int8x16_t) vzip2q_s16 (vreinterpretq_s16_s8 (g1), vreinterpretq_s16_s8 (h1)); + + a1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (b0)); + b1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (a0), vreinterpretq_s32_s8 (b0)); + c1 
= (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (c0), vreinterpretq_s32_s8 (d0)); + d1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (c0), vreinterpretq_s32_s8 (d0)); + e1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (f0)); + f1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (e0), vreinterpretq_s32_s8 (f0)); + g1 = (int8x16_t) vzip1q_s32 (vreinterpretq_s32_s8 (g0), vreinterpretq_s32_s8 (h0)); + h1 = (int8x16_t) vzip2q_s32 (vreinterpretq_s32_s8 (g0), vreinterpretq_s32_s8 (h0)); + + as = (int64x1_t *) &a1; + bs = (int64x1_t *) &b1; + cs = (int64x1_t *) &c1; + ds = (int64x1_t *) &d1; + es = (int64x1_t *) &e1; + fs = (int64x1_t *) &f1; + gs = (int64x1_t *) &g1; + hs = (int64x1_t *) &h1; + + vst1_s64((int64_t *)(out_b + (jj + 0) * nrows + ii), *as); + vst1_s64((int64_t *)(out_b + (jj + 1) * nrows + ii), *(as + 1)); + vst1_s64((int64_t *)(out_b + (jj + 2) * nrows + ii), *bs); + vst1_s64((int64_t *)(out_b + (jj + 3) * nrows + ii), *(bs + 1)); + vst1_s64((int64_t *)(out_b + (jj + 4) * nrows + ii), *cs); + vst1_s64((int64_t *)(out_b + (jj + 5) * nrows + ii), *(cs + 1)); + vst1_s64((int64_t *)(out_b + (jj + 6) * nrows + ii), *ds); + vst1_s64((int64_t *)(out_b + (jj + 7) * nrows + ii), *(ds + 1)); + vst1_s64((int64_t *)(out_b + (jj + 8) * nrows + ii), *es); + vst1_s64((int64_t *)(out_b + (jj + 9) * nrows + ii), *(es + 1)); + vst1_s64((int64_t *)(out_b + (jj + 10) * nrows + ii), *fs); + vst1_s64((int64_t *)(out_b + (jj + 11) * nrows + ii), *(fs + 1)); + vst1_s64((int64_t *)(out_b + (jj + 12) * nrows + ii), *gs); + vst1_s64((int64_t *)(out_b + (jj + 13) * nrows + ii), *(gs + 1)); + vst1_s64((int64_t *)(out_b + (jj + 14) * nrows + ii), *hs); + vst1_s64((int64_t *)(out_b + (jj + 15) * nrows + ii), *(hs + 1)); + } + for (jj = nbyte_row - nbyte_row % 16; jj < nbyte_row; jj ++) { + out_b[jj * nrows + ii + 0] = in_b[(ii + 0)*nbyte_row + jj]; + out_b[jj * nrows + ii + 1] = in_b[(ii + 1)*nbyte_row + jj]; + out_b[jj * nrows + ii + 2] = in_b[(ii + 
2)*nbyte_row + jj]; + out_b[jj * nrows + ii + 3] = in_b[(ii + 3)*nbyte_row + jj]; + out_b[jj * nrows + ii + 4] = in_b[(ii + 4)*nbyte_row + jj]; + out_b[jj * nrows + ii + 5] = in_b[(ii + 5)*nbyte_row + jj]; + out_b[jj * nrows + ii + 6] = in_b[(ii + 6)*nbyte_row + jj]; + out_b[jj * nrows + ii + 7] = in_b[(ii + 7)*nbyte_row + jj]; + } + } + return size * elem_size; +} + + +/* Shuffle bits within the bytes of eight element blocks. */ +int64_t bshuf_shuffle_bit_eightelem_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + + CHECK_MULT_EIGHT(size); + + // With a bit of care, this could be written such that such that it is + // in_buf = out_buf safe. + const char* in_b = (const char*) in; + uint16_t* out_ui16 = (uint16_t*) out; + + size_t ii, jj, kk; + size_t nbyte = elem_size * size; + + int16x8_t xmm; + int32_t bt; + + if (elem_size % 2) { + bshuf_shuffle_bit_eightelem_scal(in, out, size, elem_size); + } else { + for (ii = 0; ii + 8 * elem_size - 1 < nbyte; + ii += 8 * elem_size) { + for (jj = 0; jj + 15 < 8 * elem_size; jj += 16) { + xmm = vld1q_s16((int16_t *) &in_b[ii + jj]); + for (kk = 0; kk < 8; kk++) { + bt = move_byte_mask_neon((uint8x16_t) xmm); + xmm = vshlq_n_s16(xmm, 1); + size_t ind = (ii + jj / 8 + (7 - kk) * elem_size); + out_ui16[ind / 2] = bt; + } + } + } + } + return size * elem_size; +} + + +/* Untranspose bits within elements. 
*/ +int64_t bshuf_untrans_bit_elem_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; + + CHECK_MULT_EIGHT(size); + + void* tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + count = bshuf_trans_byte_bitrow_NEON(in, tmp_buf, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_shuffle_bit_eightelem_NEON(tmp_buf, out, size, elem_size); + + free(tmp_buf); + + return count; +} + +#else // #ifdef USEARMNEON + +int64_t bshuf_untrans_bit_elem_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -13; +} + + +int64_t bshuf_trans_bit_elem_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -13; +} + + +int64_t bshuf_trans_byte_bitrow_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -13; +} + + +int64_t bshuf_trans_bit_byte_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -13; +} + + +int64_t bshuf_trans_byte_elem_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -13; +} + + +int64_t bshuf_trans_byte_elem_NEON_64(const void* in, void* out, const size_t size) { + return -13; +} + + +int64_t bshuf_trans_byte_elem_NEON_32(const void* in, void* out, const size_t size) { + return -13; +} + + +int64_t bshuf_trans_byte_elem_NEON_16(const void* in, void* out, const size_t size) { + return -13; +} + + +int64_t bshuf_shuffle_bit_eightelem_NEON(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -13; +} + + +#endif + + + + + +/* ---- Worker code that uses SSE2 ---- + * + * The following code makes use of the SSE2 instruction set and specialized + * 16 byte registers. The SSE2 instructions are present on modern x86 + * processors. The first Intel processor microarchitecture supporting SSE2 was + * Pentium 4 (2000). 
+ * + */ + +#ifdef USESSE2 + +/* Transpose bytes within elements for 16 bit elements. */ +int64_t bshuf_trans_byte_elem_SSE_16(const void* in, void* out, const size_t size) { + + size_t ii; + const char *in_b = (const char*) in; + char *out_b = (char*) out; + __m128i a0, b0, a1, b1; + + for (ii=0; ii + 15 < size; ii += 16) { + a0 = _mm_loadu_si128((__m128i *) &in_b[2*ii + 0*16]); + b0 = _mm_loadu_si128((__m128i *) &in_b[2*ii + 1*16]); + + a1 = _mm_unpacklo_epi8(a0, b0); + b1 = _mm_unpackhi_epi8(a0, b0); + + a0 = _mm_unpacklo_epi8(a1, b1); + b0 = _mm_unpackhi_epi8(a1, b1); + + a1 = _mm_unpacklo_epi8(a0, b0); + b1 = _mm_unpackhi_epi8(a0, b0); + + a0 = _mm_unpacklo_epi8(a1, b1); + b0 = _mm_unpackhi_epi8(a1, b1); + + _mm_storeu_si128((__m128i *) &out_b[0*size + ii], a0); + _mm_storeu_si128((__m128i *) &out_b[1*size + ii], b0); + } + return bshuf_trans_byte_elem_remainder(in, out, size, 2, + size - size % 16); +} + + +/* Transpose bytes within elements for 32 bit elements. */ +int64_t bshuf_trans_byte_elem_SSE_32(const void* in, void* out, const size_t size) { + + size_t ii; + const char *in_b; + char *out_b; + in_b = (const char*) in; + out_b = (char*) out; + __m128i a0, b0, c0, d0, a1, b1, c1, d1; + + for (ii=0; ii + 15 < size; ii += 16) { + a0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 0*16]); + b0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 1*16]); + c0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 2*16]); + d0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 3*16]); + + a1 = _mm_unpacklo_epi8(a0, b0); + b1 = _mm_unpackhi_epi8(a0, b0); + c1 = _mm_unpacklo_epi8(c0, d0); + d1 = _mm_unpackhi_epi8(c0, d0); + + a0 = _mm_unpacklo_epi8(a1, b1); + b0 = _mm_unpackhi_epi8(a1, b1); + c0 = _mm_unpacklo_epi8(c1, d1); + d0 = _mm_unpackhi_epi8(c1, d1); + + a1 = _mm_unpacklo_epi8(a0, b0); + b1 = _mm_unpackhi_epi8(a0, b0); + c1 = _mm_unpacklo_epi8(c0, d0); + d1 = _mm_unpackhi_epi8(c0, d0); + + a0 = _mm_unpacklo_epi64(a1, c1); + b0 = _mm_unpackhi_epi64(a1, c1); + c0 = _mm_unpacklo_epi64(b1, 
d1); + d0 = _mm_unpackhi_epi64(b1, d1); + + _mm_storeu_si128((__m128i *) &out_b[0*size + ii], a0); + _mm_storeu_si128((__m128i *) &out_b[1*size + ii], b0); + _mm_storeu_si128((__m128i *) &out_b[2*size + ii], c0); + _mm_storeu_si128((__m128i *) &out_b[3*size + ii], d0); + } + return bshuf_trans_byte_elem_remainder(in, out, size, 4, + size - size % 16); +} + + +/* Transpose bytes within elements for 64 bit elements. */ +int64_t bshuf_trans_byte_elem_SSE_64(const void* in, void* out, const size_t size) { + + size_t ii; + const char* in_b = (const char*) in; + char* out_b = (char*) out; + __m128i a0, b0, c0, d0, e0, f0, g0, h0; + __m128i a1, b1, c1, d1, e1, f1, g1, h1; + + for (ii=0; ii + 15 < size; ii += 16) { + a0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 0*16]); + b0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 1*16]); + c0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 2*16]); + d0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 3*16]); + e0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 4*16]); + f0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 5*16]); + g0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 6*16]); + h0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 7*16]); + + a1 = _mm_unpacklo_epi8(a0, b0); + b1 = _mm_unpackhi_epi8(a0, b0); + c1 = _mm_unpacklo_epi8(c0, d0); + d1 = _mm_unpackhi_epi8(c0, d0); + e1 = _mm_unpacklo_epi8(e0, f0); + f1 = _mm_unpackhi_epi8(e0, f0); + g1 = _mm_unpacklo_epi8(g0, h0); + h1 = _mm_unpackhi_epi8(g0, h0); + + a0 = _mm_unpacklo_epi8(a1, b1); + b0 = _mm_unpackhi_epi8(a1, b1); + c0 = _mm_unpacklo_epi8(c1, d1); + d0 = _mm_unpackhi_epi8(c1, d1); + e0 = _mm_unpacklo_epi8(e1, f1); + f0 = _mm_unpackhi_epi8(e1, f1); + g0 = _mm_unpacklo_epi8(g1, h1); + h0 = _mm_unpackhi_epi8(g1, h1); + + a1 = _mm_unpacklo_epi32(a0, c0); + b1 = _mm_unpackhi_epi32(a0, c0); + c1 = _mm_unpacklo_epi32(b0, d0); + d1 = _mm_unpackhi_epi32(b0, d0); + e1 = _mm_unpacklo_epi32(e0, g0); + f1 = _mm_unpackhi_epi32(e0, g0); + g1 = _mm_unpacklo_epi32(f0, h0); + h1 = _mm_unpackhi_epi32(f0, h0); + + 
a0 = _mm_unpacklo_epi64(a1, e1); + b0 = _mm_unpackhi_epi64(a1, e1); + c0 = _mm_unpacklo_epi64(b1, f1); + d0 = _mm_unpackhi_epi64(b1, f1); + e0 = _mm_unpacklo_epi64(c1, g1); + f0 = _mm_unpackhi_epi64(c1, g1); + g0 = _mm_unpacklo_epi64(d1, h1); + h0 = _mm_unpackhi_epi64(d1, h1); + + _mm_storeu_si128((__m128i *) &out_b[0*size + ii], a0); + _mm_storeu_si128((__m128i *) &out_b[1*size + ii], b0); + _mm_storeu_si128((__m128i *) &out_b[2*size + ii], c0); + _mm_storeu_si128((__m128i *) &out_b[3*size + ii], d0); + _mm_storeu_si128((__m128i *) &out_b[4*size + ii], e0); + _mm_storeu_si128((__m128i *) &out_b[5*size + ii], f0); + _mm_storeu_si128((__m128i *) &out_b[6*size + ii], g0); + _mm_storeu_si128((__m128i *) &out_b[7*size + ii], h0); + } + return bshuf_trans_byte_elem_remainder(in, out, size, 8, + size - size % 16); +} + + +/* Transpose bytes within elements using best SSE algorithm available. */ +int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; + + // Trivial cases: power of 2 bytes. + switch (elem_size) { + case 1: + count = bshuf_copy(in, out, size, elem_size); + return count; + case 2: + count = bshuf_trans_byte_elem_SSE_16(in, out, size); + return count; + case 4: + count = bshuf_trans_byte_elem_SSE_32(in, out, size); + return count; + case 8: + count = bshuf_trans_byte_elem_SSE_64(in, out, size); + return count; + } + + // Worst case: odd number of bytes. Turns out that this is faster for + // (odd * 2) byte elements as well (hence % 4). + if (elem_size % 4) { + count = bshuf_trans_byte_elem_scal(in, out, size, elem_size); + return count; + } + + // Multiple of power of 2: transpose hierarchically. 
+ { + size_t nchunk_elem; + void* tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + if ((elem_size % 8) == 0) { + nchunk_elem = elem_size / 8; + TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int64_t); + count = bshuf_trans_byte_elem_SSE_64(out, tmp_buf, + size * nchunk_elem); + bshuf_trans_elem(tmp_buf, out, 8, nchunk_elem, size); + } else if ((elem_size % 4) == 0) { + nchunk_elem = elem_size / 4; + TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int32_t); + count = bshuf_trans_byte_elem_SSE_32(out, tmp_buf, + size * nchunk_elem); + bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size); + } else { + // Not used since scalar algorithm is faster. + nchunk_elem = elem_size / 2; + TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t); + count = bshuf_trans_byte_elem_SSE_16(out, tmp_buf, + size * nchunk_elem); + bshuf_trans_elem(tmp_buf, out, 2, nchunk_elem, size); + } + + free(tmp_buf); + return count; + } +} + + +/* Transpose bits within bytes. */ +int64_t bshuf_trans_bit_byte_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + + size_t ii, kk; + const char* in_b = (const char*) in; + char* out_b = (char*) out; + uint16_t* out_ui16; + + int64_t count; + + size_t nbyte = elem_size * size; + + CHECK_MULT_EIGHT(nbyte); + + __m128i xmm; + int32_t bt; + + for (ii = 0; ii + 15 < nbyte; ii += 16) { + xmm = _mm_loadu_si128((__m128i *) &in_b[ii]); + for (kk = 0; kk < 8; kk++) { + bt = _mm_movemask_epi8(xmm); + xmm = _mm_slli_epi16(xmm, 1); + out_ui16 = (uint16_t*) &out_b[((7 - kk) * nbyte + ii) / 8]; + *out_ui16 = bt; + } + } + count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size, + nbyte - nbyte % 16); + return count; +} + + +/* Transpose bits within elements. 
*/ +int64_t bshuf_trans_bit_elem_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; + + CHECK_MULT_EIGHT(size); + + void* tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + count = bshuf_trans_byte_elem_SSE(in, out, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_trans_bit_byte_SSE(out, tmp_buf, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); + + free(tmp_buf); + + return count; +} + + +/* For data organized into a row for each bit (8 * elem_size rows), transpose + * the bytes. */ +int64_t bshuf_trans_byte_bitrow_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + + size_t ii, jj; + const char* in_b = (const char*) in; + char* out_b = (char*) out; + + CHECK_MULT_EIGHT(size); + + size_t nrows = 8 * elem_size; + size_t nbyte_row = size / 8; + + __m128i a0, b0, c0, d0, e0, f0, g0, h0; + __m128i a1, b1, c1, d1, e1, f1, g1, h1; + __m128 *as, *bs, *cs, *ds, *es, *fs, *gs, *hs; + + for (ii = 0; ii + 7 < nrows; ii += 8) { + for (jj = 0; jj + 15 < nbyte_row; jj += 16) { + a0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 0)*nbyte_row + jj]); + b0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 1)*nbyte_row + jj]); + c0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 2)*nbyte_row + jj]); + d0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 3)*nbyte_row + jj]); + e0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 4)*nbyte_row + jj]); + f0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 5)*nbyte_row + jj]); + g0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 6)*nbyte_row + jj]); + h0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 7)*nbyte_row + jj]); + + + a1 = _mm_unpacklo_epi8(a0, b0); + b1 = _mm_unpacklo_epi8(c0, d0); + c1 = _mm_unpacklo_epi8(e0, f0); + d1 = _mm_unpacklo_epi8(g0, h0); + e1 = _mm_unpackhi_epi8(a0, b0); + f1 = _mm_unpackhi_epi8(c0, d0); + g1 = _mm_unpackhi_epi8(e0, f0); + h1 = _mm_unpackhi_epi8(g0, h0); + + + a0 = 
_mm_unpacklo_epi16(a1, b1); + b0 = _mm_unpacklo_epi16(c1, d1); + c0 = _mm_unpackhi_epi16(a1, b1); + d0 = _mm_unpackhi_epi16(c1, d1); + + e0 = _mm_unpacklo_epi16(e1, f1); + f0 = _mm_unpacklo_epi16(g1, h1); + g0 = _mm_unpackhi_epi16(e1, f1); + h0 = _mm_unpackhi_epi16(g1, h1); + + + a1 = _mm_unpacklo_epi32(a0, b0); + b1 = _mm_unpackhi_epi32(a0, b0); + + c1 = _mm_unpacklo_epi32(c0, d0); + d1 = _mm_unpackhi_epi32(c0, d0); + + e1 = _mm_unpacklo_epi32(e0, f0); + f1 = _mm_unpackhi_epi32(e0, f0); + + g1 = _mm_unpacklo_epi32(g0, h0); + h1 = _mm_unpackhi_epi32(g0, h0); + + // We don't have a storeh instruction for integers, so interpret + // as a float. Have a storel (_mm_storel_epi64). + as = (__m128 *) &a1; + bs = (__m128 *) &b1; + cs = (__m128 *) &c1; + ds = (__m128 *) &d1; + es = (__m128 *) &e1; + fs = (__m128 *) &f1; + gs = (__m128 *) &g1; + hs = (__m128 *) &h1; + + _mm_storel_pi((__m64 *) &out_b[(jj + 0) * nrows + ii], *as); + _mm_storel_pi((__m64 *) &out_b[(jj + 2) * nrows + ii], *bs); + _mm_storel_pi((__m64 *) &out_b[(jj + 4) * nrows + ii], *cs); + _mm_storel_pi((__m64 *) &out_b[(jj + 6) * nrows + ii], *ds); + _mm_storel_pi((__m64 *) &out_b[(jj + 8) * nrows + ii], *es); + _mm_storel_pi((__m64 *) &out_b[(jj + 10) * nrows + ii], *fs); + _mm_storel_pi((__m64 *) &out_b[(jj + 12) * nrows + ii], *gs); + _mm_storel_pi((__m64 *) &out_b[(jj + 14) * nrows + ii], *hs); + + _mm_storeh_pi((__m64 *) &out_b[(jj + 1) * nrows + ii], *as); + _mm_storeh_pi((__m64 *) &out_b[(jj + 3) * nrows + ii], *bs); + _mm_storeh_pi((__m64 *) &out_b[(jj + 5) * nrows + ii], *cs); + _mm_storeh_pi((__m64 *) &out_b[(jj + 7) * nrows + ii], *ds); + _mm_storeh_pi((__m64 *) &out_b[(jj + 9) * nrows + ii], *es); + _mm_storeh_pi((__m64 *) &out_b[(jj + 11) * nrows + ii], *fs); + _mm_storeh_pi((__m64 *) &out_b[(jj + 13) * nrows + ii], *gs); + _mm_storeh_pi((__m64 *) &out_b[(jj + 15) * nrows + ii], *hs); + } + for (jj = nbyte_row - nbyte_row % 16; jj < nbyte_row; jj ++) { + out_b[jj * nrows + ii + 0] = in_b[(ii + 
0)*nbyte_row + jj]; + out_b[jj * nrows + ii + 1] = in_b[(ii + 1)*nbyte_row + jj]; + out_b[jj * nrows + ii + 2] = in_b[(ii + 2)*nbyte_row + jj]; + out_b[jj * nrows + ii + 3] = in_b[(ii + 3)*nbyte_row + jj]; + out_b[jj * nrows + ii + 4] = in_b[(ii + 4)*nbyte_row + jj]; + out_b[jj * nrows + ii + 5] = in_b[(ii + 5)*nbyte_row + jj]; + out_b[jj * nrows + ii + 6] = in_b[(ii + 6)*nbyte_row + jj]; + out_b[jj * nrows + ii + 7] = in_b[(ii + 7)*nbyte_row + jj]; + } + } + return size * elem_size; +} + + +/* Shuffle bits within the bytes of eight element blocks. */ +int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + + CHECK_MULT_EIGHT(size); + + // With a bit of care, this could be written such that such that it is + // in_buf = out_buf safe. + const char* in_b = (const char*) in; + uint16_t* out_ui16 = (uint16_t*) out; + + size_t ii, jj, kk; + size_t nbyte = elem_size * size; + + __m128i xmm; + int32_t bt; + + if (elem_size % 2) { + bshuf_shuffle_bit_eightelem_scal(in, out, size, elem_size); + } else { + for (ii = 0; ii + 8 * elem_size - 1 < nbyte; + ii += 8 * elem_size) { + for (jj = 0; jj + 15 < 8 * elem_size; jj += 16) { + xmm = _mm_loadu_si128((__m128i *) &in_b[ii + jj]); + for (kk = 0; kk < 8; kk++) { + bt = _mm_movemask_epi8(xmm); + xmm = _mm_slli_epi16(xmm, 1); + size_t ind = (ii + jj / 8 + (7 - kk) * elem_size); + out_ui16[ind / 2] = bt; + } + } + } + } + return size * elem_size; +} + + +/* Untranspose bits within elements. 
*/ +int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; + + CHECK_MULT_EIGHT(size); + + void* tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + count = bshuf_trans_byte_bitrow_SSE(in, tmp_buf, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_shuffle_bit_eightelem_SSE(tmp_buf, out, size, elem_size); + + free(tmp_buf); + + return count; +} + +#else // #ifdef USESSE2 + + +int64_t bshuf_untrans_bit_elem_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -11; +} + + +int64_t bshuf_trans_bit_elem_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -11; +} + + +int64_t bshuf_trans_byte_bitrow_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -11; +} + + +int64_t bshuf_trans_bit_byte_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -11; +} + + +int64_t bshuf_trans_byte_elem_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -11; +} + + +int64_t bshuf_trans_byte_elem_SSE_64(const void* in, void* out, const size_t size) { + return -11; +} + + +int64_t bshuf_trans_byte_elem_SSE_32(const void* in, void* out, const size_t size) { + return -11; +} + + +int64_t bshuf_trans_byte_elem_SSE_16(const void* in, void* out, const size_t size) { + return -11; +} + + +int64_t bshuf_shuffle_bit_eightelem_SSE(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -11; +} + + +#endif // #ifdef USESSE2 + + +/* ---- Code that requires AVX2. Intel Haswell (2013) and later. ---- */ + +/* ---- Worker code that uses AVX2 ---- + * + * The following code makes use of the AVX2 instruction set and specialized + * 32 byte registers. The AVX2 instructions are present on newer x86 + * processors. The first Intel processor microarchitecture supporting AVX2 was + * Haswell (2013). 
+ * + */ + +#ifdef USEAVX2 + +/* Transpose bits within bytes. */ +int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + + size_t ii, kk; + const char* in_b = (const char*) in; + char* out_b = (char*) out; + int32_t* out_i32; + + size_t nbyte = elem_size * size; + + int64_t count; + + __m256i ymm; + int32_t bt; + + for (ii = 0; ii + 31 < nbyte; ii += 32) { + ymm = _mm256_loadu_si256((__m256i *) &in_b[ii]); + for (kk = 0; kk < 8; kk++) { + bt = _mm256_movemask_epi8(ymm); + ymm = _mm256_slli_epi16(ymm, 1); + out_i32 = (int32_t*) &out_b[((7 - kk) * nbyte + ii) / 8]; + *out_i32 = bt; + } + } + count = bshuf_trans_bit_byte_remainder(in, out, size, elem_size, + nbyte - nbyte % 32); + return count; +} + + +/* Transpose bits within elements. */ +int64_t bshuf_trans_bit_elem_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; + + CHECK_MULT_EIGHT(size); + + void* tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + count = bshuf_trans_byte_elem_SSE(in, out, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_trans_bit_byte_AVX(out, tmp_buf, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); + + free(tmp_buf); + + return count; +} + + +/* For data organized into a row for each bit (8 * elem_size rows), transpose + * the bytes. 
*/ +int64_t bshuf_trans_byte_bitrow_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + + size_t hh, ii, jj, kk, mm; + const char* in_b = (const char*) in; + char* out_b = (char*) out; + + CHECK_MULT_EIGHT(size); + + size_t nrows = 8 * elem_size; + size_t nbyte_row = size / 8; + + if (elem_size % 4) return bshuf_trans_byte_bitrow_SSE(in, out, size, + elem_size); + + __m256i ymm_0[8]; + __m256i ymm_1[8]; + __m256i ymm_storeage[8][4]; + + for (jj = 0; jj + 31 < nbyte_row; jj += 32) { + for (ii = 0; ii + 3 < elem_size; ii += 4) { + for (hh = 0; hh < 4; hh ++) { + + for (kk = 0; kk < 8; kk ++){ + ymm_0[kk] = _mm256_loadu_si256((__m256i *) &in_b[ + (ii * 8 + hh * 8 + kk) * nbyte_row + jj]); + } + + for (kk = 0; kk < 4; kk ++){ + ymm_1[kk] = _mm256_unpacklo_epi8(ymm_0[kk * 2], + ymm_0[kk * 2 + 1]); + ymm_1[kk + 4] = _mm256_unpackhi_epi8(ymm_0[kk * 2], + ymm_0[kk * 2 + 1]); + } + + for (kk = 0; kk < 2; kk ++){ + for (mm = 0; mm < 2; mm ++){ + ymm_0[kk * 4 + mm] = _mm256_unpacklo_epi16( + ymm_1[kk * 4 + mm * 2], + ymm_1[kk * 4 + mm * 2 + 1]); + ymm_0[kk * 4 + mm + 2] = _mm256_unpackhi_epi16( + ymm_1[kk * 4 + mm * 2], + ymm_1[kk * 4 + mm * 2 + 1]); + } + } + + for (kk = 0; kk < 4; kk ++){ + ymm_1[kk * 2] = _mm256_unpacklo_epi32(ymm_0[kk * 2], + ymm_0[kk * 2 + 1]); + ymm_1[kk * 2 + 1] = _mm256_unpackhi_epi32(ymm_0[kk * 2], + ymm_0[kk * 2 + 1]); + } + + for (kk = 0; kk < 8; kk ++){ + ymm_storeage[kk][hh] = ymm_1[kk]; + } + } + + for (mm = 0; mm < 8; mm ++) { + + for (kk = 0; kk < 4; kk ++){ + ymm_0[kk] = ymm_storeage[mm][kk]; + } + + ymm_1[0] = _mm256_unpacklo_epi64(ymm_0[0], ymm_0[1]); + ymm_1[1] = _mm256_unpacklo_epi64(ymm_0[2], ymm_0[3]); + ymm_1[2] = _mm256_unpackhi_epi64(ymm_0[0], ymm_0[1]); + ymm_1[3] = _mm256_unpackhi_epi64(ymm_0[2], ymm_0[3]); + + ymm_0[0] = _mm256_permute2x128_si256(ymm_1[0], ymm_1[1], 32); + ymm_0[1] = _mm256_permute2x128_si256(ymm_1[2], ymm_1[3], 32); + ymm_0[2] = _mm256_permute2x128_si256(ymm_1[0], ymm_1[1], 49); + 
ymm_0[3] = _mm256_permute2x128_si256(ymm_1[2], ymm_1[3], 49); + + _mm256_storeu_si256((__m256i *) &out_b[ + (jj + mm * 2 + 0 * 16) * nrows + ii * 8], ymm_0[0]); + _mm256_storeu_si256((__m256i *) &out_b[ + (jj + mm * 2 + 0 * 16 + 1) * nrows + ii * 8], ymm_0[1]); + _mm256_storeu_si256((__m256i *) &out_b[ + (jj + mm * 2 + 1 * 16) * nrows + ii * 8], ymm_0[2]); + _mm256_storeu_si256((__m256i *) &out_b[ + (jj + mm * 2 + 1 * 16 + 1) * nrows + ii * 8], ymm_0[3]); + } + } + } + for (ii = 0; ii < nrows; ii ++ ) { + for (jj = nbyte_row - nbyte_row % 32; jj < nbyte_row; jj ++) { + out_b[jj * nrows + ii] = in_b[ii * nbyte_row + jj]; + } + } + return size * elem_size; +} + + +/* Shuffle bits within the bytes of eight element blocks. */ +int64_t bshuf_shuffle_bit_eightelem_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + + CHECK_MULT_EIGHT(size); + + // With a bit of care, this could be written such that such that it is + // in_buf = out_buf safe. + const char* in_b = (const char*) in; + char* out_b = (char*) out; + + size_t ii, jj, kk; + size_t nbyte = elem_size * size; + + __m256i ymm; + int32_t bt; + + if (elem_size % 4) { + return bshuf_shuffle_bit_eightelem_SSE(in, out, size, elem_size); + } else { + for (jj = 0; jj + 31 < 8 * elem_size; jj += 32) { + for (ii = 0; ii + 8 * elem_size - 1 < nbyte; + ii += 8 * elem_size) { + ymm = _mm256_loadu_si256((__m256i *) &in_b[ii + jj]); + for (kk = 0; kk < 8; kk++) { + bt = _mm256_movemask_epi8(ymm); + ymm = _mm256_slli_epi16(ymm, 1); + size_t ind = (ii + jj / 8 + (7 - kk) * elem_size); + * (int32_t *) &out_b[ind] = bt; + } + } + } + } + return size * elem_size; +} + + +/* Untranspose bits within elements. 
*/ +int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; + + CHECK_MULT_EIGHT(size); + + void* tmp_buf = malloc(size * elem_size); + if (tmp_buf == NULL) return -1; + + count = bshuf_trans_byte_bitrow_AVX(in, tmp_buf, size, elem_size); + CHECK_ERR_FREE(count, tmp_buf); + count = bshuf_shuffle_bit_eightelem_AVX(tmp_buf, out, size, elem_size); + + free(tmp_buf); + return count; +} + + +#else // #ifdef USEAVX2 + +int64_t bshuf_trans_bit_byte_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -12; +} + + +int64_t bshuf_trans_bit_elem_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -12; +} + + +int64_t bshuf_trans_byte_bitrow_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -12; +} + + +int64_t bshuf_shuffle_bit_eightelem_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -12; +} + + +int64_t bshuf_untrans_bit_elem_AVX(const void* in, void* out, const size_t size, + const size_t elem_size) { + return -12; +} + +#endif // #ifdef USEAVX2 + + +/* ---- Drivers selecting best instruction set at compile time. 
---- */ + +int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; +#ifdef USEAVX2 + count = bshuf_trans_bit_elem_AVX(in, out, size, elem_size); +#elif defined(USESSE2) + count = bshuf_trans_bit_elem_SSE(in, out, size, elem_size); +#elif defined(USEARMNEON) + count = bshuf_trans_bit_elem_NEON(in, out, size, elem_size); +#else + count = bshuf_trans_bit_elem_scal(in, out, size, elem_size); +#endif + return count; +} + + +int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size, + const size_t elem_size) { + + int64_t count; +#ifdef USEAVX2 + count = bshuf_untrans_bit_elem_AVX(in, out, size, elem_size); +#elif defined(USESSE2) + count = bshuf_untrans_bit_elem_SSE(in, out, size, elem_size); +#elif defined(USEARMNEON) + count = bshuf_untrans_bit_elem_NEON(in, out, size, elem_size); +#else + count = bshuf_untrans_bit_elem_scal(in, out, size, elem_size); +#endif + return count; +} + + +/* ---- Wrappers for implementing blocking ---- */ + +/* Wrap a function for processing a single block to process an entire buffer in + * parallel. 
*/ +int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out, \ + const size_t size, const size_t elem_size, size_t block_size, const int option) { + + omp_size_t ii = 0; + int64_t err = 0; + int64_t count, cum_count=0; + size_t last_block_size; + size_t leftover_bytes; + size_t this_iter; + char *last_in; + char *last_out; + + + ioc_chain C; + ioc_init(&C, in, out); + + + if (block_size == 0) { + block_size = bshuf_default_block_size(elem_size); + } + if (block_size % BSHUF_BLOCKED_MULT) return -81; + +#if defined(_OPENMP) + #pragma omp parallel for schedule(dynamic, 1) \ + private(count) reduction(+ : cum_count) +#endif + for (ii = 0; ii < (omp_size_t)( size / block_size ); ii ++) { + count = fun(&C, block_size, elem_size, option); + if (count < 0) err = count; + cum_count += count; + } + + last_block_size = size % block_size; + last_block_size = last_block_size - last_block_size % BSHUF_BLOCKED_MULT; + if (last_block_size) { + count = fun(&C, last_block_size, elem_size, option); + if (count < 0) err = count; + cum_count += count; + } + + if (err < 0) return err; + + leftover_bytes = size % BSHUF_BLOCKED_MULT * elem_size; + //this_iter; + last_in = (char *) ioc_get_in(&C, &this_iter); + ioc_set_next_in(&C, &this_iter, (void *) (last_in + leftover_bytes)); + last_out = (char *) ioc_get_out(&C, &this_iter); + ioc_set_next_out(&C, &this_iter, (void *) (last_out + leftover_bytes)); + + memcpy(last_out, last_in, leftover_bytes); + + ioc_destroy(&C); + + return cum_count + leftover_bytes; +} + + +/* Bitshuffle a single block. 
*/ +int64_t bshuf_bitshuffle_block(ioc_chain *C_ptr, \ + const size_t size, const size_t elem_size, const int option) { + + size_t this_iter; + const void *in; + void *out; + int64_t count; + + + + in = ioc_get_in(C_ptr, &this_iter); + ioc_set_next_in(C_ptr, &this_iter, + (void*) ((char*) in + size * elem_size)); + out = ioc_get_out(C_ptr, &this_iter); + ioc_set_next_out(C_ptr, &this_iter, + (void *) ((char *) out + size * elem_size)); + + count = bshuf_trans_bit_elem(in, out, size, elem_size); + return count; +} + + +/* Bitunshuffle a single block. */ +int64_t bshuf_bitunshuffle_block(ioc_chain* C_ptr, \ + const size_t size, const size_t elem_size, const int option) { + + + size_t this_iter; + const void *in; + void *out; + int64_t count; + + + + + in = ioc_get_in(C_ptr, &this_iter); + ioc_set_next_in(C_ptr, &this_iter, + (void*) ((char*) in + size * elem_size)); + out = ioc_get_out(C_ptr, &this_iter); + ioc_set_next_out(C_ptr, &this_iter, + (void *) ((char *) out + size * elem_size)); + + count = bshuf_untrans_bit_elem(in, out, size, elem_size); + return count; +} + + +/* Write a 64 bit unsigned integer to a buffer in big endian order. */ +void bshuf_write_uint64_BE(void* buf, uint64_t num) { + int ii; + uint8_t* b = (uint8_t*) buf; + uint64_t pow28 = 1 << 8; + for (ii = 7; ii >= 0; ii--) { + b[ii] = num % pow28; + num = num / pow28; + } +} + + +/* Read a 64 bit unsigned integer from a buffer big endian order. */ +uint64_t bshuf_read_uint64_BE(void* buf) { + int ii; + uint8_t* b = (uint8_t*) buf; + uint64_t num = 0, pow28 = 1 << 8, cp = 1; + for (ii = 7; ii >= 0; ii--) { + num += b[ii] * cp; + cp *= pow28; + } + return num; +} + + +/* Write a 32 bit unsigned integer to a buffer in big endian order. 
*/ +void bshuf_write_uint32_BE(void* buf, uint32_t num) { + int ii; + uint8_t* b = (uint8_t*) buf; + uint32_t pow28 = 1 << 8; + for (ii = 3; ii >= 0; ii--) { + b[ii] = num % pow28; + num = num / pow28; + } +} + + +/* Read a 32 bit unsigned integer from a buffer big endian order. */ +uint32_t bshuf_read_uint32_BE(const void* buf) { + int ii; + uint8_t* b = (uint8_t*) buf; + uint32_t num = 0, pow28 = 1 << 8, cp = 1; + for (ii = 3; ii >= 0; ii--) { + num += b[ii] * cp; + cp *= pow28; + } + return num; +} + + +/* ---- Public functions ---- + * + * See header file for description and usage. + * + */ + +size_t bshuf_default_block_size(const size_t elem_size) { + // This function needs to be absolutely stable between versions. + // Otherwise encoded data will not be decodable. + + size_t block_size = BSHUF_TARGET_BLOCK_SIZE_B / elem_size; + // Ensure it is a required multiple. + block_size = (block_size / BSHUF_BLOCKED_MULT) * BSHUF_BLOCKED_MULT; + return MAX(block_size, BSHUF_MIN_RECOMMEND_BLOCK); +} + + +int64_t bshuf_bitshuffle(const void* in, void* out, const size_t size, + const size_t elem_size, size_t block_size) { + + return bshuf_blocked_wrap_fun(&bshuf_bitshuffle_block, in, out, size, + elem_size, block_size, 0/*option*/); +} + + +int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size, + const size_t elem_size, size_t block_size) { + + return bshuf_blocked_wrap_fun(&bshuf_bitunshuffle_block, in, out, size, + elem_size, block_size, 0/*option*/); +} + + +#undef TRANS_BIT_8X8 +#undef TRANS_ELEM_TYPE +#undef MAX +#undef CHECK_MULT_EIGHT +#undef CHECK_ERR_FREE + +#undef USESSE2 +#undef USEAVX2 diff --git a/src/bitshuffle_core.h b/src/bitshuffle_core.h new file mode 100644 index 00000000..fba7301c --- /dev/null +++ b/src/bitshuffle_core.h @@ -0,0 +1,169 @@ +/* + * Bitshuffle - Filter for improving compression of typed binary data. 
+ * + * This file is part of Bitshuffle + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for details about copyright and rights to use. + * + * + * Header File + * + * Worker routines return an int64_t which is the number of bytes processed + * if positive or an error code if negative. + * + * Error codes: + * -1 : Failed to allocate memory. + * -11 : Missing SSE. + * -12 : Missing AVX. + * -13 : Missing Arm Neon. + * -80 : Input size not a multiple of 8. + * -81 : block_size not multiple of 8. + * -91 : Decompression error, wrong number of bytes processed. + * -1YYY : Error internal to compression routine with error code -YYY. + */ + + +#ifndef BITSHUFFLE_CORE_H +#define BITSHUFFLE_CORE_H + +// We assume GNU g++ defining `__cplusplus` has stdint.h +#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus) +#include +#else + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + typedef signed int int32_t; + typedef unsigned long long uint64_t; + typedef long long int64_t; +#endif + +#include + + +// These are usually set in the setup.py. +#ifndef BSHUF_VERSION_MAJOR +#define BSHUF_VERSION_MAJOR 0 +#define BSHUF_VERSION_MINOR 4 +#define BSHUF_VERSION_POINT 0 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* --- bshuf_using_SSE2 ---- + * + * Whether routines where compiled with the SSE2 instruction set. + * + * Returns + * ------- + * 1 if using SSE2, 0 otherwise. + * + */ +int bshuf_using_SSE2(void); + + +/* ---- bshuf_using_NEON ---- + * + * Whether routines where compiled with the NEON instruction set. + * + * Returns + * ------- + * 1 if using NEON, 0 otherwise. + * + */ +int bshuf_using_NEON(void); + + +/* ---- bshuf_using_AVX2 ---- + * + * Whether routines where compiled with the AVX2 instruction set. + * + * Returns + * ------- + * 1 if using AVX2, 0 otherwise. 
+ * + */ +int bshuf_using_AVX2(void); + + +/* ---- bshuf_default_block_size ---- + * + * The default block size as function of element size. + * + * This is the block size used by the blocked routines (any routine + * taking a *block_size* argument) when the block_size is not provided + * (zero is passed). + * + * The results of this routine are guaranteed to be stable such that + * shuffled/compressed data can always be decompressed. + * + * Parameters + * ---------- + * elem_size : element size of data to be shuffled/compressed. + * + */ +size_t bshuf_default_block_size(const size_t elem_size); + + +/* ---- bshuf_bitshuffle ---- + * + * Bitshuffle the data. + * + * Transpose the bits within elements, in blocks of *block_size* + * elements. + * + * Parameters + * ---------- + * in : input buffer, must be of size * elem_size bytes + * out : output buffer, must be of size * elem_size bytes + * size : number of elements in input + * elem_size : element size of typed data + * block_size : Do transpose in blocks of this many elements. Pass 0 to + * select automatically (recommended). + * + * Returns + * ------- + * number of bytes processed, negative error-code if failed. + * + */ +int64_t bshuf_bitshuffle(const void* in, void* out, const size_t size, + const size_t elem_size, size_t block_size); + + +/* ---- bshuf_bitunshuffle ---- + * + * Unshuffle bitshuffled data. + * + * Untranspose the bits within elements, in blocks of *block_size* + * elements. + * + * To properly unshuffle bitshuffled data, *size*, *elem_size* and *block_size* + * must match the parameters used to shuffle the data. + * + * Parameters + * ---------- + * in : input buffer, must be of size * elem_size bytes + * out : output buffer, must be of size * elem_size bytes + * size : number of elements in input + * elem_size : element size of typed data + * block_size : Do transpose in blocks of this many elements. Pass 0 to + * select automatically (recommended). 
+ * + * Returns + * ------- + * number of bytes processed, negative error-code if failed. + * + */ +int64_t bshuf_bitunshuffle(const void* in, void* out, const size_t size, + const size_t elem_size, size_t block_size); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // BITSHUFFLE_CORE_H diff --git a/src/bitshuffle_internals.h b/src/bitshuffle_internals.h new file mode 100644 index 00000000..59356f10 --- /dev/null +++ b/src/bitshuffle_internals.h @@ -0,0 +1,75 @@ +/* + * Bitshuffle - Filter for improving compression of typed binary data. + * + * This file is part of Bitshuffle + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for details about copyright and rights to use. + */ + + +#ifndef BITSHUFFLE_INTERNALS_H +#define BITSHUFFLE_INTERNALS_H + +// We assume GNU g++ defining `__cplusplus` has stdint.h +#if (defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199900L) || defined(__cplusplus) +#include +#else + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + typedef signed int int32_t; + typedef unsigned long long uint64_t; + typedef long long int64_t; +#endif + +#include +#include "iochain.h" + + +// Constants. +#ifndef BSHUF_MIN_RECOMMEND_BLOCK +#define BSHUF_MIN_RECOMMEND_BLOCK 128 +#define BSHUF_BLOCKED_MULT 8 // Block sizes must be multiple of this. +#define BSHUF_TARGET_BLOCK_SIZE_B 8192 +#endif + + +// Macros. +#define CHECK_ERR_FREE(count, buf) if (count < 0) { free(buf); return count; } + + +#ifdef __cplusplus +extern "C" { +#endif + +/* ---- Utility functions for internal use only ---- */ + +int64_t bshuf_trans_bit_elem(const void* in, void* out, const size_t size, + const size_t elem_size); + +/* Read a 32 bit unsigned integer from a buffer big endian order. */ +uint32_t bshuf_read_uint32_BE(const void* buf); + +/* Write a 32 bit unsigned integer to a buffer in big endian order. 
*/ +void bshuf_write_uint32_BE(void* buf, uint32_t num); + +int64_t bshuf_untrans_bit_elem(const void* in, void* out, const size_t size, + const size_t elem_size); + +/* Function definition for worker functions that process a single block. */ +typedef int64_t (*bshufBlockFunDef)(ioc_chain* C_ptr, + const size_t size, const size_t elem_size, const int option); + +/* Wrap a function for processing a single block to process an entire buffer in + * parallel. */ +int64_t bshuf_blocked_wrap_fun(bshufBlockFunDef fun, const void* in, void* out, + const size_t size, const size_t elem_size, size_t block_size, const int option); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // BITSHUFFLE_INTERNALS_H diff --git a/src/bshuf_h5filter.c b/src/bshuf_h5filter.c new file mode 100644 index 00000000..114b91ff --- /dev/null +++ b/src/bshuf_h5filter.c @@ -0,0 +1,260 @@ +/* + * Bitshuffle HDF5 filter + * + * This file is part of Bitshuffle + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for details about copyright and rights to use. + * + */ + +#include "bitshuffle.h" +#include "bshuf_h5filter.h" + + +#define PUSH_ERR(func, minor, str) \ + H5Epush1(__FILE__, func, __LINE__, H5E_PLINE, minor, str) + + +// Prototypes from bitshuffle.c +void bshuf_write_uint64_BE(void* buf, uint64_t num); +uint64_t bshuf_read_uint64_BE(void* buf); +void bshuf_write_uint32_BE(void* buf, uint32_t num); +uint32_t bshuf_read_uint32_BE(const void* buf); + + +// Only called on compression, not on reverse. 
+herr_t bshuf_h5_set_local(hid_t dcpl, hid_t type, hid_t space){ + + herr_t r; + size_t ii; + + unsigned int elem_size; + + unsigned int flags; + size_t nelements = 8; + size_t nelem_max = 11; + unsigned values[] = {0,0,0,0,0,0,0,0,0,0,0}; + unsigned tmp_values[] = {0,0,0,0,0,0,0,0}; + char msg[80]; + + r = H5Pget_filter_by_id2(dcpl, BSHUF_H5FILTER, &flags, &nelements, + tmp_values, 0, NULL, NULL); + if(r<0) return -1; + + // First 3 slots reserved. Move any passed options to higher addresses. + for (ii=0; ii < nelements && ii + 3 < nelem_max; ii++) { + values[ii + 3] = tmp_values[ii]; + } + + nelements = 3 + nelements; + + values[0] = BSHUF_VERSION_MAJOR; + values[1] = BSHUF_VERSION_MINOR; + + elem_size = H5Tget_size(type); + if(elem_size <= 0) { + PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, + "Invalid element size."); + return -1; + } + + values[2] = elem_size; + + // Validate user supplied arguments. + if (nelements > 3) { + if (values[3] % 8 || values[3] < 0) { + sprintf(msg, "Error in bitshuffle. 
Invalid block size: %d.", + values[3]); + PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, msg); + return -1; + } + } + if (nelements > 4) { + switch (values[4]) { + case 0: + break; + case BSHUF_H5_COMPRESS_LZ4: + break; + #ifdef ZSTD_SUPPORT + case BSHUF_H5_COMPRESS_ZSTD: + break; + #endif + default: + PUSH_ERR("bshuf_h5_set_local", H5E_CALLBACK, + "Invalid bitshuffle compression."); + } + } + + r = H5Pmodify_filter(dcpl, BSHUF_H5FILTER, flags, nelements, values); + if(r<0) return -1; + + return 1; +} + + +size_t bshuf_h5_filter(unsigned int flags, size_t cd_nelmts, + const unsigned int cd_values[], size_t nbytes, + size_t *buf_size, void **buf) { + + size_t size, elem_size; + int err = -1; + char msg[80]; + size_t block_size = 0; + size_t buf_size_out, nbytes_uncomp, nbytes_out; + char* in_buf = *buf; + void *out_buf; + + if (cd_nelmts < 3) { + PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, + "Not enough parameters."); + return 0; + } + elem_size = cd_values[2]; +#ifdef ZSTD_SUPPORT + const int comp_lvl = cd_values[5]; +#endif + + // User specified block size. + if (cd_nelmts > 3) block_size = cd_values[3]; + + if (block_size == 0) block_size = bshuf_default_block_size(elem_size); + +#ifndef ZSTD_SUPPORT + if (cd_nelmts > 4 && (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD)) { + PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, + "ZSTD compression filter chosen but ZSTD support not installed."); + return 0; + } +#endif + + // Compression in addition to bitshuffle. + if (cd_nelmts > 4 && (cd_values[4] == BSHUF_H5_COMPRESS_LZ4 || cd_values[4] == BSHUF_H5_COMPRESS_ZSTD)) { + if (flags & H5Z_FLAG_REVERSE) { + // First eight bytes is the number of bytes in the output buffer, + // little endian. + nbytes_uncomp = bshuf_read_uint64_BE(in_buf); + // Override the block size with the one read from the header. + block_size = bshuf_read_uint32_BE((const char*) in_buf + 8) / elem_size; + // Skip over the header. 
+ in_buf += 12; + buf_size_out = nbytes_uncomp; + } else { + nbytes_uncomp = nbytes; + // Pick which compressions library to use + if(cd_values[4] == BSHUF_H5_COMPRESS_LZ4) { + buf_size_out = bshuf_compress_lz4_bound(nbytes_uncomp / elem_size, + elem_size, block_size) + 12; + } +#ifdef ZSTD_SUPPORT + else if (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD) { + buf_size_out = bshuf_compress_zstd_bound(nbytes_uncomp / elem_size, + elem_size, block_size) + 12; + } +#endif + } + } else { + nbytes_uncomp = nbytes; + buf_size_out = nbytes; + } + + // TODO, remove this restriction by memcopying the extra. + if (nbytes_uncomp % elem_size) { + PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, + "Non integer number of elements."); + return 0; + } + size = nbytes_uncomp / elem_size; + + out_buf = malloc(buf_size_out); + if (out_buf == NULL) { + PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, + "Could not allocate output buffer."); + return 0; + } + + if (cd_nelmts > 4 && (cd_values[4] == BSHUF_H5_COMPRESS_LZ4 || cd_values[4] == BSHUF_H5_COMPRESS_ZSTD)) { + if (flags & H5Z_FLAG_REVERSE) { + // Bit unshuffle/decompress. + // Pick which compressions library to use + if(cd_values[4] == BSHUF_H5_COMPRESS_LZ4) { + err = bshuf_decompress_lz4(in_buf, out_buf, size, elem_size, block_size); + } +#ifdef ZSTD_SUPPORT + else if (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD) { + err = bshuf_decompress_zstd(in_buf, out_buf, size, elem_size, block_size); + } +#endif + nbytes_out = nbytes_uncomp; + } else { + // Bit shuffle/compress. + // Write the header, described in + // http://www.hdfgroup.org/services/filters/HDF5_LZ4.pdf. + // Techincally we should be using signed integers instead of + // unsigned ones, however for valid inputs (positive numbers) these + // have the same representation. 
+ bshuf_write_uint64_BE(out_buf, nbytes_uncomp); + bshuf_write_uint32_BE((char*) out_buf + 8, block_size * elem_size); + if(cd_values[4] == BSHUF_H5_COMPRESS_LZ4) { + err = bshuf_compress_lz4(in_buf, (char*) out_buf + 12, size, + elem_size, block_size); + } +#ifdef ZSTD_SUPPORT + else if (cd_values[4] == BSHUF_H5_COMPRESS_ZSTD) { + err = bshuf_compress_zstd(in_buf, (char*) out_buf + 12, size, + elem_size, block_size, comp_lvl); + } +#endif + nbytes_out = err + 12; + } + } else { + if (flags & H5Z_FLAG_REVERSE) { + // Bit unshuffle. + err = bshuf_bitunshuffle(in_buf, out_buf, size, elem_size, + block_size); } else { + // Bit shuffle. + err = bshuf_bitshuffle(in_buf, out_buf, size, elem_size, + block_size); } nbytes_out = nbytes; } + //printf("nb_in %d, nb_uncomp %d, nb_out %d, buf_out %d, block %d\n", + //nbytes, nbytes_uncomp, nbytes_out, buf_size_out, block_size); + + if (err < 0) { + sprintf(msg, "Error in bitshuffle with error code %d.", err); + PUSH_ERR("bshuf_h5_filter", H5E_CALLBACK, msg); + free(out_buf); + return 0; + } else { + free(*buf); + *buf = out_buf; + *buf_size = buf_size_out; + + return nbytes_out; + } +} + + + +H5Z_class_t bshuf_H5Filter[1] = {{ + H5Z_CLASS_T_VERS, + (H5Z_filter_t)(BSHUF_H5FILTER), + 1, 1, + "bitshuffle; see https://github.com/kiyo-masui/bitshuffle", + NULL, + (H5Z_set_local_func_t)(bshuf_h5_set_local), + (H5Z_func_t)(bshuf_h5_filter) +}}; + + +int bshuf_register_h5filter(void){ + + int retval; + + retval = H5Zregister(bshuf_H5Filter); + if(retval<0){ + PUSH_ERR("bshuf_register_h5filter", + H5E_CANTREGISTER, "Can't register bitshuffle filter"); + } + return retval; +} diff --git a/src/bshuf_h5filter.h b/src/bshuf_h5filter.h new file mode 100644 index 00000000..54ee6775 --- /dev/null +++ b/src/bshuf_h5filter.h @@ -0,0 +1,67 @@ +/* + * Bitshuffle HDF5 filter + * + * This file is part of Bitshuffle + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for 
details about copyright and rights to use. + * + * + * Header File + * + * Filter Options + * -------------- + * block_size (option slot 0) : integer (optional) + * What block size to use (in elements not bytes). Default is 0, + * for which bitshuffle will pick a block size with a target of 8kb. + * Compression (option slot 1) : 0 or BSHUF_H5_COMPRESS_LZ4 + * Whether to apply LZ4 compression to the data after bitshuffling. + * This is much faster than applying compression as a second filter + * because it is done when the small block of data is already in the + * L1 cache. + * + * For LZ4 compression, the compressed format of the data is the same as + * for the normal LZ4 filter described in + * http://www.hdfgroup.org/services/filters/HDF5_LZ4.pdf. + * + */ + + +#ifndef BSHUF_H5FILTER_H +#define BSHUF_H5FILTER_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define H5Z_class_t_vers 2 +#include "hdf5.h" + + +#define BSHUF_H5FILTER 32008 + + +#define BSHUF_H5_COMPRESS_LZ4 2 +#define BSHUF_H5_COMPRESS_ZSTD 3 + + +extern H5Z_class_t bshuf_H5Filter[1]; + + +/* ---- bshuf_register_h5filter ---- + * + * Register the bitshuffle HDF5 filter within the HDF5 library. + * + * Call this before using the bitshuffle HDF5 filter from C unless + * using dynamically loaded filters. + * + */ +int bshuf_register_h5filter(void); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // BSHUF_H5FILTER_H diff --git a/src/bshuf_h5plugin.c b/src/bshuf_h5plugin.c new file mode 100644 index 00000000..22e99929 --- /dev/null +++ b/src/bshuf_h5plugin.c @@ -0,0 +1,19 @@ +/* + * Dynamically loaded filter plugin for HDF5 Bitshuffle filter. + * + * This file is part of Bitshuffle + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for details about copyright and rights to use. 
+ * + */ + + +#include "bshuf_h5filter.h" +#include "H5PLextern.h" + +H5PL_type_t H5PLget_plugin_type(void) {return H5PL_TYPE_FILTER;} +const void* H5PLget_plugin_info(void) {return bshuf_H5Filter;} + diff --git a/src/hdf5_dl.c b/src/hdf5_dl.c new file mode 100644 index 00000000..8e47fb80 --- /dev/null +++ b/src/hdf5_dl.c @@ -0,0 +1,358 @@ +# /*########################################################################## +# +# Copyright (c) 2019 European Synchrotron Radiation Facility +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# +# ###########################################################################*/ +/* This provides replacement for HDF5 functions/variables used by filters. + * + * Those replacement provides no-op functions by default and if init_filter + * is called it provides access to HDF5 functions/variables through dynamic + * loading. + * This is useful on Linux/macOS to avoid linking the plugin with a dedicated + * HDF5 library. 
+ */ +#include +#include +#include +#include "hdf5.h" + + +/*Function types*/ +/*H5*/ +typedef herr_t (*DL_func_H5open)(void); +/*H5E*/ +typedef herr_t (* DL_func_H5Epush1)( + const char *file, const char *func, unsigned line, + H5E_major_t maj, H5E_minor_t min, const char *str); +typedef herr_t (* DL_func_H5Epush2)( + hid_t err_stack, const char *file, const char *func, unsigned line, + hid_t cls_id, hid_t maj_id, hid_t min_id, const char *msg, ...); +/*H5P*/ +typedef herr_t (* DL_func_H5Pget_filter_by_id2)(hid_t plist_id, H5Z_filter_t id, + unsigned int *flags/*out*/, size_t *cd_nelmts/*out*/, + unsigned cd_values[]/*out*/, size_t namelen, char name[]/*out*/, + unsigned *filter_config/*out*/); +typedef int (* DL_func_H5Pget_chunk)( + hid_t plist_id, int max_ndims, hsize_t dim[]/*out*/); +typedef herr_t (* DL_func_H5Pmodify_filter)( + hid_t plist_id, H5Z_filter_t filter, + unsigned int flags, size_t cd_nelmts, + const unsigned int cd_values[/*cd_nelmts*/]); +/*H5T*/ +typedef size_t (* DL_func_H5Tget_size)( + hid_t type_id); +typedef H5T_class_t (* DL_func_H5Tget_class)(hid_t type_id); +typedef hid_t (* DL_func_H5Tget_super)(hid_t type); +typedef herr_t (* DL_func_H5Tclose)(hid_t type_id); +/*H5Z*/ +typedef herr_t (* DL_func_H5Zregister)( + const void *cls); + + +static struct { + /*H5*/ + DL_func_H5open H5open; + /*H5E*/ + DL_func_H5Epush1 H5Epush1; + DL_func_H5Epush2 H5Epush2; + /*H5P*/ + DL_func_H5Pget_filter_by_id2 H5Pget_filter_by_id2; + DL_func_H5Pget_chunk H5Pget_chunk; + DL_func_H5Pmodify_filter H5Pmodify_filter; + /*H5T*/ + DL_func_H5Tget_size H5Tget_size; + DL_func_H5Tget_class H5Tget_class; + DL_func_H5Tget_super H5Tget_super; + DL_func_H5Tclose H5Tclose; + /*H5T*/ + DL_func_H5Zregister H5Zregister; +} DL_H5Functions = { + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}; + +static struct { + /*HDF5 variables*/ + void *h5e_cantregister_ptr; + void *h5e_callback_ptr; + void *h5e_pline_ptr; + void *h5e_err_cls_ptr; +} H5Variables_ptr = { + 
NULL, NULL, NULL, NULL}; + +/*HDF5 variables*/ +hid_t H5E_CANTREGISTER_g = -1; +hid_t H5E_CALLBACK_g = -1; +hid_t H5E_PLINE_g = -1; +hid_t H5E_ERR_CLS_g = -1; + + +static bool is_init = false; + +/* + * Try to find a symbol within a library + * + * handle: Handle to the library + * symbol: Symbol to look for + * Returns: a pointer to the symbol or NULL + * if the symbol can't be found + */ +void *find_sym(void *handle, const char *symbol) { + + void *ret = NULL, *err = NULL; + dlerror(); /* clear error code */ + ret = dlsym(handle, symbol); + + if(ret != NULL && (err = dlerror()) == NULL) + return ret; + else + return NULL; +} + +/* + * Check that all symbols have been loaded + * + * Returns: -1 if an error occured, 0 for success + */ +int check_symbols() { + + if(DL_H5Functions.H5open == NULL) + return -1; + + /*H5E*/ + if(DL_H5Functions.H5Epush1 == NULL) + return -1; + + if(DL_H5Functions.H5Epush2 == NULL) + return -1; + + /*H5P*/ + if(DL_H5Functions.H5Pget_filter_by_id2 == NULL) + return -1; + + if(DL_H5Functions.H5Pget_chunk == NULL) + return -1; + + if(DL_H5Functions.H5Pmodify_filter == NULL) + return -1; + + /*H5T*/ + if(DL_H5Functions.H5Tget_size == NULL) + return -1; + + if(DL_H5Functions.H5Tget_class == NULL) + return -1; + + if(DL_H5Functions.H5Tget_super == NULL) + return -1; + + if(DL_H5Functions.H5Tclose == NULL) + return -1; + + /*H5Z*/ + if(DL_H5Functions.H5Zregister == NULL) + return -1; + + /*Variables*/ + if(H5Variables_ptr.h5e_cantregister_ptr == NULL) + return -1; + + if(H5Variables_ptr.h5e_callback_ptr == NULL) + return -1; + + if(H5Variables_ptr.h5e_pline_ptr == NULL) + return -1; + + if(H5Variables_ptr.h5e_err_cls_ptr == NULL) + return -1; + + return 0; + +} + +/* Initialize the dynamic loading of symbols and register the plugin + * + * libname: Name of the DLL from which to load libHDF5 symbols + * Returns: -1 if an error occured, 0 for success + */ +int init_filter(const char *libname) +{ + int retval = -1; + void *handle = NULL; + + handle 
= dlopen(libname, RTLD_LAZY | RTLD_LOCAL); + + if (handle != NULL) { + /*H5*/ + if(DL_H5Functions.H5open == NULL) + // find_sym will return NULL if it fails so no need to check return ptr + DL_H5Functions.H5open = (DL_func_H5open)find_sym(handle, "H5open"); + + /*H5E*/ + if(DL_H5Functions.H5Epush1 == NULL) + DL_H5Functions.H5Epush1 = (DL_func_H5Epush1)find_sym(handle, "H5Epush1"); + + if(DL_H5Functions.H5Epush2 == NULL) + DL_H5Functions.H5Epush2 = (DL_func_H5Epush2)find_sym(handle, "H5Epush2"); + + /*H5P*/ + if(DL_H5Functions.H5Pget_filter_by_id2 == NULL) + DL_H5Functions.H5Pget_filter_by_id2 = + (DL_func_H5Pget_filter_by_id2)find_sym(handle, "H5Pget_filter_by_id2"); + + if(DL_H5Functions.H5Pget_chunk == NULL) + DL_H5Functions.H5Pget_chunk = (DL_func_H5Pget_chunk)find_sym(handle, "H5Pget_chunk"); + + if(DL_H5Functions.H5Pmodify_filter == NULL) + DL_H5Functions.H5Pmodify_filter = + (DL_func_H5Pmodify_filter)find_sym(handle, "H5Pmodify_filter"); + + /*H5T*/ + if(DL_H5Functions.H5Tget_size == NULL) + DL_H5Functions.H5Tget_size = (DL_func_H5Tget_size)find_sym(handle, "H5Tget_size"); + + if(DL_H5Functions.H5Tget_class == NULL) + DL_H5Functions.H5Tget_class = (DL_func_H5Tget_class)find_sym(handle, "H5Tget_class"); + + if(DL_H5Functions.H5Tget_super == NULL) + DL_H5Functions.H5Tget_super = (DL_func_H5Tget_super)find_sym(handle, "H5Tget_super"); + + if(DL_H5Functions.H5Tclose == NULL) + DL_H5Functions.H5Tclose = (DL_func_H5Tclose)find_sym(handle, "H5Tclose"); + + /*H5Z*/ + if(DL_H5Functions.H5Zregister == NULL) + DL_H5Functions.H5Zregister = (DL_func_H5Zregister)find_sym(handle, "H5Zregister"); + + /*Variables*/ + if(H5Variables_ptr.h5e_cantregister_ptr == NULL) + H5Variables_ptr.h5e_cantregister_ptr = find_sym(handle, "H5E_CANTREGISTER_g"); + + if(H5Variables_ptr.h5e_callback_ptr == NULL) + H5Variables_ptr.h5e_callback_ptr = find_sym(handle, "H5E_CALLBACK_g"); + + if(H5Variables_ptr.h5e_pline_ptr == NULL) + H5Variables_ptr.h5e_pline_ptr = find_sym(handle, "H5E_PLINE_g"); 
+ + if(H5Variables_ptr.h5e_err_cls_ptr == NULL) + H5Variables_ptr.h5e_err_cls_ptr = find_sym(handle, "H5E_ERR_CLS_g"); + + retval = check_symbols(); + if(!retval) { + H5E_CANTREGISTER_g = *((hid_t *)H5Variables_ptr.h5e_cantregister_ptr); + H5E_CALLBACK_g = *((hid_t *)H5Variables_ptr.h5e_callback_ptr); + H5E_PLINE_g = *((hid_t *)H5Variables_ptr.h5e_pline_ptr); + H5E_ERR_CLS_g = *((hid_t *)H5Variables_ptr.h5e_err_cls_ptr); + is_init = true; + } + } + + return retval; +}; + + +#define CALL(fallback, func, ...)\ + if(DL_H5Functions.func != NULL) {\ + return DL_H5Functions.func(__VA_ARGS__);\ + } else {\ + return fallback;\ + } + + +/*Function wrappers*/ +/*H5*/ +herr_t H5open(void) +{ +CALL(0, H5open) +}; + +/*H5E*/ +herr_t H5Epush1(const char *file, const char *func, unsigned line, + H5E_major_t maj, H5E_minor_t min, const char *str) +{ +CALL(0, H5Epush1, file, func, line, maj, min, str) +} + +herr_t H5Epush2(hid_t err_stack, const char *file, const char *func, unsigned line, + hid_t cls_id, hid_t maj_id, hid_t min_id, const char *fmt, ...) +{ + if(DL_H5Functions.H5Epush2 != NULL) { + /* Avoid using variadic: convert fmt+ ... 
to a message sting */ + va_list ap; + char msg_string[256]; /*Buffer hopefully wide enough*/ + + va_start(ap, fmt); + vsnprintf(msg_string, sizeof(msg_string), fmt, ap); + msg_string[sizeof(msg_string) - 1] = '\0'; + va_end(ap); + + return DL_H5Functions.H5Epush2(err_stack, file, func, line, cls_id, maj_id, min_id, msg_string); + } else { + return 0; + } +} + +/*H5P*/ +herr_t H5Pget_filter_by_id2(hid_t plist_id, H5Z_filter_t id, + unsigned int *flags/*out*/, size_t *cd_nelmts/*out*/, + unsigned cd_values[]/*out*/, size_t namelen, char name[]/*out*/, + unsigned *filter_config/*out*/) +{ +CALL(0, H5Pget_filter_by_id2, plist_id, id, flags, cd_nelmts, cd_values, namelen, name, filter_config) +} + +int H5Pget_chunk(hid_t plist_id, int max_ndims, hsize_t dim[]/*out*/) +{ +CALL(0, H5Pget_chunk, plist_id, max_ndims, dim) +} + +herr_t H5Pmodify_filter(hid_t plist_id, H5Z_filter_t filter, + unsigned int flags, size_t cd_nelmts, + const unsigned int cd_values[/*cd_nelmts*/]) +{ +CALL(0, H5Pmodify_filter, plist_id, filter, flags, cd_nelmts, cd_values) +} + +/*H5T*/ +size_t H5Tget_size(hid_t type_id) +{ +CALL(0, H5Tget_size, type_id) +} + +H5T_class_t H5Tget_class(hid_t type_id) +{ +CALL(H5T_NO_CLASS, H5Tget_class, type_id) +} + + +hid_t H5Tget_super(hid_t type) +{ +CALL(0, H5Tget_super, type) +} + +herr_t H5Tclose(hid_t type_id) +{ +CALL(0, H5Tclose, type_id) +} + +/*H5Z*/ +herr_t H5Zregister(const void *cls) +{ +CALL(-1, H5Zregister, cls) +} diff --git a/src/iochain.c b/src/iochain.c new file mode 100644 index 00000000..baa97296 --- /dev/null +++ b/src/iochain.c @@ -0,0 +1,90 @@ +/* + * IOchain - Distribute a chain of dependant IO events amoung threads. + * + * This file is part of Bitshuffle + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for details about copyright and rights to use. 
+ * + */ + +#include +#include "iochain.h" + + +void ioc_init(ioc_chain *C, const void *in_ptr_0, void *out_ptr_0) { +#ifdef _OPENMP + omp_init_lock(&C->next_lock); + for (size_t ii = 0; ii < IOC_SIZE; ii ++) { + omp_init_lock(&(C->in_pl[ii].lock)); + omp_init_lock(&(C->out_pl[ii].lock)); + } +#endif + C->next = 0; + C->in_pl[0].ptr = in_ptr_0; + C->out_pl[0].ptr = out_ptr_0; +} + + +void ioc_destroy(ioc_chain *C) { +#ifdef _OPENMP + omp_destroy_lock(&C->next_lock); + for (size_t ii = 0; ii < IOC_SIZE; ii ++) { + omp_destroy_lock(&(C->in_pl[ii].lock)); + omp_destroy_lock(&(C->out_pl[ii].lock)); + } +#endif +} + + +const void * ioc_get_in(ioc_chain *C, size_t *this_iter) { +#ifdef _OPENMP + omp_set_lock(&C->next_lock); + #pragma omp flush +#endif + *this_iter = C->next; + C->next ++; +#ifdef _OPENMP + omp_set_lock(&(C->in_pl[*this_iter % IOC_SIZE].lock)); + omp_set_lock(&(C->in_pl[(*this_iter + 1) % IOC_SIZE].lock)); + omp_set_lock(&(C->out_pl[(*this_iter + 1) % IOC_SIZE].lock)); + omp_unset_lock(&C->next_lock); +#endif + return C->in_pl[*this_iter % IOC_SIZE].ptr; +} + + +void ioc_set_next_in(ioc_chain *C, size_t* this_iter, void* in_ptr) { + C->in_pl[(*this_iter + 1) % IOC_SIZE].ptr = in_ptr; +#ifdef _OPENMP + omp_unset_lock(&(C->in_pl[(*this_iter + 1) % IOC_SIZE].lock)); +#endif +} + + +void * ioc_get_out(ioc_chain *C, size_t *this_iter) { +#ifdef _OPENMP + omp_set_lock(&(C->out_pl[(*this_iter) % IOC_SIZE].lock)); + #pragma omp flush +#endif + void *out_ptr = C->out_pl[*this_iter % IOC_SIZE].ptr; +#ifdef _OPENMP + omp_unset_lock(&(C->out_pl[(*this_iter) % IOC_SIZE].lock)); +#endif + return out_ptr; +} + + +void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr) { + C->out_pl[(*this_iter + 1) % IOC_SIZE].ptr = out_ptr; +#ifdef _OPENMP + omp_unset_lock(&(C->out_pl[(*this_iter + 1) % IOC_SIZE].lock)); + // *in_pl[this_iter]* lock released at the end of the iteration to avoid being + // overtaken by previous threads and having *out_pl[this_iter]* 
corrupted. + // Especially worried about thread 0, iteration 0. + omp_unset_lock(&(C->in_pl[(*this_iter) % IOC_SIZE].lock)); +#endif +} + diff --git a/src/iochain.h b/src/iochain.h new file mode 100644 index 00000000..4e225d1b --- /dev/null +++ b/src/iochain.h @@ -0,0 +1,94 @@ +/* + * IOchain - Distribute a chain of dependant IO events amoung threads. + * + * This file is part of Bitshuffle + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for details about copyright and rights to use. + * + * + * Header File + * + * Similar in concept to a queue. Each task includes reading an input + * and writing output, but the location of the input/output (the pointers) + * depend on the previous item in the chain. + * + * This is designed for parallelizing blocked compression/decompression IO, + * where the destination of a compressed block depends on the compressed size + * of all previous blocks. + * + * Implemented with OpenMP locks. + * + * + * Usage + * ----- + * - Call `ioc_init` in serial block. + * - Each thread should create a local variable *size_t this_iter* and + * pass its address to all function calls. Its value will be set + * inside the functions and is used to identify the thread. + * - Each thread must call each of the `ioc_get*` and `ioc_set*` methods + * exactly once per iteration, starting with `ioc_get_in` and ending + * with `ioc_set_next_out`. + * - The order (`ioc_get_in`, `ioc_set_next_in`, *work*, `ioc_get_out`, + * `ioc_set_next_out`, *work*) is most efficient. + * - Have each thread call `ioc_end_pop`. + * - `ioc_get_in` is blocked until the previous entry's + * `ioc_set_next_in` is called. + * - `ioc_get_out` is blocked until the previous entry's + * `ioc_set_next_out` is called. + * - There are no blocks on the very first iteration. + * - Call `ioc_destroy` in serial block. + * - Safe for num_threads >= IOC_SIZE (but less efficient). 
+ * + */ + + +#ifndef IOCHAIN_H +#define IOCHAIN_H + + +#include +#ifdef _OPENMP +#include +#endif + + +#define IOC_SIZE 33 + + +typedef struct ioc_ptr_and_lock { +#ifdef _OPENMP + omp_lock_t lock; +#endif + void *ptr; +} ptr_and_lock; + +typedef struct ioc_const_ptr_and_lock { +#ifdef _OPENMP + omp_lock_t lock; +#endif + const void *ptr; +} const_ptr_and_lock; + + +typedef struct ioc_chain { +#ifdef _OPENMP + omp_lock_t next_lock; +#endif + size_t next; + const_ptr_and_lock in_pl[IOC_SIZE]; + ptr_and_lock out_pl[IOC_SIZE]; +} ioc_chain; + + +void ioc_init(ioc_chain *C, const void *in_ptr_0, void *out_ptr_0); +void ioc_destroy(ioc_chain *C); +const void * ioc_get_in(ioc_chain *C, size_t *this_iter); +void ioc_set_next_in(ioc_chain *C, size_t* this_iter, void* in_ptr); +void * ioc_get_out(ioc_chain *C, size_t *this_iter); +void ioc_set_next_out(ioc_chain *C, size_t *this_iter, void* out_ptr); + +#endif // IOCHAIN_H + diff --git a/src/lzf_h5plugin.c b/src/lzf_h5plugin.c new file mode 100644 index 00000000..cbf7e3d8 --- /dev/null +++ b/src/lzf_h5plugin.c @@ -0,0 +1,42 @@ +/* + * Dynamically loaded filter plugin for HDF5 LZF filter. + * + * This file is part of Bitshuffle + * Author: Kiyoshi Masui + * Website: http://www.github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * See LICENSE file for details about copyright and rights to use. 
+ * + */ + + +#define H5Z_class_t_vers 2 +#include "lzf_filter.h" +#include "H5PLextern.h" + +#include + + +size_t lzf_filter(unsigned flags, size_t cd_nelmts, + const unsigned cd_values[], size_t nbytes, + size_t *buf_size, void **buf); + + +herr_t lzf_set_local(hid_t dcpl, hid_t type, hid_t space); + + +H5Z_class_t lzf_H5Filter[1] = {{ + H5Z_CLASS_T_VERS, + (H5Z_filter_t)(H5PY_FILTER_LZF), + 1, 1, + "lzf", + NULL, + (H5Z_set_local_func_t)(lzf_set_local), + (H5Z_func_t)(lzf_filter) +}}; + + +H5PL_type_t H5PLget_plugin_type(void) {return H5PL_TYPE_FILTER;} +const void* H5PLget_plugin_info(void) {return lzf_H5Filter;} + diff --git a/tests/data/regression_0.1.3.h5 b/tests/data/regression_0.1.3.h5 new file mode 100644 index 0000000000000000000000000000000000000000..875b751bf73cc79ff5dc4e073c75288ff440856d GIT binary patch literal 114447 zcmeFa1ymi$w)l+&cL)vv5`udmxVt+9cZc8(!8J&54-z1_I|O%kcL@%`HThtG$>iqd z&bRLRzjyDc!f;n{zp7tb80Z<8Y8c->u8r^$YUL^(*oCw*9?6R=h@YwPGNc*1|ARqwf?SGWJj{&^1$N&Lv3)=5K_Fe*CpD}J~kghj3^3h!$ z2ngu69l)>mzexZCp5EGy0QY;|c7%9$#<^(&YvCjF-7>tb|4(nd8zIs(KT05p7~~qn zgT>Q)&OZ}ySTWh^nlF%39yV;BWM3J(|ZDbX_!A4{*r_z_ei)b}uMr9Osc@;!;-Jg$zdyaidoXYWj`K&;+K zr~&ajKm1ZZH+%8rg1QD_@o{p4PUr{GZvGy=$iBC5h>25JV(jF+V<~+FyLuOqj^mzB zB3%p0;rE5)a`BhxDW{o?_P>bcDhd>cpM1tRQ;0r(C4I;720q7ieA|gE~((}w0`-%V+>t|Duho=8M%T1%_5Aq21{b&cQSroHER&JuQn0O}p*|!nGw8PxULoHXXci_Gv=} zpM>&ec8Ya!j+T20eC-xljV3Dkr>?SUJhEkyNW8(kG7x&gIDa{|M`G~c1$*;!fzrvk z>cee%%Mx8FNpwC_SUakyZD$Tb;9D4O;^up!J`+>ny}A*N=`h|Un6yE!Y;%{BM`>pK zQHh{MvM9AQDWs={swV^?Y)tXmDvmS4!<0!I3yNpUEI$%hc4~*=;7ko*LZgM$K;WayIjS#W`ES0FrZ5YSXnGRt7l<~k;4jJ6Xp8sMcAI&1 zf?#n-%bfmtpf_RqI&xgc>uE$T12+Qao2TXp@?f>8bEV;uw2N3dPm+MRM_59H2DU04@SO_>mNc2}0QzaH_XysS8y)eS|zT1+DW-orJW1?bw**5N@ zJv$^TT)%MU}8CV*hZ43gSrL z#*tNiu5lDo!D%Yyt1w)a7`JRr<57N;rTtdt>+xbwNwnW=r|v$i(`)am$9N*dlqnY( z)tt{;)W*!QSXpST0_LK|D&rKy{Zar|*5gb`h++w{y~**>KhGgceoh)^;y?_bWLs5* zdZ(_H`3`ELE1lM=%_3v5i+}^$45PxqY76{v^b2+Kv&eGiN{T|N+Q-r~HuV{oMcS*# 
zlUgtzFe@o%Jh6cDiN9bJuRD(0cdJ^;cv5iMIXReajj_^$hwOJiNMF)z@Hb?l3P}o2 zn?w^+8)7}@3fL}A#hI3Wihd2_CTw0uj_Y_m)#E~_68z8lw-a~N;{%#ro)(B|QXGq3 z2}!4Hp?^G7&Kqilo%4RxndGucx;zV|bR`yRsWsfDgs;06#Fqa}bMLm7+VgFV8TBoB z42Q>Kv4?~+v(`K*mG!{-?J>b>k5~D?PEUXltRnr3QhfSN16s^0SY=v)#vy4}kxwTE zqMvq%-S$F`YlH2^x{K%o6m2$wTpUfDQz6290JQZ%X&Hp8b~JeYv&$RZoC;>);1j*~ z7(1bIu!Q}Sxb5sU6a>4& zar6frP9k?hEOj=U#GZnuxmj6!yqLZ*AD2>?Cpl#XyvD;Rr_*d`ZFTDuRc&ns5 z+typ`lFT=7&%X{c+Ygzn$_$C_a||QS?-&peIRH(#I@2KT+Ev3G50Ncda;NgzzXR1P z_yVV^4H9Da%ss}*3h*O&R$9FUhfI!G-P2QYf^-T?Z-Ob{H6_bh#5F;kyyOeV4-oyR zHKj0V1cBBEz;Fy#Nh;((g#)m(1tSMYQ?23a6R5exk4OvG=356JjVi276dl2at5@}| z5?@+fkQ8P~VzZxlTPb=iLdy4RJTrR)lL=>}u*tN1s(R%zb3Mg>{cw9-1OgC!Hxwtd z-^`j{4h;ORX6;X|e{k>G>C^tQtKTZ_?SBQ$f8FlCy%rDv?Ej5j34GfP6oCG3i953t z0Nk158o?s~O#>@SeQRA^V;yE(OC23teJd+7OD0N6Jp(I!YfXx4w-2R}f&FW86Aepi z1Iq8M|H$uVhHoOrbp*POH{o|X76>RB<+kX?mim1${A|af-M0G0_8-?3s{Zd?{nI_} z*A)lyKdt_~5ce)+_u}`Wf10>oSNdB2wE8bocyk?nbIpD42fmj#|2F?wsE7Ko^kx!o z!oQ|}+vZ{WK;VJE1Azwu4+I_v{51*unSb-fvvL#e<^O;GW;XPXd*0@LwhMi4EB;Hi ze`c58bGVnk4Pxq3-1`?Wdpam;1op61v~>HutkV^VhN`??-blf63!N zZ%;CiexHEf&AFfX|9kc%-0wa9cpO;V+)DjJxaahX=6`7UkbEHUK;VJE1Azwu4+Q?I z1pdsPoVXj<9Pybz`0AH?dOv@&Lk%r5X0m>mepy*I2hUkSkQ~ z`l4S{@kS4Lb7Y2oJh2nM)q*z~$xW(-))B7uvF|h}7cioJNh9orS> z(eA5k(r95HC%q>jp=5&pTgbrRlV>L{C21n;CV$xrp6cexD+)l_BTPLr!Z?& z5&?61n+;nk_!YW`NItDoc4*1*0*oyJYP`yekwG~)PiG4Xu}6AfI%`Jtv0eSF6cb}{ zMOphOhWx0=v5~7)nU$|fg)&jA#HDDZ+fMbcCNoZG>@kFl^R3A3z|2Y%YTz+go&^Dg zbIdXK6(_389woSUetl!hiw@hYWwNF-GYII5m^VZBSphj2pv)flX$S+U{)hBNo%jqq zzz}ITR3(qojQ2|xO%Sir_(6Q(h;ZHHhh8rcgT8pzjhp*5w84cBVc( z_M_Tj<<=*``}D%kUK9r^V0~d^d$kTi3z;%1kme0XPMy%*7t%aX;xv1ARR-97y%t<= zS0&tY8t;pQ%lCyKtW-pLK`~|mO~kg_d?#wv<%^*6Y=aom4y`@0T(3kDlwn6OC4pQ5 zXM-L~<8Phv7)*5_99fhgEL|R)5$~yC9{`YPkyDA$O-xHBfSE9n z7|-Z}J>@e^c``nzEA-A_t+H|CWK+XnI92ji3sm@Wqg7UE1`d_yaF);>V1grU^NpE; zwqc_0I5~CLKC4b`cyuh{s4^vnjh=Ldw5x4&PU=)FhWkm5iG&RYr~ug7MzFno5j*zm zM9gk0S<||!0o+XH%fu2dAovBgVI`TSkVCEaVmD5wB}RrlC>SUP8Em+kqZt#)h*BSR 
zg!Qc!{m~%xW%Ntmo-JbaPY_AfmBHzr+FF?TAJP|3svyB|pzKb)Mpb;oUB$2s5rX#y z4XAaYsfw2P3opZ?Js4UEmdcau`XWhQBl?T-VJ!d0n9(IGP?Ir49q%a}_&$<7qg<7T zZDT4*!QtRH>57{WkzvYtf;;sT9)-^9xrbP9mB_Ug*WbKR&WgfdAgC?DJ#z2_Hb)Q@ zF#muWnz_P;rUoIb5)hx2(4XB36Uy#_^$=m|f1K~E zmiRQ;)DP(CTU_!Xp-;KhHG7YV%ZG{qZMv=P?C~KOMGU649CSbLo{$of8J`n3rXkz9 ze?sYp(C{EbhU*h-nR!}5{PcTH+~Ah5iLuz7*0@0;XG|z(=^%|s6&cw_t=oMfPW7^5HUQS zq2)-${LVAc2RDW!Ls5OKdfl^XPtP5KXNtfJB5e;i1iQ492Q8t$KT^6WT(1nQTFb2) zh+=!D55Dh>%Z?BI0i?PMFGoYh?AchJE#bhW03l{_3?lNTMD+969M(2(!X-_RHvm%s z;%H}2r(Ka{WbK3{UE+=H>t3V}R&wL^7KVpMOM^%Nzl8&Y>g=l=9^4o6KUx*a_b+#k zz)%o*3nJIe;MjG65$CUN>9IjGG5w{E7Cv}g#I@liq!G6eV#vFiA%Mr*)JuE)-XrrW z1s2Bg_zQ?>21WIOBxo&E9~);tyVZaS67x1N;U+u$q!1P=w|icM4d!>M$aX}UQp0#M zfttre_A&uhP0bN}(GESXW>Bj95*Hbh#<%+-1WdZ+ij{H6cor~8vZ?6TWEyK{c#|L^&e_I{YvyG3#>6gj`AZ<1eplef>WAC|fd>K)1Re-H5O^T)Hz4q5 z_Tlx{oS1Vng73M zPoDlTt9Og!S|})fPv4~OIsLA{kJS&k2LcZS9tb=Tcp&gV;BP|!aQJ#CuGjJU{-h@s zZGqaw&O)JoMJ^WtA{!3Aa;J{FXdkfdNyy=Q07E_jH=(_+1i&#Z-vDf4zIZ+}N_5La zc&(sB5}s!8;cHR;R@G1`3Jvh`6E;5M2J{nJdV4+K3Iu1g`nKhF4}o9=0~2<2S;qFE1|r3x7;1MsaHrI1k2UG~9z)?IVQmr*xH z)-?-%L>$=l5nw4uhFSjimA(+TI&`lAfCc;cQsawWLUC*?_o+{B%oB17fO7bZ100jg zGnq-+N6L&MuJ2J=x3C=QWre#6!%TTKdFux&VQDT_@i&^O^So;5gHoK*dWG<|cB9bu zK-($SlRd;$Rx}l9*KK`(0R3AMjA<30kybF_wH8u)oa02;W8{>PLW)rS6BY`5i68+l zU*I!$MBz2#?Q!}a!W!y-9F-H!C_Bs_&sBw$-o=P=r-#7dnRV$soiS)j)i7z;c{Eu5yt*$wPz=YsN3&o7fa1S$pv61BSDJ2dFL^B{mN zEh|Tj$EGwqVlA08L1^-yS{-YLBl%(z<@0$kiD|63A@}O2h989;j48j2<0kqiV)1<*KKe0*b9!gkrPVz*o|Rs46{_Lu%NcjBV2Z}gn5k|Ryh6|S$ZZ5yO~$!UBR7@zV^DZdMCHMZQ|<6&@!@2I z)H?uTFJI%#xBNz-km`*D6vH_#iEC=+YZsczdnNk@J`r%7F6T6nx-XS3pSc{-`v<8J&2@;V}ol5mrhIj}my|N#L`ysaz z%6NUkve=i#prTf-cuWi4_<%uViY>0%vPfMu421eYD6(aJV6*_Kv5iD@tR#UC4Gh7v zw7I}$FY%2+Su&@wBy*+`hF8#~O@s(VC8QSY$WZ$*qW*a}{t#ZYxL-|ki_T=aGNRkP zWy?CukO=!Ib(3m{Y8bfILr`|y>Fh(ga^L##?6@L!P9$a((jP8%PaZsNug#WlbSRqh9m?`cg;7LDCTK31o z2-7!=H9ZPDtSt;8tbti#v7Xxt`8IjPlG6g52M3uVb_p?I`D~Q&Rv zkW86_O$!oCholo1udsBT>A&w!9 zb=ZJV*@`lJk>JJ=-HsGl%8BzW;1YSYw0^iaD!phjIRwYBX8TIMP>X~>^g<4^pqR~b 
zeHO9ipM~vH-M#Yg`Pzm?8w_`W`({(@JJ-ge zpRg49>w9zIwb^El!}Rbw1v#G$c&s=FI8)t@BB67WhBhx-tyh6&jz!NPoJDs?6<$6; zcs$IU_<51L!Yl@``xzXSiLR@F35(LzUg6qdFQdU^=sc;strrNJJXzKBP?98=Kplj; zrF0KaGbYmT5f(~swh9zqDn3Fzn0U5xkL9=83dT&3o)1dgIJFWlmw^zr@-=%_eXTuq zHU%!-<5~g_9GaAONsGGi8CwfD#e^ChfJ~z36<%vq^F*vP2_>Fwg!0cR8{}a%2YrhG z&2=SvIfxxRkt)dNi+(){hdl#!n3TOj|2Hv`afedg&0pp0o;DbVOij|X4jR~%z%Uu} z`eT89WM)iM5(MaimWPr9igY;$QsXqQg>+3%5(>1;NygT8%a ztfsj9TPtlUfH)+S9y~xUOV zgL%(uEmf+sRTW-?xoKx!DLI<#cOSdFg>YG)4~y+N_9D+)Q$KSEkSl+$)~Um0TdozR zAXn_-hum?t5=pAKOP)h>keVnEo_PjO2GuLr!Wq+SEnSA`Tp?o11uMVDLb@_TBKCIn zq~hcW;F(_>loYklA%^#%hfr5+QH0Q1FOPB_YYQ9)Kt-T^Oxd3Rdx=UcGtx^jFI(i!6q%|S4|Jv z_iU~SQGQ{)08pxM36-3iuqFT_TMeebOiIa$h@f1_;jC^aGKoLCP{V;PQEJl*HzX^(8v zZ6GCc-49x-qwzJ~=X+$5pq-grP1@d4w02LmA?KZFKdaGU9?DA1qp*Sk<99kcRFs38 z>i6e(*r8_QLFuv^Ob^z~kX2Olo@jMLVUE~^Z=zAxg_11^Qq=X1?Tzhf3yN0e%K8}j z%lD|Sb&>66( zVQ}VC+Yt0Vy@E6`DD5I2f3vm888LnH(#h+&z;!VYM4&%Dwcgbx*xsIKi9uDi8NE*!l3XQ`weQI1L0i6XRadKQ1 zO3fJ&aH(mO=!Esfh4r?fb!3R17B{LPQ7$a!6Z!1-3$aC|?vLb0=&KakU&h+vu{*xT zOrC!ZNg^WPh&h0)Ft+_v-!EjGJ}kq@)FSSSz;^w&aY7R-rufm34~MUCQfu>)QVy>$ z`o0W>EE<$_;-Bt#fmHmy%u*T#OGB@k#NPXwd(eOFytgmVL3>BIiW86R3 zS(mIvv{=j^nB33rhX5M){1k^w`Llp3gt|Z#p&1~$W-?bv0hwW*S^ zr~Q#~G1z_k!{P#nha;RQHG*^{Gc_;B@rW@FwYtREJ1r$Dv>iX;#Zl&lig1#-IHl2} zJ%4Yj1AX?G!x{E6PoHwK#Mw5QADb*?lubw8^Mav@$?=KJ!DRH>GP+7SW;mbOa}k@E zecLH&ZrefhEwDx*)4c$-xHuGM5dDuj*>jn?*xJZK9J3@}n{db?A{U?Ao#&1w_yvOE z?3?y+^ooYif1PPQid~NO8pKA`UOw{d*PH5+;*;$$lsM{~yL5_^?U3;Eq@1oA=MbG5 zsN1)Q35#@0=U8trU)T1;u{=}eayk!d#b-Yur+w+%#mL7JV>Uzm0wn1?mUrQipn)eP zH0YkteBoy4B8=@Ew=Am>c<=j8MrR)Z{!f1HSXyzI+|nnNU~abD6*cP(h5qQ9AS(>{ zQT^ozA zjLD-N0fl6O-ZYUP#lpWWFSa}@V_^-cEa0Wz+)iQaMf>suiZ;Sq0?g6O=mKLOC#|{i zjHr*hq`{M5uq#^3y{P*NdpxSg1?nSUu$lH@SLK2wGpY`ws{;jI_V_j7ojRo^8Dh0> zkO*^i#U^!ipN)-v7?ly;X=t{I7KxldHe=mP|9Q^06Bf!0Gk>9BLZ(m3JHD>L*mj8Gh5RrVZ`)8^9oM`v4gNzo&0f-;a+U%O7$N1Re-H z5O^T)K;VJE-;Ds!&7+X@*YWyRUHYgi=VCe_5M5S>SG^cld1C_%R^r~KgFb(vNE(U_ z{~!#-Wg^@wpCxL|kjs7Jn39iQKX7}1Y}GltU%-+@Cun@xmRIGOj!tKaqFS&i8G7;9 
z`wQAv)y9%|Oo7X}AZ#X@uoGfS|8EM~GT`tpca~z*d0Fv?%t@!KMp# z9U9TD!(?UxiZF?4pC$Xb+^4H(*`?@ui`~{tQk=%lOI0O-p<%TPT_@IR1>joT_Vdw( zU8!5etL$g0@|?#T^`E(+f0(Hm4Acy$D+eT7781` z((0L$YEpj2pJ)tR`z`Pj5`y-#cfIQXv`wtC;Y_<>K>RNq*()oj(qpZv*6H04ZXUC~ z9>v$UK+_y8O@$IJA1H;y>4bgH87WPr+b~BoS~=5v7Tu3vDY6`Lf?YdI3M=<=CPWp- zWgZhp>sD3gs$b3!=N~?Q*P_1Vz7AuBN3c6T(yEDPhQ&K3_chR2s`x5eC5(MaX1@s_ zCNvQaoE+yH{^8NEuA5Xbdxq`_kJnX)kFxE0Ggalu=N$iYfQxU3sEs|-Sd#{3rRGLP zexR+n^+M5mysag#YwC;{yW-YdCrObS-hedVRf!QA)sghtpVEb2j{;*4f;)&7yc1rK z9tXEwTgQNLC!BaU|8OEN`j7X#UOWN8)hJvFAWiabF)Wjfq?O)e5bLYKQ-`)++Dsi9 zGaV?am!>t0DLQ@x>-?H92GfN}*P2m;4ZQY18aJGi{tLM?5zeP{@!AiWz+}2pPw*Sx zgHbI;Q$^@UefmOh?7}|{=Ejm;-uHZsJVfO%6?tCBi+5u~##?`B^o97TCi)I*rp99M zM)v?;VFdsV!=v3<0YMp`2D-{_+7qf6MJ+`*D6Bn=aKx@r=QWomc11-bAIvCA9cPsj z{0y)3_U$i@ErF)lk2F0Y;tA>yHZIan2qL3er+J+0Lslre_NzPzP%K;c$KgDEvI{CO z!XR3s7+l(y~tcTX1A$BdV~73JO8ahPUrhK z&9X_+xbY;6fu-BlD-pE14PMuyVb2U$4MC zvBMC=jo`ew)AZ2J$22;gd1=ndspobi=U)*RHMZ=fiQ z`_5fC)dqM&+PFaQ{fA7<6dR6p%7H_Ny3AZXsj>aDhbbZKJaWw9B!L{Qa>9y(Etqf> zLp=vNv8zWI)(d=2WTT!BCahqxS!lW^)VMSe(aBH4dGEol7`yZx19ngt+dNsfZ?@c! 
zvj52FL`&Pq5+*OEbxc9CK~1If{uPsq10e5#Hu=_eRUYtc`1_EoCZ=BDyyjP2yq!lC zFY4^`WV?0?{D}KSKU&0(wL})_XR$_(nA<~E z|Ly+cKl3Bj-woB-z{`a2e`xZaS zU-^@8ewfwo`}n^9p1w)d-(`PS;4c3l9tb=Tcp&gV;DNvcfxjDp8{^>Sg=&C6dtV}% zTKr^|GgE3%5z~-Zs|xUl#e%8VzRKsky8a~tl}>O3w$G5ujs^mAxwSS!kW)k(q;6*~ z)wnHOL0YKf(^=<=joi3W9p`*1ahNRFu8M;Urh_5gv``Uh*m0Ln0k$b%o&fp9SiOo4 za@pMhT$I+J!nV7TG5rFR*FwL$-(3_w*{$+KhZU(iQfacQ@s+O$G5L@!0NcxVRy3?2 zae-tuYm#T0-LT8m7D_%r;>OX-lZ$@yMISQV%Q(L(5{7PnS{1?xJKGb@?mV2uuUCvc zcv~{*V7OqNf=JQjj#BG~5xue2t<4;rW?rzsv9jFzs9Dk-aVW>otrv=ekgs_{=9Yr9 zvu&r7BYdU}x+?tc@QINpWU)SMBq5XbP5crcw= z{vjvSQlFYE2oMU%6{b8sx$&mI^c;SvvR(r|2h)iZm(Rc1t&Ef7zcKAg55EHkX3hup zK9qEN-!?H=8a+h8&M9?`4Q2l^z?qDGqUa&VK7lXTXv#k;gL(Y3!5aV zFA(^U48??|GAtQsE@rTE0FSLZa!b!l7e*HoJDwZvt?%qU&*oWgee0;qR`P;8Q8^~z z>TndJcy%G0y}c&qeT3=cbnzfG%>b@v6xNir8MUN27!rJTl1fozU7wGQL})Cn!7YWj z7nPDY0&44nBc;OcUT%yq6g{SuHIEl&@KPpGGJ9P7HlaLlhkC02#r7&i{Fac#qj^ed zcF@lj$!tPNM^mPh{dqDkG=O_%x=!=Pgg<490%E}#=&OZ7D&W3Z$$cTLy7Zh=Rc6c% zZUMorD4u3wUcGN;*tn@MB3#0f3&4Cg6=u71H1X?vNttQwC@kQ9d3v0&Pc zfF?MJY2LCYea0n+f0(G@4VQbd*0!8Hj?%JhSX?r<>zYNz^oZ0NKTJvXord^%v*in5 zGxhXYO$MRUT2`DZI&gw%k-1o4XlgrXlsBw~%Dd4Msl2;A7S>q^)~_wvBN(~*r7fS8 zfXL>!){vvK0e3v6ZG&a|M2|#X&Mp@Wn@Mm|J{y6TGQNt6%S^!)*wf_ciHE%zt8hex zD(#8f>+6Ar2(hU3W`?ME?xJ3cu9EG#in0(qJ6-s6Q&|0M1T8!7Y-0Tww>Y1ilFvfl=;m)iil;}r_-2lA`hHEH8?m%Wq5E00_itWU>Wh`tl zI?>*O`3?@?6cPnuw_>vu^zCGJbGQOtSY10EX zJq@zHWs~@eWqlr)m6;z9^(T za)o1g*gTkx3XBRMVZ!OBJe|b2xi1A=putOD!2uUqXM&VvF|S|hx|Nx}*~#@J0>FUI zVY3SWK*e#+Ah>kRx&UFc)w2~aXAR$OrSvMbn_LL@R+#21qPO8r=*Two1czfS)pZzN zAQW8H)-oMX@0r`0mWx#_I7f-7N;ucqH{pR~Zhkz{4rcOZIHlCXZ_Ezj>Pq1e|0ZEd zIpl%iNT~b}RF7cbIb+B{-X`^ia;D3wc)H7r zFwP=bjulX5*kvy1wd1!nt%ngT?}YX+$0Vqa@L2mo3se4X{}cajjOWk%NnLjs?&UB2 zCqF%pn0~h+$bRrV{Or&8?|L5b&-_XE9PZ^WdHm=7Ngp$B=k7+;-`zPs^Z)n!NfJNI z>i1)1yzqPaCUwv0cLjc|e#ku#cp&gV;DNvcfd>MA8v?*&-~CA;?#_$uA!!v9IFpM! zm2`n|nraZ%b)lVGZLo+XWIGz`e$vXRa0BQ=ru3^RKuk|~FhWvFNv$04`O&y%Fp`~! 
z!6KGn0=08eUD;hi>I{t(Qxdl^rkOxz1N;p)lGOWfN>PtH|2dnWzC-hhk3h6L%g~{^(!ejSc8b>7|uhGhua9lb(|FFNe;~?!wz#vmd1}IEWYWMogUAH9E2pup5yqs2&n}LF zzJjN%#|Orh^X+keg*_?#D&+G<;f7I&`XgEyVCBKZvNRc2y$wCO>0NBqof3#Owsj*T z=M~LJvnYiVs0)KcSb*NB+AMzIYDs2Ca4If<;PGw|jhII@_$OiP{S0q}o^xxamv7qoNBdYXZ3=YC109=)kJ6I57(3*ufc0go^ z^`O4mSn9|!CjpX;h6Bs^Xm2f`#eorq_37e`h1WS_3tY}8Lz8tf`Z)u141E!)y3W1P z90^3geHZtIpcez-n815RB>N0H7rYKZd8Zy2Wcm7s=lg(Aa^#7rd+dp{Ib>K_mHg5k zr^jHz`{C(uBD9wKdWM_xgUXLkgY&H7$v7KW(=oPce3qAMu}#vv#v)gf1co_F&?>YN zcCs+*l8&B(V)tA<`bbEu!o2lHy$)W}iRf*RLw55e6k9Q*WbAU$VK_tg$`+i&^w-y6 z6*@3u^=v_}5iFEHc64Z|l<0){LL}JHmy9ARbe!SXjKqG@bv>;N*it048JS0Qe8=u1 zcYGp=jvUnShZ|LYcjx@f|KIZ` z>HjdRzkSYc8=~g#>6_F&r{5L$vHBtRK;VJE1Azwu4+I_v{A~yT^L+Ow^*&jm!}TCF zghUV?)0V7w#nvJL$)T`E+a#}gw&vY%!Q*%6<Z%+i}MRW@iERPq~sGsTo4@$(hn8vIzl?#kqg!y1D9 zY@>bpHdstU;lietKox-lN+C&afk55Q1`^nYWs)B$4{X>RhcxeTHgjR9PzR z^Jk7w=HY_Mt@s8>C`|Ug!8x{rxlTvKJ$j%Q0zQv zHjd7dc?1pM1_P25pK_u&UDcPCoBVL}nY?knh|c4_Jvx+-EpOpP<8W?0DMK5)c5z@M zh)0CGo8q#?WrCdW;O%Y0efB!bfy!#*3+?q@DH6+B%Cl$Ut8$&^rI3?>i!?42y43GP ziJOp_-r^8(KU4B1vcauknA}qDW>Y^RR78WQ?}3wu5?h2D#Y5VFYxz>p76!nZ`f>mj zwW5;tsrg{y<0A17Sk8?@9Yo#)P2$_yOJZxSq?Cr}+iJVByELL1^nemz$PG))SJZ`X3Ytu>M!gOgt+ivlWt8TD{_NY5Bz@miQf|Xpj1IQiswZ!2j z9(BZX7QJ=YiX@wQNZ$TR#`A_66)+s5B1OFS-vHcq7SnbR;VoT-5_-P@gkY+wX-qB$ zc$C>HmQCYf5NtouRKn}e6nfj!s@X$^ZMEyF0RZquOd5F?)yJ?0y12j1N`&B;8gBA) zNp7dwTr2J{WgyC*Vfa|FQKkakoD1#Fvnc`)%yMay4^ip7UY}nulzcM+*K)~{OfkV?%~IpKYLE}em&jGU#_eF{BgDgc#Gsl z)qndq`;pH7yIi(Q1YVac0}9u9veC?YJA1SQY7zrh;$F|zkOIPIv@hr4j+V&vL0(IG z+F)JDc-jG%Q9~RN;;Q()d(P)8YEl21+{_m?HE1RO(Pby24`TwSeqq|Q+wl41j(`cb z5gwSFI5gGVILZmHj zojvg=&4q;`2;gr3;I3j02EUT=H5JSfi8{818iUKCnu6PD&I&$ZA;7NY9E)Na<<=kfsz zLV?N+soK;GYZ8-pIzxoz5I5Zp=kuj1f+rktvmJdDP&5Amh>4CArf$Hz!)4EsOoTh6t6gjnn1rYgTcE{qxQLg*}^Q! 
zbQjU!5xK`pp1sR5X=WAK4ZtL)@dnIoJ_+`%6l7O#(a}wMDd-Oi9MPPdc+d@s7HL)W zFGY1z+U4rrarI|Rz;Z6U8O?u*(oqo3C&I3lTjW^#jFxTWJz8a4xNo8@Sh(v|i-Te; zrL#D=k%$?;?NjP>q%T>^)XN1iddUcMgaIkgd)15g^ANl>ecInsA}fvvoB-?+hX}k z>4W^F4znopFw~dmvBg#;eH6v5t!Wnkm#I$`VB<3(W1$?qEFK3(L$_k}ka1!}NR3X* zztv3mZ0-{r)=;XG?z#sz0nO;_*$*m4^~g93$%}rC7G;tCc_t)m%m{K^aT7B^n(TX` zfX3#L@h&)HqR->t;wcxYfPryg?@2k6^r9^bp9*|r;+~%?)$06cz2nngePZF^ths|t z37`5l=$*gwwwd3C2t99v-W61j_jZrr=4vi&_%JZ2!4ln72X(Y%N^Fdh26eNVVx?(y z1jYs%ak0Ea&eleV#mR>0`izQmJf$7r+DjJ zm4Bg_wqsx<0l|t~Yi&2MvXPk#UUsZe`b!1N)r|goX<)aQ#%VMWT_+Lw-D$*kP(`Ah z<)&E51m}>X*0O9*$c?S@-y8v)Kry@Z6}8c`fKYgXrl;YRJ;&Fjb(W6w=Sx!3RYgpj zN^5WwRZ+4kA}${Xk1xtY(znGciFvAeu(m)tmK)1pYPzKx(e<54&DRkG*deCe%5H zLO>E{$p~Yu&v1ebh9wAVH($l)@gK4czCZR#$kVUJ)=1Iu-ef8%LiV{p^=zjT(amVY zdMwkOCywKG@$7iSzW^DGi5)ONrGGeTpuF6~=BtG4PEVfKF5(qHZ^qNe5|lU$3IjL9 zjg?~j14(b`y|l)|2xNoCd7IA$p_!=+%Dm-u#`teB`VR)2ffp3gY*9}*GIKCm-nNeq z3zy0c!Y;L=H@4AjIQo!;@i@Rp=)`bIi5PEg{m0YD1XHCncA5gS<3&Pjto&pg*M= zfE7i4QZ^*4Wgy8b#~~vb0@Y}78FE(nR^wOlq@yQ@XB|SYe&e|pKo35NM3;1tiPg5h z0Zd%BP%btepIWuCoriMyvARYs*gERH%rs>DF1_}nMF7nPkwbWJ$(T@hS5pM-!yfo2%kz`A1G5{-DrK*U zDHymu=q1^q<>CU2j<2N>YkeScB9i*jzJjFz5qxZ=p_X*+vugf+A;3DWP=`qR-f}%Gi26;=bwIy3luN^o0&t4ZBl^xg||& z0hAFg)8ntj_O*LOt_6~RYGMIT@e=vagm2s#w7GK$-gJ_rXND+Y!=fDZ^mR3}*M=5- zm9}{QqvhQp-u0)bvS%Yu>+Iw?F+yJPe%tl*)@847dFIM<*I%Q1$z5z9k(P@BhpM2m zxiRK%G{=KpLUtjjKzdgsa+M5YaCkCB7cKohoJ2H|GND{4G?f*%4+$O;!uR@W`d;2vDD1 zrefs#RX^e(UuCY$=n)p8?Es#Bz46aqfgGQ}gl>F+-baNFOkfVxs8GkR_u>~yP49^A ze(c~a%#F@Czq3&yF;iY0xYcrfj-}bdqk?0f*NEDZDl73YFAXlhNCdYQF)kyMKz}8gGHtvbRf(te*n^?2m2*%r|I6vddzzPa~$i%X`a zd;*pquEPF5_TDl$k|oRY6*Dt4Gc!viW@ct)W@cul5;J3onVDHCF|$-sz1!QjcDi@w z_SpNd`_}qZn5CV_aL-JSpP%$_|Hlb;Ej#;D{%ErGKJOonQsZozHLqTU#^W`vu+FSR z7%G@a%c@sO>rU7%d3lmkLl)_T`-9n8y4NJw)cHDo%oFMH4X-34yiTeg1t-{Z#B zQ=ORcTv5+SqNxtb_1bkm;2Klj88KEdzHvhWBBP|owMB9YK(@Txjf0k4mNNL;rSG<~ zJb!XMNG}qx+k{sx$yjK2Fm#CvM{i@`a|bNr#c5Mt31DnKF#E;)A^G+c zDcx#wPJ~5$Bl1r1(;1#3W4l)TajbGY-++sK$3R68!YnnL57Q>jDI1yF@rOQ3o|Zqk 
zdWBmLaO^e9IO-c8bHUphM6TVCPO41G+lZ81jNU%gNS~D>AR;k)3(uXYd?XJxDTOY+ebz3=HdE$hxP%(_zstn1sO*4 z0|sNrmmxRtMi6wU;)$n#k*uLsR+OEDJy{UBJqz9o{IIF!CtePi-2fx$o$DTMW9xxroeGF>Z?j}}w8^i_1 zBY%CplRf2&^-h5~-uO{aEE#H%sGW=}S3hfi!C7CqeOH{SC`rH4WEi8r%qu zim5L{=$LBF^f;68rCN9h%Q1&c)qMtEiwaSr*A`BnaoFs!V1KBbzPH$1qmmW~h3y9D zkPa%nNsh6%R7g@us|)Yo|0i4 z5MXQWPFb6nFDQ6Fs_nWpRr9iw?1gJ1wq($C&wedy1U5{mMFjunE=hGtqmSVP6t4JO zyJ+$|fLp<$N3TelJB zD(o-CgVq8DXqBX7DK6?wb7h$?GC|;QK|(y6exql5ckTkQ19`kCw&stph_b<>crRy|1N zQZWH%y^%fXB+NIO(Hmnn$Byr!+4O;doCVS;5FLM0E2QxOg#y-gJ+56Vox>TR0s z)iLZZZsd^pR?iB{z^_*w>y7)cdzvS3lokknsOKjt*1f6$bXo9oE!g)mS9vMBv(U4S zt}58M*foGcIOv4tQ2Gw6-e#eerBcvL_uh0r>;eei&`K&`6`_FnLm|?dUDQUHUAdu zU@45+mbOEPs$_l(>hiQIe&efc*(&6&g~NZM`2uF#c&_*xjP7E9rfyKt$DMWU@9CnU- zqaHyN+L`E)=1+`RjGgK%y^-cVZQ-w-z2LM(szKD$y(xb?g$-L>ggL5KPj`%I7LGxM z(`!Mfqt%d^=5r!^Qdfan79B;V)#o&CK7f~gBtio9wP=BR-as%i0s5D%w~?u}cVGgHn?FK@hW)w)Hx^WHjm29+b9!^eh4D#u}jqvavNig&?+Ir2}e) z1@ptpUwX%P3vIV59Y<+*9z1|n?nNWPb@y&u{8{9NfAjXZUt)+lndz%Jg!$6Q&qg`T zyA~qtSOx!74jQ4D{y(b$V$ZLmN^wmP#)=Uq>VyN7wlUieIomGa)=Cz5Im4F=1%evXP~xoUj3URI44{iXa8%%aK&<N25Y$!MJSYT$ zEOfSlK^^IRwS^m^PM-Ho2DSt^+)uB~ak3?_l74O=)#JqD{me3x?D>;j8HEbPq;nss-|K+L z-=Ih<@cLFvzTt(@W-$9u#d?(C%0GtFKDt|&sU;lMbwG)>@{h*W@J8X-S|M~0Va!NI zh0c`^&Ae~yoXB5GVzbC~`lgaLAfE+FBeF|{i)q*ygFKF*5(!YO}@BCL3N!EVws*lAAFM>(iW5dE;4x( zA6-8hbeqG}0jUm_OB5LiT+_K&ROXz)|4z9}CI&_~e-1wb2Gv3@LEbT+i?*7NAmV-W zNCvNMaV~S}KrUW2HoSpz{(&Y>>Oy`gLo!}xk)Fe+VOiWb)rUDdS_NAoz*0?KnCB{R zGam!CJRT)v?Fp;gzA(fr{1Ss1YRpcrivmLM8 zp@oC}?;$KAM$cAIy?TT{c<}4Cu0NxUg5F_Hr`~k>5;oC9S03JHQXBfNPEH_9nN~R@ z6GZu~)g|Lr94o4#9-9@6(sy0rR8jR`x4OXRDxyQPX@h3-L@*^?n|ZLs7%ZoFxY9u0 zH7G+-wTy0Hk-jG8I3!sf8B((^u8@x*J~$_NoS??}pz86Gui69djycJ+&Vh`}a}k%J zDsQKc%Lc732Y&fFvRZXTZ7o_OiP&^OS?SJuVblNa9T zMhnfiOKq5qc>=#T>`BwwHW4(QA(5}vLQ#O3==x$QW{n&JE@zL*2eR6KTNb^@jq1?p zER2`w{ar!KGx=5=)NhtB!Vo{6tA7UITMwwV)x;7beGFLg>LRT3vcsL(dzp&bg&Hy9 zckLzNhJvR3L|h>7^IrK$dyx9-a0XtAcoa{0OYTu^CX|)-4*h|X7Kag+k#*d@X_m9I z@^t07@}ZOJN zm~O*7vOpj0o+67Hr_J`fn6GVOm_>v=v4te`|Op`>Y1*XzVM 
zT*S{T29$d!^>F4_cjjcvevztmd`Yd>gGv!#NEMPY6&BDdL7S(6kO-vG0XM?GyQS6` zPZeo~WwL~JNH9g9Y-kRDOz2E;Fr~l$CQ&Rx*~eA=ASR@2BNGV>pL1;my(+4WC0BN# zM3lMITsBwE^ON_=DNJ&o0NC(y(q7Z%r$_q#JvkJP*F4S9fqn`@>w2RG9 z!P17HMq!1z?qPP`!JL?{dJIP}+mgywyn6p9PDh33}1Gg!2_^) z?FN*%q$EM>Wpi-1YgO!GHC1W-`m7EYEnHPAQf>>nG0)9q=)EDCo(lbsObf+F)*5yP zJ9R0EbqEcd0L@FsF=S+P{ZBS}^XP$n(9KrX`T57X3YAwFA(Nt|+g$J3IG?4v2bbBz z=%40Q?&L;;qXIu<{Mqz#GQyPWhSzB2@7hIVi6;u$s3+Nr#SU%P$5ZvPnDLB>QN2vc zpEg_!wRX~yzL9*NwI75zzT`xiFly{THL)-anin_7_ho4SsOMPmKZ>Rc%En%P#6UU= zuC5kzFKa7uN#aIrcf2_Tc<>dp1$=(hWwKp`s_Q`$r6d_oW#vNcaO5ygc+(#Cq-a zx)xV(dw)5o2P*o#7H6rS*k`09GCLu{#<(e*G`tBhy60}RFsU-g6^zSEedC(feTbtW z_Hizt$O55+g4$~DI~<$>eN#Z}nth<(=TE4USGBIxFTR0PooaZ2z1U0Tg5hN*D@ZmY zYtI?xo-wZDvb{YoZXf{Ms<+xa zi&6IKsePz!hUO7lDs!aRTdzFq=1F-aR>Att34g zvtfnnfEnZ0wsJqP8crKlzOP)3U%t-F-;ikA^q0$7aIYZNLv1P@@L|g`^dZw|H&6_p z-97-M2d>l6;+O$h<}E}(>NEOdk@o))N_WhU?&`%<0Wm}YC%lWwU$#>lBT)>$&BMbW zkMA6BBiqR+m<(^_feWR}P%g)Fx+3UI(`%QON|7fz%*grC+K**X*Dgfd??)Tf2CWJe zkDN_#IuZOmcbeh#(@m=Q#6yfYtTJ#!w_kw;Ll&J9|B!sER9QQZdk|n<7j}e0T`t#8 zUqlX2cK-j;+ZEZ7UIVXY6WJEq>S>uc3I^2yVd`0jxTOD53(jP6+!f@KS z2Xb>lT7*R_5}r6UIX*AWW0`scyUvt_P>H=FJXXYh6+N|?TVZz{ES3HIM%oWVll&C} zTWx3GwtD<#WWet)(#$GuE_Gl$MWrxvH!k?1lwA-Z%_-#GhJq5)jh!d(1{X-;Cn|X; z_`*jR`F6&EF@A=7i$`WU@nEI>GQ7fpwyfbtkdf%3xEFN*QqnP#P?4%3v9^C!!-Zq? 
z`EKhx)--%fla3t%O&Uq5)o4r3kGH6s*Jpg1GdpJlp>jFJ&U+4nvVgiCbbciELG7XS zla(8Lsp{7n1@m_58Te9m#xm7zJ`6n{ZPD{J+f=}+AxREs7xU4;_=gyRE$^y9V`R<5rO}1PLI1kS^Tp9_-y^(9=iPNIbU$F{;n*< zga0GW|J&zm`0wT#_{HIu{YxJIhvpjiTOA1e@dEl2@;|I`jzjeK3iw-5|Ca54Z2y@* z#;>IR(*Ae;vHj<#e%Zg|@t@}O0Ac!j@oxS}YWmOR`=zA)m&N-(e(=BZk6#>q*}vrR z|Gj@aaQwXj{#MlgZT@ldXAS+bf63!N`3JZ7-;3w}Z`FSM;_%DAQdXs{h;Dk3UcM{~B`_q2Jv(Il!nY-#bD# zt3NSb)f^GoQhzTa+k%0)bqzaPS4Dp5q?I?bpl z(H|gq1*l#cm&N`SGh8-)nGZDZYkd#5lvm&Z^!K3$c&l|JlvE2opLzQaYoMDV9 zz&W7m116oXppOasvSNU;c021Z*MnmMsHF(L_?N63UBu(_46oj5um1S8&*3Q%KrKsG zhST_`tz8_co%26d{g$t%3wh+;n25&sm{D6+?-Yif6`hLO5bEh8yDk&J!57N7KR7Md zVHp83APYZoJ!@_S4Vkdq6LejOIn)~1A(}k2aiUTD5V4(V>6L%ZBf|a?u~=V3eG9f~(4hH38U2YX~VlJ#8J6n*71UCSl0%E*$!iA5>1N9#|HS>yb73Kx?I&wlu+ ziS<{`G(O?l2~wNuh<<1q$?aSspFGySxzW^+w7v@|bn^J_18kx6uHYfCrxL(-fPIf; zl`TSQ;QJKlA*fH`IzQwR^elH%8Ga}o8lf-b1o7f8OmV8nL zV3uO~+C&-xJR2N*nD`$+F{HfINq3OA6!R_ zIwtGD%UhIC{;>?8mpz9kFS^xTT34b~R&;3cX?MK}#Cx3MXYxO`&^UpfoaFbOax`&!7 zENqgt|0$9r#%+o2?%}9z`tp|3-PzK>Uwcp&ac1*bged0PjkgsY8hi14FKJ@uOkPer zW+dvX9H@@v7^4TgkwF%v2smvI7lpMpgZC&Dxr9haZTqm?E-rc+g@Fg)JE^l2dk4=v zgBI46Z^jC(ci8u}P23vk`%726FQQrf_xobF>3y2yf~*=F=1&ooFcw@fQ7Qq{iO*_l zk_^;Q&06CVe$`+_Wmvr`Cpo)<)K)DCDAIE!%fuM-l%RmL1KNCTf{oYQq0DFr^hGM> zM56fr(;b7qiWg}6r(XAeluDZF@4c>HM}Cci|EUx9EAhAeP2e|y-voXW_)Xw9fqx?c z;D7W}{-!ts0pI+*!;pmJppXD5M6cBMCsN_Ct!P-!;{Cc@OjG`nh*e+~+St8cAn62@ zJIB=ov>|}$?U&#kErP;`M2e;$BJ16XiuhiQO?KdsyQ(`pio7g16V1l>;~s~w1eW^- zaD@og9m%*e2LVqBeUadV9w?tS_^dij%jS9$*90s$Da^|Ca1iTDxMGrl7logf8i$?y zoi&I6qM;vdUHmqj8QOpam^YmSgMqToj868DYu6lY$H8lISbLQOf~^r17~{4v?@Aay zYPzSaUSTG29WZFyCGfES0K52*fDXbCwQ4_fl!aq@K{5(~#av;RGBM1Vw4!}ED$3&c z;`16_kmnG22<17OYMPawH8_qPpf4NFh~vKCo(AMKV4QyLD)VzwqlNw3ppT-u(>+zE zwacxHhQ)Xv{+@Av8^!+f$NsS-vz_am92Qwi6JG>b@Fmd`H{S*f?OeUygD17=757 z`pL-Lq>0LR_*)9~$v)UINHpF1c!%)8UVv)OdJgDj>gy?9JO=b?P7 zq&yb-tV)b9aC04k#7_vx_@L`wUa@N3>i{BCmRb@LiA$eUq3^(L(FLhyE*K?*8nJ#8 z;|btX-5kA&^`}klwv3(7lqh9Pc6oj#uMTu15hl#6Y(~J>Y^!giGH58(6!cP_aXFf> 
z>20uI?O{%i-xj9J&P`RL%fq1WbO!g@-v%8vo2MCcE`sqW%Po) zlpZW50WJYmvWywZ*IBJG@}m}1M(Z^mZtT5H?)_0cQrYT~-5R1I5GyY3%V~!GkdO!p z%($3Eo?@jwV|?hgaypnO)sH(8DCh$ko-h^VBJ)LbCCJr~q6h0OYR&+88;S}$qMo`> zmOv?;8+LA$lw*e-OA2FRHX=THbpq85{X9RnA81{DBn`cq{?aBJWh>W3XPm^aS8Y)v z;=4S&f4V{3H1eFgT3x6-Do=lJ${SiRau9F5uyIdWs3VrOQmq+7|EWI$+MOQ&WOtTQ zHp`YmEQY4J_0#*Jp&D778HVnY0}nMr9dwWW<9MvD)a`cO!kkW&ZfrSf-)b`W$+LKL z^(5Ue;Cy(Jcan-&5Wc^rL#O2iTgQbAZNvf?LEc7pL@s*paW>je%4$qXTq7PXsuX z_sdfl9^YZ>eo#wJv35oQF!#f)Pjr=QNGmJdRZ$nbEndPu8QuB^dhNTi5rZQR9twah zNrfU<9#)p6ggBh|h$c{Iz~c|T&(9Qt?zxH*cyOcg2Rm@ta9g3BSqv3!K9%iP>>tM) z3K1>?02O@?3gl_qZo$_6_Chp1RyFvUTdBtG$mALRjNKU&^D*$rT6`{QnL3RHSsK1n zh$(@sIWmaGu8vI#L$$k@6|i?@qSeN_hsk&+8qTy5Mx)v*E=5v?h7=$RbMU=LNlZP3{ z;S7B**1eCP+)1sy=^6%hqZ_Go5XA}|7se;5~kB4nfHDbmL1{t z%H(b*7R)a_MLK>+!(kc4O*h$xs-bxBvUy-MkiJ~%&U{`S)UL8$;*ww^5(vS<_3c~+ zFHI&VD$y!x-fRh6o_x3F1@aZ>3Iy?s^YXck*uydnmuoYSc^oQZ_U2JbS8f?rmh6p!F1js1C9Gp6H&DrUy(FYe5H%0#( zv&37Y#pZ~MGy6g0(%7pRJTGy`d^9Q8-gr@qc`H&QA_GcFYUYMOlN9v z3;T`G-b3I{V=wtYtBDAgG&RHr4D+6BRu;muZGR~TS_G(>x6@0)!XX-mLBXI+iXsG% zVE7aq@;feQR)LLfkK=W9peuAZv4$7zEWMx8sxHINeFu;X>NRoW{D9&IL(S6=R9p#OP9@T^Bj&qKp!-?|2y* zQkn0B+K21AymWY{R~+jl6gcX)n?qyv$tS)&D5M<7s{r}{X>>fCZc1QU;?aPR*xaOeN9<_a&g_zbDKMbY{JB|L+G+r-#^jQ?R^zyr@OCdVst$r}uD zf4Byli6*dr8?)9{Y)gWFfw_tDW^g~Y&_30on-w+?0S7V%0X*6EOgg zrxddrW_J;TW0g5npJ1Kv2EQA#oiDKQx(|Ox*tD@boXbB67{fid0&i@Kg(11Wi4E1G z-hU-ZSsG#Fg5Hw>Ir*S11p=501sKUCz@5bO@{VFUTpEMW<&&dyP+tImFxKd9zls_663rOKvV!n-tI@ z=$TK}I8(vBcZBVLtR7HPR5rte6}AnAaCn2i%#`xkKwm>+FE)d%@*_HiXy(>sH72Q- zXRjV%1q1h3Q_OJ|a<1_OX)GgQN{CeFaJE1jo_`3Czzvei9md0;C1|jt4fmIIYiEzm z8{w`YR%|Vps^YTCRl#ecG4%BXIDd7$fNhGT9N+0IA|4iSbD3+Pm-JGidj8f8Ww252 zO9_Tk>g$(tG+nEh_j9ash*WlcWf8?C-+KZ>evhuukzF#gRlSURMT}3xedSy7=zilw zZ(&Us&)`$hZ0#UZ*4Q-1z<7GDS_=Tdh+Qr7P2jgmKy$$vgVj}1ovwJ`2`~h=F;!?5 zKf@GiHM^;pyF}_Lyi{RmwENHrfYFIsN`;J`-2e@DdP1v)?@+SH?9A`^@YyX?#e1?? 
zGboG#dEBzNdC;CnYi@jY?KJ42QV|xECJYsP@$OKW z$|x-8G$qv`G6_08<&>nNyBG zYlE=h(9Gee`A2Bn_Qq6MIb#vZ7!=d_=iaaq3y$fe_mBJAw{Z4RO1}5*< z!l2~AYPt3nngatDb*|(qgwdgc9K76Hjra5_hMI&0`xFLvI4Pfz;o;5+q&V`OkDqsW zCmDHh(+Gp)s{<&y?75Kd>sWKXpBR*g4`ic|&{*7^M zdeyx#JQ%JfSmhT^FZ`!O>d=6|H+~{#wqD*nm zn`fb*)5{zz;-vpHQHHXA^F?|ecGN~T7!R(*52W34p}3FsqwNQ77$xS|Z_G1z#=-MQL|Ez2O zir@LIYyTU`7QavBH-X;-eiQgj;5UK)k^tBrVVVC^*T&U@mN?SmKii4|8tB{%B=Bi- zg&KS`SBG4LmO=Nfx&_fX(FqmM<3%ew;Zd9lF;XJA1#H_w^r;M^A7tiZUBCgV(43gU zalZoAt5TF_VQ<$mECAUd0|=zJjkIXEP8h|n!UlmR8_^A(n6i-dUqD!IZK5tZ*f&t5 zv8l6DAZ1ts@eOco9MuXCjPm{xZ7eq8HQ1^e%^B`XZ(j1*=_86y9E;K#M5~Nu`EG%1 z_fzf5v~Q(S27QVr6`7Yx01G3jM|HleL<8eu$3C&)J_dgc&GFgY&%;4c$}KomT;h1- zgeBHu<5C8XtHcvrMN#tIA@Ok@KoKZ~oa7j+tX448nGb$f+s#%p5aq8^#&c4Nz*RtJg0{ycH9ts0uI}lJ~Mk6rc z$OjrtzT7$s#^iCy5Ge(621rU7Ymy~$&||)^$}}+{u`d>MCgt@hT~|fxA_4sJxdrVH zHMkz10Jj_ZHfhrf`h!e*R;15;KRhzvpeWCcOwJmL(Tk98ZV8YED5+eO@wptSxJG+7U4J~q0f+I-S~LK37smPhNEBWI>r ziW;F^t{0Q()!xk%zL@n|PI$JWW~fChNTAoDYr=lyYrq0BR2teQAovqA-TGX^tri-1CMaQ;czvkS-wV`o~+zL5OB#1(5MJSrTHfrq`-eW`y-~jUhug+Z)d=uqG&daTeqBbBLX!E*zC#pWf6!j*`;cvxl zbSN?HM`B#BjEcTa#B6(r^#fHnu2hH=ah1XVdxWA-<%h?1d*{$>NSveM?Xf8mud1N>0?F}<%!J< zPGQeVVgWvX^m{jiAFnZ5Vc|~2#E|C|RAWXXMM+s3Ok6;{9j&p2Jjh`>f0cYP0zOJo$-eP%f4Yhzh@+Je2x!5_8jJGi$ zA3Yni{vv2Q6m`X?CyMZ5(Zc{>x{VYoV8m7TULZFnX*H1r<{HCA^s{`}!&b>Ex%8yo zmmE42uX0ILa=GMFkW>wAZ{*7TjlC54r5VIO}cxM%<=m|MZx$D7RsD zDrW0_3j9u2kEJo(Eg>Z8PcA6CI9Goj_^Y>FKQtOcd`P|EiGreQ@r*_?k47h?CjCl; zZ_eQ_53i@yLPmXIA?=jm{Klk{VG{O6aq*Z_d67q!V-{LbPN&UnpsU!h{T`nm4%V>x znIohl$7^tAw{l(sx>Bx&ydLmUBnHqVifV=;^-fT-wl??9*{x`CCa4tVd$vhSO~Z>B z(D3Y0;81=|PR4zpJT!$30sA{>Sn?FbhnD*7p08>S{t!=mAAYFMyJPO_7m)W5Auxo4 zpW$^Fc1~8V<}h_##z%4m3y|BbDRiX%wG20_|bdrb++w4S8ecW9ehzzO4K5yp<@={L>J`RFV zB+Ymv&9Lg7yn+lUZ40RFuxq}@nty9z-9px8DqKF4BCggq1a zb8J>2@15Arep5I1ClExn0;}H&@&Vi%bPrzJxh6PU3w&D%jOj#!<=V#+4QvUNAC5AF z{Ip-SK)}7h|Jz}-;`Ze}=2>!lBJ_xF{hpm*h&QTfFQ}kpCkkAsW#U-R!BRdX9eIy7 zLRnai2<~f$O4T#DG_@ zZ4v#=S;?*LuFI8nC)ZY7%^|0HlOe!bic&D8Ot~y(t4@pb6XUx=B)%RGuO6Q^B*p2$ 
zDnv1@c4&Hzt*7&fp)inQXoF*xoGhRBc>ETG8e`5HzVGMYcAMs}n2owKFWBQN-|(j= zsF;JGGKaSo0SaJ4W>FohLgnvPxqCU&viBRq zqosL#81HDwjiYSByYL@b=K02YfL!(}L1B4WzqThV&JZ$r;bc{SN4j6@bz>WRLS4so zZNJB*5F4|xx#8I<&Q{I0;R&c@`$swnBYW!1L-oWBgTS45gC_gj5`}V6(N?qNf>7=m zWFs9YOGAs;20$l`mk>Ns(=LT+Fnh%B7Ib=2aB-(oHJ-I^w`*8GgrAFHgLpOiU?0k$ zl7H?W!6`9UMicHohlUGh@f)E?jEG4|`~bPu$TB3KN;=dO*3F+}$WK5@X%!%iAbPr& zwH4qdX)k`m%_AW={R~?pO(_EwGDva8Tarz3bs1>^He^pprtcDBikj^o0;-mk4?dPu z;-o|NOg8CAA1o>&%jESP9y!%GbJbdiaoBpTyAC6b1n2<(u#4>|^5FS`P1g0+wq33` z7!<|U*C&jaA-d6!qFb$>R8oP=mpM#6+rC`!Lf`J&6L2%Zx&`v&JL{A*>@L*`QZXLe zfmYtOb*m0a4OeP*SLJ(CJwt`EiosA3X>t*AAlf=zw}gz0o;QF&CHt-x1QWY%wIJT3 zE91(8Ox+JMTZ+Ny%_svS{bh?OCuKs&>dVi@y_2Zb6o0`qg-tKgrQ2(79&*EVImdG} zLEyD4*#tdpF*jcqrKMNab(^md<-*O9s8b@HDUyV}(e)9?UlHZsWoacuA!~TBZE0in zq@Lox>}(BQnYAQlv32mYaDSB)7dNP{ZGy=^h9a~%an`KC!9}A3$y~Nd@i<}PI0+GD zkFM&}htVaRp2n*g|Jy1z>%3|eohCPG> z^bu>4mG*-?YF6oY3|bmiD$cjfKnVf=Gi7R^U z)~aDL!|uYSOHn&cgq03`r znAb>8EZv5lBBut?83ZjvZ1kX=%B*7Il5)5p2+Z+W(57fQtYBAi;l)O{_M0c>l*hF& zC_f;EGf=PLOPganGCu*s?2bC{+x0l-G1~6Okcv|h4C$76gr@>;>~4|am&aVSGO%-Z z8GPg2-#g%V=%Pv_6l*&_v*AB6;0?Jwx4=vpniBkx&w+ z&5JTV1hejP3WFtBx6q=&0PM}KFNS=k+xd={9fyJQnL=rhGAbomvZW*M8g579`!*}t zsDQQdBkRFM&^SKuvb18+DW*DJg=(rUdGckHHOZJ;|2Fhl6rCVh0fV#pGjhJ`WP3g*qRL8I?D3#_2ZD!=60`6O6f}O9G6Ub?q zi}Lc>t$d8tXc^^|HhZB_4#H-HnjV`)p%vDnTdouw$BsEN<;Vt&so-vtu&^DY%op!@ zT!qP|l#K=(0M<@Jzz02qX$0w>4}lF zquHF=Hu|T+JKGO%=?6ZoZA?S-enhrC{m9IKw)L9O z<-#p94hi@Q;gOVi@J#od-E;tW${oLRMGWLHt-v52@H-X;-eiQgj;5UJPBLd+5 z{~%*;+GLK(ROC}(7bs%m-#S)^$V%_k`{LqTR(J#=npO?mV+P=#q-sy0pR%Ap~wPOChQc_;v}5OI1U= z)U!KaOtWEcmE>HT@`Axd{2e91vgE^rg$3yOnz2P`=q4Q_ zB4ON4tZ9H@u&&6nt>5uffyPV!fi^8o7f9R$Z->zYyN zaUy`SP{b~ZV$%z4*T-yO*{BXEjo!Se-q8!JCQpm_s&-^ZgOnwV!^<-e^WcT@m|#{Q zr3J#EsBim>z#N9r{5Z24QFrQZ@+bGC-0hNyaOOOh_)RQ9Pg=MRF=cri z%pImes69tCK|9^$^@#3GH{)#yrJa9;6o%agB%hnGP*rN7mu6T82_z9X-?*jz=1#`Q-L0udL7qBX|>e?Ifi#yY8_~$Mv}?mkNLI_zAj#=#(FhO z$WJR62Gvh@gu^nY*0^=UVAW0=j~Bo6$Z7-B`c1PH&-S)k*z4usE?d{gpuo+Q_L=K{ 
zzty?5uiDtscW^s4CW!?yqF|?tU({`x-AcU(V>3P7}@zw|EDaY((W9v+87}I|r)hF>9%C6O#<))Ht9R-BuDb)1Z25pYJj=Ih7zbPTix@ONY z7KX4Qyu`Oa^Mx{LqppGGr_0$pzl!wU={)cP*g(*orGA2^?y^TZ7r=dl5Ol0?6nZKp zgV-5SVuoLODn&w$j+tE@Ahxjgb3&*+b^_24$Vh-?t(QmU5yT9D)&myL3k>;q8~R2B z?DM=7HJ}}j)d_Pywebhw2hFn~t0h;R(Pb5_Cw$N-l&$etk$rA-GHX2*XdOb3vVXdPFSYVq zP4xzuCB;s}M!OnZ>EQC`%>@lfM=y#++xngzn3l?@OQx1alu<{~eh<;%aV#Qy8mp^l zwU54~A2aw(po{yo9a&I#z_~&XHd#z2VU4he{_Y7Zv=F$=F1ObRfOFnquts$`3ca0c z9Ply7D9vl%OJvr#e`yaIhGv=jOcl!3&j~$27z{O@&)P9 z{I9LZCOz7_EXWyje*3E{f;$N_dJ&hIaOChT8H}qYJNXe{cbZh_;j9gM;Hqa`8k@XD zL*^UdbvcRg*e?FSh?x~<=55VeKS$V^Bd55VCptjjjHyFBs)%DX{1on4V-u%rXY0~| zlGV4n26yC<+INA+*luP4`P3psLgYt0OYmmXUr_njCz8iSG%5v0W(P*!5HB1ZqCEL5 zu7(DQHRRFp-Q;A=B#UGjb~zO@TvTLng-u+8_tObeQ&_M!ptjSAKjIuRWs^$*;Is>1 z>6pH=vyrLoo-64sZ!q?6%uVxZic1PD=IcI?-o3qt{A8?DhT_b4fGSVv1ypj0Wuu(s zK=pQ+BSolnS}f~NBs&2Xu?{dDyyP++#SO@g7%iu>dCH#lX+0Tn`ZGLR8R$Cr4^|Ni zZ{w32Uz(M#LDF>h6mBC^rjW-=TO03k&7i+sy38bvnF@S$w}0%3%q9Z_84XejAIa)9 zs5qlo`??!eyxNZs<}PC+yY*4|CJb1=I)RpTgW+RmJXE97>9a|gmi0*k91kInEPH-3 zUoEVdsFGc@pG= zP%@i#Y~V+UZu+u#nmh4Nc*L!Rcr&0gW^(*emu0yqezmOyCiP~+qVyfy>MQ+jM180C z)%><+L|kJ@8_wQ6trh5bpG?;BJoqtf(3osN&r7waI@yO(7iSCY)*UrkeS zwnv=Ff*Ikk&Kv%p_TB*mEKo5sGc&80nVFfHnVFdxRLl%gF*7qWOT~=(>ete< zdHbe&)@;B1+1XW?rFo=Zk4U!@aWg%Sd-xR=ja6G2_3?sp?%+0)TI}$SxG%*>aGQ0K z=A6m!NgCbTOTm{4P5|jYy?L!~jol|5Im6634%deVw_-af%Zr?9#R~)#bBN>IrEF7@ zY77%cfSO~PIhP@DMVH$vgsBT#ua24)ZTdW~EHE^g`_9lvYy_6qzEXXR$hSQ`6^V6! 
zO(`tqVEJx{jmZ@q-{xyrLgik;4{dF7;RtNng(?nMa;XMSvkbNNF8?3`+y@+sIlr9S z$|2=S>TPvV-IdC1$*8d_Acw4sh!F~*kfc*4ZIc}`9y+yjD8ZsBsAo12F*^>#(S8&A zmQMfNGeAeGIY!0XM_Z*n2ux+KWVaAvj~UoGBs1w*0<9_ioTI=vr&9IEe4bmW zm&>%bf2K1+-ZIgpugcDZ9GEA4$Tfn#tPx;Y?-@O5YXB^8J#6BvvAW+J-)$3obO5b= z&-E2R9oU$gEOlxD5RfE*CRW-OkqR|aizpv7xqAi%1-hlji?OdEFzz@kKb>E%yFNmq zeEj{Sz$L|!bwob;N0_?=>>S%i0_R=!|!8iQ#RfU3IXWLiLiizw( zI!|_(3h8@wjN6`CWLxpCY=ZBtKT1wTUo|knx%IZF5SdYE?WC54(%HwNvKU6ght(Nb zP`5FfAfq*=q`X>6Jnu^3GOtei3-!ohoUDZQjE)+%=>-9pfKdBQ1|xJm>XbdlsG;c6A77{u|vQn=`GGIwo}Ykkr## zS7BQpbf>c3USSx5Uf4~h#fLzc?!4OUV1_lPz?e9b6FSn14=GC=RzOZh(?!zYa*h?M zBFNuJk_fW4P~OqdK0yWILJ=3)7+!7Yv6Q%NF6(QT>t1jkZ=V@b!_!VnNz!=EuaIpG zNCWxtGq_V4X56VPsierxFlm#D?#J6HJ1-<`U~C<{DU(_AU4g(0jwY@5;yK^=v(4vtTtZP>KY;$viRbHmD|g|sltp?U!PFmjG|5Kduy;Dnm)=B zFZPDp&+S4Hu8#ng2kSx=&vit6b)Aenrd-o;$7(7XjPtR0jGi2MYq`U$Y`{n?yAf2e zk^pEp&75pLrb7ihPKGC388&u`Dsp%stFpSP;rjI?h+Z%{%hf_k=OGO3|Pz*%mUi zKVQ-yAk+%T^z8KEXC5HpT;Qiq?1K1O&7@j*+Bm{9Vl3jY=|==ApO${Stk*k5i-yzd zXg?E!08B?}Q*r)1FUyA&jdLh(t7a%7ig^Yb(kFy|>Js zKi1_kCi%9p+>onu>Z-Qk9DUmdu9ZVozsW=x%;MHgG{M-_sj!eimnl zd8LmOXkPS+7aYg8CT9;rtU4I8TV?>BAu%cm&p;+#AzmcP?E{HvPLB{;@%3)j@(-0b zV`L}tXN67GmMy0EJOfON#S-C~+Y8Ta#`rblatkk^SM)Ypoo{Dfbc@eykGQqZm8C%0 zZ8o@vHQVU@bBoEe)=5Y5w~5pA>}_nJGcbw94a!cF4D(vJjE4!muE2XuiZmzGVKJQV zu}@cqkB}>kkHo&%63zNqXOV& zS--w#m+(xWdX$p;!tW?C*dW4r#1@}!+2F+${G7Ug?PP-^;*c!FmtC`Af^0g2H zYJ4J&Rm(d950szlQ%Evsx4Tp17@e*8j$D$v4P7#AnaPF5xeaziu!pQb@`Qyqif8XF)sGV^c7kbbRP>f<|KJFWO6W6#Z$ zm$|a2W2QAQ`JwiXZf=vU%9Ri06W&N}0xgZ__RJpSA=sRAK#nnDcBxd<2hCIuLOM?` z@wzKGa3{O{N1`m5!T`baYh|20fH8J9y`b(PKO1Ul)ScAMxBf5DCE{bG3MsYn`n^$= zMhTw%_s&)P4Q1`i-E!3LLPx4;)7HnJ=YZ0QA+GY>Li$2cuQ#PrC_1ud3owLvmJSc4 zKm|m=?Ld@lXfKT;OftcYUgt?89N%Qv=!6rXH2$krmcLr^{gQv#n(zNmtHM9p0rLHL z{ZXLz$MzpuosyaVnfCa@)j!PsvHg21z`t*G`d7J@>c2>ysM`N)uH_d_|8(G&BfsTu z1b!p%8-d>l{6^q60{zvQ-Bk zB6E?Z_9nNfX5i_}RYF3*GUy&l)88otsUU@AY^WIW#mJ73d&5UZbKjp z^qKRKQpg8ss)cjy4tbedm}xtFfIGOK8J;Q?WZ_G@lXfc#w-|-6ZS1N5l82i?X#>a-cBxv 
zcRVvTXFPF!?Lo%~(JKJ{4K4**q|WA9gQoE62AT|q8-3C0g9AblEUQOT4P8yD7UZIwS7`7kemhiLwH+?H8Eph$0<@XIqwk!}Q;K2w_f9(k zTu3ehW2N&n$P&f;Vo^chj$WO;8YX63{d;<75F!@XOqbm-;?MQ;<-2Ip85wLL_A**- zq^`n@&TM9348F9StYz$e-azrgu-Pm8vjw*7!5^_scyE0{EM>q_p7#esEnnxaY%fg7 z=SJDJC8Kr_T2zrGzj}!$ff#hI?)NM_6U9b2Q_>s_wtP*IR6WMj?M3Xlyp%Hhbsu6{ z?w`~3cxpco(&C6Uy2g5czTiHK9#0u&KRlx`vOd3p>>t*Kclj_4vVt7=#COl=o{avM z8gg@4vDBB!zIIbfGo3H7LMhPv-?`t#>m-UWKxru85E(xj2bqRXtM zVr+UdWJ~D;Q`5cW2^xPCCXHY}qmjDj zHKr=fQ>^inF|V4KI~^Z}p3Q@pOXUHK6H_R3mCL?l!6Y~w>bOGsPPgsCm(Y!jxQA9cgqKoM41yZoH7G-Cxv=s}Z zsuAG|*M*CGIUc8a&1I@tzwX0bz@2V1h9@kvfJr~VLi_~Lfe%X-m^4(jpR3&`($DfQ zxC$S@EEx~lHs$1A2{ zcPwq&ur}&{%q9wQ>uv%5!T6uU>qv(75gIYvnXe9r;u>gE+d9ZG?P3?)tLVu{w)uPz z*hmmZRfst|aOZJKZB&5ItyX?}0gV%iu) zRq19qp7P4ISs8d)M4Tj6olu^z1iC*5yGVlrk}IfN8Wxgu`9P$kpy@(c07Q7OcUJS`a_veZ!|smo7GT$0&&eizM^;;A23nON=ztcUEa>FAK74rjuhQ$jVgcx6O; zgQJ=3IOsY_S}HI*(HmSP>oi>NZBLzCVIQu|AM|d4t(?i^k~Cs;)k)f!K$(UI+3LV! z65c7oE9#8zn#H|U+}s6-Y0If}{8go6K}Xc~uNUF;7mAwf#cRx)BN!ZozaLRAODutI zDo;DOZgf8^RG;c)SbqPKO)ja>H#BV#1X)oG%d6`;i@*QwZR6DcQOBretgqHUoQMsH z3Oiab9I?h^_2w!5tu%&$AY$@0m^xbWygdyKWKzLOPvmn!mRwKP+{IZkx#a`$$J5<> z&w@8|16`SJPWm3J!Vr@cJ%de2CtSli?5*02{zC$I%N?OJDg0dryiyxv?!i#6cMoOo z!+^E#!ro)&u-}HUwNJ5pQY* z>y~A@45{YWL}3i~CcY{<72yW8p+PkLGcqM71Tgs!pDJN2wqW+Ow7;638mr!0B%iy?w^WSk4%|4qH2l~47rC6TeT7)PICZbk?2jH%#3Gf zm&84vkfgOhHXy687Tb0w@u5M350-GnzLT=TtFkKffIry~K)^p0b3fy6 zo_|}Y10>+jGw}QiPx9~VOW6M_BR_oxUHv~C^HOBcHTWri{EPzVGyc*u{?s$-{zK37 zOV9LE&-ACB`Inygr=Iyw{l{PWk3aPv|J1Yn(zE^4v;C=Op#R6f4}a+Y82E?$@xebY z8$j(J&i)VB{#?)-6hQ8$uWZSG@3h&^Ve;|6xAXfsjE(pUk^kI2@248X$|8Vu6BCb>51l{QdcV&0DsKzqj-IIZTc8_wy|8r(K8l-`o9~w_h{=-tO1DUB>%);{TtD zuyL4A3JtEc1zZ}o*M@vx`_bx|pWH@0VLXx&WC%8;rTf#^Y^oy3!jf^Tqyf5pac%J# z=@4q%hbs=epd%8oM!SkK_V`W9#RRsnAOl=hO)#+X{9Ik6+|-P{I4N|UGAxAkkGT43 z1Zj%fW-zYB9K}a2v#svShLF2dBTFNk5czP+B|t&KE~QIccR$CVhLk=pObqS=^CyrC zQp@LldE*$WgSjlf4cc#R^#?MMGx~%bE6rr$DK_zb1T~>?BMHeFS38MNGd9g9Bwg_h zf$~fXND2!O6$XZG0@@t{{oW)l?Epz)I-gLL-(RYv!1n-*cM(c|PjK8k$ 
zQEmIJv4>0mW16)2=)LqnoT<3JN`B%}8vg^fx?@3eJJw(>-e78W@vn0p5=)0^3a?!%~ybQRwE_rb*{8?rR zgq%UjdpNmxSQDnQf1qT)wfu=P=jBk27|l5Su8>GBA$bME`cCMvt0uIxy2v2U=Lr;0 z0L(}aPi{{bY}()`SukWXUplyR=bpNwf-Ea~HB>(m=q@GNvF4nqlue@0QPl!}{7Wk_ z;|V(9kS`f%Yr5;iEbTo@1WdBEEKN^wg$Ba;g!TP$cV1L9hQLJrT#HSI43nkKb+J!m zHN(q*=U3o^4_{0-HU-<_W48xGWP9Ycy!^L8#4h z4jZCq>Gt-w1JZoBS`M1xowQ-3+$Ri{-d)*AWPJSu*y0R$X_0zbtDm=GmGP&ax2jse zy%U^34i$kPr9$NSDtS|Ts^XRhAddLP&$@K;L<&3`dl_5xQKI++4;UiIJgvHP_u?{v z`(BvQB<^U=UNM_n$SA=F(Gx7oV9WwzUSDA^IU_j8)&$?5=W3+4JvgIQzmDE0Go{jKx+e7!5FXZEH4wpL%9#wQ2pwhI zT~{enQ8~{)B$DT^?>?sbPE{9`ho=@uUjk0`0H^A%1rny|`rHIq!g+&Zq z1U<%Lr>=m)ZXU0P_@kRScRNKe;`4cz;8n0T9~5_}Zswz8Ce9l)6uz<7kZ`5T&RwVr zQEHjaMdcUZX`!RJjQA~zw_@ak1s3(O>FFu1)skS*d`YiM@(DS2l#}tY=xL{#F?_TT zhe5D1-py_XhACsCaVAcKDwoNmmdZ2qp zcs-dbO7{Za#XzJY6#R;cBP^z`9=whrO58i zR~h(bQO{hqY0EjEj=z?arP*52IN_7#@-#XKXEtEdTLfejPcN0WuY`xrbY2QOU&9M$CsdH z?79rk8X4<0yVP$%cryKUiG(2?SgtSUgB>_d5GaBd;!5AXyzZ3r1}^CszJ*g=@#B|^ zJA9NcBId^4{sL#whmEl@C%-Kx-(m(xp?!rfJ5W;HxJC48Q<`TMq%9rh83(t+#MK~) zcuxzv$Wne2<4b2q|D_7isT~vgTsC{vG91eq61I7EsqvW{r5YFgDduCaSo@{iSpr0f zVXMDGP$Mi+y&p8&0Tx0NabKvDXf7nrao}?wbdHIgI)cPX zrX7!fMCb$7DoC=#rHt9WpN6MuvM(*=@_E3u^Pl?pvD?AqvD*f0WBzom~rhmR$V^n2-jl9Lv9MiQ>x<6CGiVt#&FX4-ODb3Qx_jA@#z^MMl{mqbBv*+#z6&A5Yl4F!oAe^nb{2L7LiQ;rG$8kPO} zck<7$|35$MxAr#zzY+M2z;6V8Bk&u6|M3Wb^Zdu!h#*vyZLujm&hYW--^ut{6=l zkE<5J@S=eQzi%u;FUFuF>o>xl0sQ9r4qB{>J1(z`VO*m$hYSD69V+OiiszY4PeTQ%fw zhthqJOeg?Wn2gFvJp9cG*@1`R)!+=u(ko6v55`&-;l^}l4Tm^#s6EwO<%!=d+lJ*E zM+85(9_tIL3j3V; zsBel(6Xq^H*|6TKf>j**&0-o4CL z0Ryn(f`RW4Ids9lNEdFokl$Sh#WEf5^xat=QmYSGSh;U}c1m@%pNwlfEdYu;k8#57 zxksQ(Pz_GVjzD_c7@6ClxPm^+kO*Po3$h!z32-ua9JtDL>21Z$zPjW@5??2&bG7nF!?qyP}hheq||WWQDq#4 zX>eym+X`Jh`Mrn4I=Z_6><+l$lc_ob?{jsLI965{oqr3G+ec8MQNJhWk;`b{dC)4O z_3XSG@0)JCJu|niGXGQ%?5@7foi;o_Eb0XD>s>RTl6roU?Xvf)0&hVqZWS(fwp|*6 ziOi;wE+&se^f^+ENe{<&bmnQdS~HJf30F=IYkgH?{Z{CHSpH%;&q2U~0DFr}IXBBj z?~7;u?@~1k;}nH%{y?2B4(LYihI-qGDP)W59~bibyZF{BcO);lZFEibDYl4<3k|TJ 
z=OB_0+iE#ZO`@@Bs64!7;Z0T}I@3^$m%Zpw(;{Q7AEHcLT^;H0ea-I>=(`W_L{;Yx zP?;OMq70-6FOuiujbOA5muE>D!q|ttDOYRZ*Rl<%MW$ilLk4v1zl4X#OM@p?^v~nD z3B)?RUYD|{m#Qjh+STXy%PG84GBn?7ZMiTG2L@(ox~=z=*nuWwjz&U+>KqgTNp1An zRgWY9^6uR4UFOpV%ao$likiyt;gm35r!+pECQ>xZE4-(#@*?GCURpPx9PwF^`tBC- zsz(xJ05DxyFS?R)G zI?)OZU$tF?QbU*hs^Oaau_zx{>l8-IuWO-ozgpZ}!>rJ>#&s5nZP@}VRD|!|M@HyX zkr*=Vz=wjkVnRKS;23iFhjx}r%S$$^w>1=}q3CU=gYQTvu`IaX*D#`6>ts?uID2~} z9T>mI9r8{_rY->ouYX@TG6k9}a?f+>@RK+mG&LeIX@LY=wzcrEg*yW}i27e(P+0-w#i)gs7#eCeGwghpd-NcPJ)r4+12@9S7fuUxk zkof_n*vvfp`wa@Lu>_&cfXfIHjge}QDJ&tJ`~b0H7-QfEiuP?5ac5V!TMth`e2L|N_-_PZP4*^B5yGe|gmCy;6hK!+){>`OVvn`FPukDG&XF39DY&ktE=uVk z^cNrlS#$W=rsc&3K4=jp6N;P>72tv4IL~tq>b6^3G9-3y!Hu2~Kf0c3d0Q{-`FC9E zjHR~U&a;u4JtE}RiZF&JE9re}pnDR735JBJu2Oz#_fuNO-Tto<5R%F!xvsF;8!H|G8@#mBHB~MjM_5j zDX4z*;dBtnPI-g@uuCcHS$8E`%EozViOSexN%W3IFp)VF7S-HMY}ICgxcU@SpUQ_# z6Y=-F1R%iQS2euV;)-=Usxef zRw&vX4K~l$)Qa}OP_(au9-E=h@%qglfAlD(J+A^;-EUXV!1a@_ej&95SDy)Br>>&$ zR@_mGfn|@3JXh;6jNMzN?r7o!b}5(k(%aIjtUM$bs2ngsXWn|f1STBYA<%{O9M+} zoyaL1g)58HU+wM9>+9Ue#%M`%m*;k-eX|(ZCR=ByFWgkeGIL?GW>a!)xWO@3h>#mL zv!6Z&(|V8#`)r?{DJWf#Q2@2=VS3P$T&qgiC?T-Er?|dr10Kmj# zr@YCQT&iCoy|PLv;*d+5Mlu5GjSCO}7}1PlG7O%d;jZ@pNdV=tl}0o>K&2?CRr>9IVQZntK;=tkzp+#uqXB_c)sO zcmaC@Vu9y|Dk$$n@4DcG<{1dDCnTzAP4Ua}M(^7O?)fpHc2taw%?@aw6ent**Io0d&=O0!g0L;T9IFv zWq@llfV^|!YhB%}1856AO-Td`=tVH)#b@K|NbS3K-iS&fDh&f|&QPMA+@`mMP3|n! 
zD15QOHsLXO6YWuChF#Dt^KV}}h+87AISeosJ;fpnZ3x+U8)LXTzT-W9)(eJJ>mdY} zWCbxSYkn`E=X#wd0kOMZImWxO#ZF!{0}fte60gkV*~Rb{{v1L?lgJg~Q|+6ctfS9F zP=pO>4C!C_a*%o1(cIFFluJe9>^88@0|(SxYN}r+T)Vi;QroN)z$>wf@d4hly;%h( zrz5S)ZSyEwu9on0u^gq4(mY%yF^Q}faaomDxVgkeNBnWw4s2wWt%7wG)^~$)gQURf z<_uANh84|`Q(<0{!gZ=}7s|gg2ux&e4+o3Mz-~tST_j)r&4j3&lWue%|9H*=11?VE zYJ+k4Ufa=VT|Ec)TVy2BN|#&;{?w_K1M#u_;wGC~3d_S>^*CW5Ox~Vl6HKDTv^(^% zXNDk`cq79{+s)1F6Nrx|MyI|Z`_9R=%Y%eL;%kb)p$u(+Q>ghx0fe{=Dy8`^4 zE_xXeUIN)#it3TJ#*v{O0n?Vya6y9b;^YosU-Z&P3)G^6dZ+eb1gI!MHV#$4-5S3q za{7{)l&iXD+GPZS8=?^gu}QQfj~O4u0~>D9BxnVg9TIn;2zhFte}XKBU}FGAN?`rQ zHugRqW1&eiweEQl@7jkCF9S@Sw&LDRdwf{a1*Bka&=0)tT{F)oFLvz(fzPruOH{W$ z7FuePHXco-fRw?sNrjEnwCSQV+;RW_2UFl=VU&IyqKxbeKYG!8d7c|}8dDjpnujYr z;yxa)mVT8fi*Od)RZbRp8HH74uq!v;+Yc7UPhjq;)!ag9eRWB%j!lmhFTstfy&7tq&hBNkEC_j2^dkg<4Vsom3RuXJxH^&9pwYDq7QM1o6^wE zLeA7DwjqDQ+WVrnT}G+R;IQQfC|6~GT9>fQf1ZTQ^X|3#5o=Z-FKFaWX z_{Fjh-1t%X1>g}D>o}B`k}O8Gf;{WsfE=Ev&{rks67OO#tCeq6tInaGkjLldNr{+S zv3=0kb~BfznPems7bHxFBHuo-kTPjsA1~r&LdkggflhlPm77z`b7g8{3PhY|dgFQG z`Nw+7%E8DDozwNf)Mr*L2^px+ICeE7b>gx&J+&-LlAdR5biaUO&(;*BZ?kY;@6+@r#$?;HKLo;eQbL#YIQ7M zUra-BAyc?SiAq$Wy{>}!CH*(%+4`yGvuL{UDLGIDCR^G|ZCAB)!(J1LrJ1e6VdcE@ z$&^)@PeSnwiP_cwFO!xuutAnkG0jm??I2&I!8p1M}!m-KgJtA=VNrNEj=JuDgVrH0(G*<_5)c0XJE7z92& zI``N9hRGvnTHnHNKh{y2-(4?xDP;E4O=kk6OKmwAZlbd|ai6OrI-N99Q!XZPBhTma z6W8+FDNJb1C%+L}B%dI$0IleY*oP79Hx5reMIxcRn(0tu08@*@KR!m*dAh{Cwb17g z*mVJb;7n!4yJp%&r9@52=Q3kUPDesUqwv|&1rdVN*T2&|A=JsoIUU< z7nBY7%eW&yaXQzIwHE01Dq$r{X#Xmfg4K15gBum+Kx0>P_!Bu2Zyd)?#AJo@S=mO} z4G-k=NJNKQ;s|TP@GquRZv{5PNCaoy`S3kq8HY=ap{tHj-QAZeAV*!2(>j3dPt#_y{`P+RUk7e^rBm{!ez`Uqj1> z#(%9r`Netsh55f8{QKQ+1b!p%8-d>l{6^q60{?>%fcPWt@AoU`Iym-qIijLuowB@; zp@pfwH_o|T(Ap+b8Hxdmv?#H>ff6Di5R_r9d)m>7Gk2<1?=aSDdEzvz;!WAxLOvJx zK}`vb%*D)ey$zULH6lXaVJ z9yn`h)IERtvBu3t@!e^Fyu{soq;7IawQP5_kDm#@s436Vz!U+Mc&!~Q;+WAWsh^vAFiJ$x6hF6BLs)m;v8<~y`7h7XA`sGVX3&s@c;2t4Zh zbDkI%?bQSpeDuBFI`-hdqHXlwemhz3W_G{Q);?N;-Za{}!B6m3Jr*IL00SOfv!W!_2g-%9e!%UkmZH1 
z+m-(&0}Sb|(QoCNsw`mktc!>nLK+@7P&NE}To)4oSw@K!Siw=Bu(e08g*+9Fu@A8b z5e&=2CE{ZNBy>TrRp&HdmBC^gvPACtw9CcjQTZ&|htuZPq9ktZN3v8d8^?xT@57HO z;-qaIW#3iTxnO*%kv}g$qD9X)SgBr5rynWH00*j9rAVh+d&@>061i=MYy01Jng1vaUIf zoEeGnr9}F5AMWxb>WnGe?#-^^UibIm z>ToiMVW4~^XZEL3QwC6|9Kav?0Mw)Dh`V*?ER;kNV`Qh>-jSR1AfCZUGjO;b0WuNf zIK%ojGO^4l$x8}Tj+NI6A=(TBv0S|tlSry1OI+2WLW3yH;^5XCf1o}wNAQ3$zc*5D zXc%e1Y}FtOXhyI%uTf{r|Ei)0%v-XU)43|0jc;52&cT}*Gj-!^M(lhtOh&_c zcv4hcQa-3qLwp~i+jCb z0j*t^ic2+G1*ESeSl)`}asBlB56LF@ZjKHp5VK>^QGDy_=5kpf*Q5&y*v$h1&MvXDIe-WQRLRs0y_OJ83zE2@*}AAQZmcp2KaR^3L9+;prBy-Y}S_lOOC*BzmCTjbE#gO&Cjnhfo;d z%qzgCzutb|ZMob#XJ!}Vz2B^AGE?+;-Q4-3XVE|U6_Em2=tjZdr+?aw%9?b0l5?$? z{{U~oj@L@H?1cNJUPgx}DDVei*&KRpkYmybR2X>1Q+9yk$`;e1 zH+-{W&WJrDyo+rd7Ka2j!SlESEjTj*g6hkZ-*|Wrv=6CzQip1KM(~|@9 z)H>(@e?ToZY*$zN{Op%b4~M8dE~!}VHg#=G&JOdS?%UeJ-tDulr%Sb$-Vd5lCQZat>Gp1XIv z5VG|?`*fUpoA~BBuEe_}vEMmm_Nq@fOar}CcsC8e+Jgx7(ajK#!0$k~>+fp4H_WT? z^RhhyN2fSB;3Y-TeGNt}=;q0HbpX}YrhME-)27=IOL}t8n{r?!N4Ocf)8X#_&Pcvz z^u$lhI;AkU;W8X6_+>i^bd1ms5Dh=AF%Pp{(0hEGO5FNXVrz3q>5WQItzA`B-^g9H z!vx3p^N5*Gsr}laV9RAPonkdE*)x-uVD$yUZ!c_#W8O8Mr}2Y^DTUFAf_rU*1htI zv_+F6>JVXJv{y2tZ_KTs@`f{xw(p4i9tX=lDs7Xe3IOjBJAf@wt8yR*t zSLq;qOG;rtvj}5ET>Fe@8smrnH`aKOsZ`cVuz!F)h&@0V6obU zSxupwgG*@%x7uPlV7JE>ruvIkygE8YNO{h1*wnzP?)w+cY5KA;NNUM*5do)=JvU%} z`6gJ}+7BK>DBqpWVT?Gn!AvpxzZ0B&(@xT#pGgbx}+(##-oP5Zy`^7T+RiC&Zv{t9hNQwrG3s6s3 zihSATGan;hN4w92kF5r?mor3D^}f5SKxjH$`APHnJ^P&##yJDh;=@(qRY`xCKme(Q zf5|MwD#$k6H%=v0cArdy!M+HA(Q3%vk`fCJ^L~ivGMvwf2$yI8*y}cmQvWvh9h+Jz zN?(0K8uH>2GP6EHyi*|Nh$)rHX+JR8FVhQ&$0u+zWxCKIOpfl&ddMLe9DUA;((L9!196e{4&4{)Y%ai z@B~QyKSe-FTv*|>!RqI&hCP>U=i}@=3XnQK0FGiKI~#jP6DKDVZi`qh0X-v zAo8pxk>_!c_;(Wkj$i%r0akS0oCwqZ7{GsVGnM3Th=BnhzJ2~u!<^j!IL;N$Lopk_k{-+gP9KV3Q zvvi^JenRO3jo)(=zl6AK{E8I*-QySfQm&aU+4Y*Ez?e_{cT1H?7I_p)VoDGK=7azZP!OeC< zPQZZI;%e6L@*%VxLM0N7R~Y2A!dm%H{XQbW?4Kv!t_7DOCEd!&VV4@upXxZ+t|#W& zQ<_b)CM2O9TBjLO?#*Kv5D=nMiyhMmw;tEkRl_-m=kr$E8cu&!dn^)N*o{o|C*s)`m^F5j@ zawM(fYZkzUa1(R+^jPrpi_%G5Z04zFU?A7PnCXuSMrMmDwIWEP@$B)#Dut|M7bxym 
z3|i{5k_S+_zw15W7S7yyHjM?@G#-DR6_)*w@fSB^I>t#%b4 zN0&*bi(J8>NOwk584&*}2ga)d-dbB$0j zs{2j8<6vAVe<}$wUX^L|-Q4PXgUx+SJ5@#AfB~xqkKfs$t&qO5ek7)i;Agf&lsq2P zuas#*Ot7)+2owuT*f}dDiD)dk6fspO z9Vs>(Y)fdyH>qUiibL|4lS_G=j|FX}{9dpVENb08GmTabM4akDX*mK>@V#mo^*X|G z3wt>Bvr;tZw4_229&!-hPRMGLgOWnBHjgSltucXIV<0J8Z$X#WC4-wXQ-NPREtF97y^ zI{jI0;ns_5>7273h9nbOYoXF}vF}Rf{oW;Z9Ni7@nsI5&?dl37=Zi9Ts*}VBLTkSc z`=ob}A8zbbs>I8{ciV2L4tRKXGJHyh;mOE}Nw#Q9`P+9~nBFVPmOtN*FnX)Q`ZVox zgpL1V1^%AJGEq%Fg3A56k~Ws7wYTY8yyW?>xr>@q>Y|f0RUF%UyaEgRc*s)PVi()N zYoUVp?11qR-iX@qHon3Q;wxNYhMNN!#xgrk@)w%0HjbZznT}AhC_2_Q-&NafQ=?Kj z>;9i^4+elaQz^e{g{S%+&euBrpY=+Jvtkec=3fua)lvYghtNrRPyj;_} zG8+>#bNavK?CJOD)9>6sKL$_|f9sixEjc>>76Lqb+KH#(lI&-Rpgo8H zhyaKHhyaKHh`_&^z(1phLjUD@=&4S6>Kgow9y&vQ))RlJ9%^g+ztcm5NPmAAkenxb zoBV!0pdQLx`TO~P_SDPtP~J-vOFXoTEduGG7kPkUAOau)AOau)AOgQ40WiSt``Hi9 z8x55Idq2Ah`q`zxE6-v-+9!W5Kl>l)<-pbe<)u9S`}SexQxDEbIIZe8?e9naIA1ee zw!2K9B=}b1#VfC0xflm>x+MF>%%B*E0EhsH0EhsH0Eoc9lE6QsKm8+iO?%F>9{5}I zC$38rA`6U*-44>9F7g1yKmk80)J$WYddc=P+n?(`r)~kRLrwWfmfcz zezXVuyX*2`;q^jv&XHSkBk2lThbTL06BfDz{O-x3`77# z07L*p07L*p;MXB=`fp$8WIK68?a+-jIL|35nXf8mBMia-DGSn-5nJ*)O6c9WP^A@a z+7v`_x*|-bozF3hFGj$Es=Ft1?lc*iR_T;}8Kk0FB04KYIY9c($UOE?D^+**lGIY3 zd2>y#z#ZW^he3#R-7MhiJ`jk&;|x|yD7xPxw6}xF77+2 zi$TX?v~mR0Dw*X{#@5z(Bkif{RIvGUf5sIezdO- zaKfJGmOu{c_IS0|6RyQ++dq4|UEICO1&iL+$=zLLXg)SX@2jrGX;gNX>XS7|Uk)tW z$Cj&d2;v{2EbYSYy^3Y*_VFUHlj)!FWs00IhkR?T)9-1Px>#vLO)8N(R>v#uXjoM3 z31_y-9*CK1&VChV{kk;~++ABDnlB{a<- zY5UTarFq%GPEC9|Pn5;X-L{CXo|433%Z)cgNWJiM-6H04>HItQ4Xb^z_qB!buEA2m zvGJyLGv1C$z5WPKVM6>qZX#kP13hZ`!%pt21nN&+@s4HJ1Fz#aYluHySMCtaW^X9c zh9bg;P2BLmaqAj#^t`QD1j5O{JqchLCp+(x?cSDyF(kHhvw%)zh=4d}OJBs@2U7Q| zYgh-&DmKjX8KT#=HPe;uNktZ6TiooSd^(NCJ6NNRwPqy3KTEHuDf0+9mcC28wFEWr zYWEDpkJ$pY_ynrVU7t9luX;h_#e*F2Mq84mb5*AJBSio=W9kn%9uST@ zIaFapce7GO!lMgKO_JYKiAm3VJTeU*`Gm7Fv!3}ZV<9>+F;m4SzA2@*p7-{q$KmZt zh*XdJr8Rzz0)@3|Ul3lR6XN+xMtcG#coXZya@Ny=1J8J5BgMQDIA_ucPFwK(p0q<47s$QzYD_S%#Q-wJA4Z{>%`QqsMnnR)2=T*d8S^N%s zjF=c+iqKJXN*n_kDaaCjO1#kNj~!djL@WIs3^! 
z;*SF;opr!zRlk`p{wn(k?6(px_VT#Q#W;}DCD|`#2E{-GKmF}_Owd`jpJe|>`XrD8P+rR8AM(eY_JPpJ zb}CQ9Z=PFK&^bGGT2)~3|M~qXm#vR)yU?p=H#*%1UjKf4dIaS3tpXR5K`{^k5CISY z5CISY5P@HSz(1ou{dM-ZQ@up!Wcwf9`w?k+)*pY1{@O%9yiF{F@GD9gEXG#oyjV; zCIQsLoPBsUqHCH^h9*-r)vixSk^&oIgO*=~KUi)iM}%v3fz?)q$a1+f#;WbtAQ+ZDNRz0rbr+3lVJgbo9u9;H`@Yp>4#EPDD z1A#ZqkFLE21y?~(8<*3%`fv&HDaS;9!?TFjn?~L(9srM!R+uI{E$NSXw;f9NlG>Hr z86raIGA8-`bze$ZQD<&oGcohr2^4LcY`Ga}GAroK3~66%a2w~!yRy2XvIgy0c>F6o zTexmlJm=`~CbjRQu}SNjq>X7Y1U>?Lu^VE5n2oM~XN!dNd5@TFqDfE4j^o4k{k7;y z@7f(2a$Tc>?kGr0$iwZpJ9Gtwjz4UC)04$T{k&ComOB4fBQ46eOJ6h|Bcir@kF8zS zy-qLBdxl0%rM6~ds-F(y1`K{Q;^cB3vE}l0{Y>Uqp;lbj&#HZ#>rWMvmeNoMUrnBk zEU?pe9DAQ^Ckt&z!KlKtmzJA0KckK8TycCwBfX1@)PS1vVJb907oj6PX@vaVh^tSJ zbs9h(R?5$8$zFMoo@N@}snLrC8c>sIRs1GNLC2ggt}n-E$RL zXv{`iDhpYak>SJxyEa;vz8jAISTkJBoAIs7{Ls=ERNlk^DjEO>SunUq94aT0P8}N@ z2W>p|!0kD_sc$0RywY6#-TYh^-ngs&wyB7@ZfyJ;l{OUZf3{YS$Uwz`tz@iGt{a6*^O4aZ%E7RRsWJtG| zHdYrmgIhUX2}44e%5(Afg`cn>LK%gbAuApOLXt(1y{m}J@TDkd(lsp-cPUp19umMS zQy-={6fFDcMW!xFw=7*hvc<~Xqo>u|Q*620kS*BML+&~L5hE(&UA*kHS6e9O6at^T zM}x+6iOr0?E?9qjsaotZgCpz~>r#%2Zag8^rr9yh%~Ht#oR_N4{a!qCriTP8=y@a7 zPuEULFfR;tqglcRW1)cePN%i?O@J-KTx(E@6d2r3nN8(1Z&2RH>d24bd&D5t)~O`?B&jUV1imt=R-j%&8Z9|P*QWM)MWX`30koDM z?44sd*nUw<_G8mTP6Ixp0gDyhM-<^n%6)1ZmJejIZhgG%;XdXa+%6+Vz>6oyUo&O} zdHpReO1T}`$&3YsuLS9b% zi^KQh(<30KZxy(h42ppWfCzvHfCzvHfC&5o1pXQQ=^ydOk=#4$kH1BKTD(Mm5^}mY z2tfMNMIN9ShyaKHhyaKHh`_H%;E()qz2}Vv%1iA}KltM)Z=GG5azXR>(H`{gS|1PO z0F;;V`0v}}?8DAF;IyjW%wa$B|8w@Z<_o=gK1fbPO6>RJ(<30KZxy(h42ppWfCzvH zfCzvHfC&6D1i(^Gn&N%3ovf5`u_MF?_XNI$Dn;d(1MvU-Wn8*ZbJz6xji9=$B>Ro zR=3w!EVtcJIUskrX>d2#J3HT{&njj!uR>=hb{8q`2l zd{`wL)v)0q1}B%z?j!$V%_Zd6CZPDFB!?j5AmW}D%C=QsMguECi_dK_!>X8IbSoV9 zG@rF%&jTm-wxwGofQUGLVxA!%2kELq*y?5BdgF={5m2F3@%!|Z4LU`E(8jiWDilItXV>%;FWL4Ja+{9c)# zphv$~<|pXb?`vfKsDCHj)O6ZXFrQJh2y@39If3z^>%DF?<59lJWvEZEyzRIbKB)J( zNw~FRysIz7+LQ6H9DrT{vyJRx7^JG&Oz@eq$4D{z>ND!l2OUSsv0GTyjOkUnEjod0 zN-;@{scyb4MOCB+_Ojd^0zz97@b+m>Yh0-0YtnAtOrFy+6AI)M#ha72s~m)}bJEz% 
z>#84BqlL5;86VYbyYUo%j^8uCen}G0P#FZP0Pn=3{URs4Z+-7xPLF5PPE+rrgqCc^ zuxDo1*3IO^rzF%y@v}eMwPdywZco=Tz<%WOfnQ8_l!>0)N4F3ui#NZi8t-o2{K`se z=Qf(2ht@oKgF!f2e{q}kxbc>$*npGcTE`17f;&uBP`YZy9HFwv<4DNaa$_fNV{|?2 zSp|8XL!sDBm~QZ?#0Dp`sGz6mV9ib&XfK?#ZnSn;>DnHD?Cb;gMkk~`&gZpSUAaS( z@%(-|d+xY4O6comxEYc<0SD-ZB4S*ej>cLW;l-)iYF=nVFKN=*h@N`wl4vh39tL!f ziRRKSKo1IPRC{3^TZ>e;CsvBv?=xHMWwpj%mlNkUCnGDSW|EP}Gp>2>>w06@H5J>w zXVKVHg1F6g$U4AD)l?C^+onpJi;m@;>#ZVNrk%6TBGCTdLg0`5aX@{9f-AVXc>?bpq*iVcfp4}9<2XX@0Pk=-~5<~z*07L*p07T%|Bk)K5xP|j31Lbe^ zr_*QrFMnT^Sp3faU!1GR8RDZHHZasg; zAU+S{YLimWeEZH_0WFT6gCefcQmY`P5mNlHu>vABOitA{S$|RQhZ_;AsKrE``yN;- z{dz07cPW-q{YsCu)$eJDQKD)`mYN}M*`cuLhj6;;o8b^GrH9pHIXoCn?Gasq!NjmE z^H0xS^2E*7I#!5=U!~-ca7)lgAI&}{w#bN!Q!AnDGC}{#$#l)4#I2e^O2rP)f8pBef1wr;%tfshWMjIAkwhNf|Ddp)^|SF+#>rMK zJ}`RIT^f5s0_Ks=Yp}$hLvtD1u!*25QfM7NR)p58+f*qQB$`l^KNU+|(3Zw_wIvs_ z-K7(feDcg>Pp@`^9UelM{B8%eVhAdR!dWY{JaGaa9CacXXNy*aF7)g3tzvGz2T<5b z#ukVecrWEuO-vYt_17$ELy{Y;%r2HNPNY85m3>E0LdMg+TS>3s@ObJ*mDTgC_qU$Y zJwUn9nrRy4?$OE`zPpw%d|TxMjGMLi09+VWN(@w4c2@gB@e(yZ|V~j8}_}5-%yuqf2e(13%>W}__Z{xZB>n}mngFyp+fnoiTTkI$6m)K7LB^SYe z=XCKrC2cjhv(zG8_zDaxuALc=nwk2{BdFB zRMIJ|6@tc(Jef9-R#jRmX9hjq(yw7&)q=oQC4BK>79*Zv;Qe0Yc>JA3z>p;N!er3( z&JIf)eI96n13O-mJHmDh-pAK?1I(r}@mLK%J>F<=^8nu;d(km+bJ#fG@cQ=4+0RvI zhBx>W67O(Jg?*WEi7xDTUp{(Ra2YtbZ*?{xTN@xw&8L z7&Ec-eNi2hbCOYh@=F4;rheutYXOfW82U@iGX~zLm!^Wj8kx4njA_0{>An+itxYU{ zx(?MVXXfbW4i+iVd;pdm`CR$ZOKydj4~KMyJ^*e${~D%mE57c_(26YXJU?eaE#*{;3C>&M+EhfRLd@{EBr$7A5me*5MO8@-%&!!|OG z{@``=??&~!XQpEM6x#m1w#<1Ar_7ntu0jr2;60+0%c{3&5R0SmL@KKCv(A=FKly#m*ZogFD0N|m(y-7$6DT~Sf#Y$ifziyt zw|?G5nwB_1zW~Mjd|;*(k`YGQtJq2<@rhZbE`#$eS-7!1EUJ|~&U;NUmRd%|56c&< zms`^Ac9`Gg-Wj3YpMbnVLnec5`b7-f>e=%WPu5|9NcGJ!tp`4=2xfw>&sstI3C5`M z8hEU$w?#w|0mksFnYLbZPj|Ra;3T0^o%``r0giDrU|$G7%84b{TIR1Aa*tlavHkc) zcQVgjW34cr_t}EuTA^4oRT~C4k8T-#C5A%#efHs+coJcYG+)jG_N4Gxp0aJ-+i_hs zi{K6hdz@>1c$WIXCekq2I8tBJ}>0>G^9^o~PFDVI!J%M4DDknqxt*s>Ctyi7te!YAOau)AOau)AOau) 
zzYc+aMt}N8{Bgaf-=I#y-=aU|U!p&Gww~P-xCiM^KoTGcA^;))A^;))BJk@H_#=PZ zhx4Wb<)!wgAN+B!GiR61UC=y!c7NQzt*4dbZ-{{bAOXOq+)_?|$?SIm^?6$T>ALUV z3C!nd3L(Ff=_E<$JlX!ln)+`Jj3L#`W8C1yLry7Eb;k?U9YqX`Edq&t$fn^nCXzA> zg?AfAVs)R47gJR0&q(JY$g}QTpBF8^!%>oFMkBIF+4T{v;jJQepZNhU3~HIDc`R~f zH-}gQ5TB=z;Y5?lL}HS~WFF1p zg(X^AxkJQor_wmc9I;nRMG)M=Ii}mAyqfS49DTZ9(MU=%t&=Hx+MAnhoi~&owQV6| zn-$i(tNrF~%cuP}haH+xxs&4gSz)q!0(I}M&8Fs0SRuMBWlCZ6)tJC+NF>tX43UF_ zcgKk@;g7t9P%4VNLh)$BfMtfrNNvGA#A4E=Y+cn;Kx|?cxb$uu;*vPzFJ-m=3Ry-ZqUh`H5GR7nz zuQ;hJO3y7cj^ucRI9k8;vHK)H1nfjm{%7fftpv4KA*0@G*1xICGB> zu0B}6BH4PCU_8%&f_c?h^{~1uRw8|`mgc%q!^iT*pk1sv?m@;uiO==AS1|C|1)a$# z20a-EcX1&?@L5t6wKf$e2TT)(#mmBtkt7+#$7DGuBi`F1UfK8RnI@ z6iqVUp#8T;N-(6wQyNxYaeY-2xHz`EyNQQ4PR-9J~HqW<( zUns(i*%47Fpyu8Oz`fR>-hNh5v#L1#NtgxEUhM3V0vfCryOH#7pUu)ge#q?ayFGIx zq`icKG}d?J_BA~hl@7hAXLWg&?OZHuc_I}ZPLQqjVMq@J+xR_p;U+k>8O^j=uf9`o*-~G7;qpjSRZg0}e+&jK>hHyiafo!eDHdPk);^XgMV zJhPXEW_#Fs8Tf1NY%8OLhb+(B3DBydT6+zlc#S{I;fRY3c|FR)UIjw$9%k68kh(@lHRzz?45Te;QLof|`=z1q4gcjdS&~X`PO?gA_z;BwfoEp*uD6lJgee>zS+xtP2S8wwf?CanA@L0wa!)$+aZ2}X4 zF`}1QZ6&MLp`heZ<_Ed_7pFLGsQCu7h00#K-9|8K?NV?d9 z)l+@qH{tvI_{~YsA&3Bo0EhsH0EhsHz%NDMpV6ED5&yf?x$g4+wBEdR^Y@1*P;W-{ z`TcxAy*VZE_wxyzC)WdiDD(HT|Fo%>>CI%9=*@1QFE$0FH(%rdih&4#2!IHH2!IIu zx&*#?{bDi)Xv=F59#VfPFH;d{E=eYWXE((i6DB$V27?} zKYnD%{WyLu8<*$qLdPu{5ajpAl=gv{ZbTEHqT%N2QwEFUUxWHa|CD!_DPWA8iKla*h|xxU6u zW0J+6T=+l?Cop4~+;+uA)1-7ADolJH5I05pw3Lu-()@-MfO9P`K-ry!Gj|0sz?%69D1P zVKPwHoG*k&*YhI6{T56V`N2 zp~rOFX6QqV)UtNpwVa|n&d*zGi%fl)R_^=dN7rZJsU6o*0%w#QiFe-S^nEjyo z_R*}r{zGIunWwNu$f>X28A~?UWAGup+=w~EAvzpp-xQ(=1yk*7&t`{f6%-AXd@%r- z7SIlUs}RO&v&|sY-3Hn`iuG`PI>s8c$I$k=DoiD_#FVy54tboPEZW$kBZMa&_^x=0 zJPxt>qw^Z&y%@Z#4y`@DI)V!_xxxqxt3yZ@dT1Rm6k2Fi6+8hMol1yrGBW{Qi{nR4 zG-g@6spULbB4&x7k(b79s+SY-`%cQ{174BWPjrP-)p1rZuIbKe;2*EcS1sS98uQX7 zoHk1e$n@|;ihK8z22~FnpLr~{t+*(LS8r~GbBeSSGMiTlJ-SXesI9b1J#BZb`QSAo z7PB{1UA3N}oJgyYPTh^AK=D_6<^0@i%1BO!p$uIBihKT6;vclk$lL?8#Cyp+d3 zFS>5z&jEQteiLz2HA-sg#C6zRIZ 
zE<s_tZ>%{^-l`}`c4wu5(@d9~hh!hHEbTm8MBeQ3rf+%}CSQm;{3)F|U* z?v^i<(W|fVQbRZI|6 zHO*Cum3W4WK4i|pye%($E6R(7q@VlPfP886aLi^6ZP_O#9$J|h8&JTlLOU-hEnh0N zo+9VfN*A$oSvyNSF~LX+Q) zIr-LEGf36Z1UtmgaZqX&)NiBXzk*qV!h-yw;3QD*&Ww@mFwecksaG+g%34IXB$kuj z*6*_R6g_;?Z2TU8*4$_LxC~ZQO`3~A*l8l6T3pRuWxP*Nte{q)LPIc#sF1@#Uk7~* z9TUDhrKd-5to``|>mAWsCV1f8*3Q1DGrV4jL!)xnZ(70X1|~HPA!?L8QSN(8pujkB zx9r`rNmiGNv%9(S6zV>em#!i#Y#j0|4YC7=OK9cpZ0s3@{597*vUewJ`Rd9xhu1ZA zOL3}tzjzcSZ$^?(X79)6AS&e+b`8247En+PND6xC*JBJ@zWCBObsIglNFaUpflKOy z;Q_qp^~?p0w^Q~vQF6B zTOMcSn#ue#GbyuyvE2d}h}UcY{zC)VPz~5Ji3c{suj-z*%qhO!W03?KL(&Ws%VnPl z?jEusgNRCfkjfR~qfaSrfK`ASq@Xc8Uv0NQ2zHn8%k0Cq59tCqP$KbJ2q5kn?CxIO zg;BfJ+wugwc)t_VO~K-he*1!otKC$rBD4~Gu9B&)^Ak`1%fCy1n=$aGrZN2!T(AJp z%mMoi4bTkwEe+620$HVgMy27Ov7h{P>#%_KlT&lXfBY`N2=3o#Kash_e)91O$bRxO zj%!ezKm8#%u@P+sbP`r$n&IOJ!Vh5iN2<41qc&-J?mtJm)8 zlu|@}#F5_E%F9ASL?AUU8w!SuM#58(HA*Z_7|47b4{79+xXx|0m5^T4?dpg-7KvKe zdZ-|2S{~%#eiQwE_8r0@I=LJ%4RK)_aiJU6vQZ(n1+j` z*70hat_;@;t@(3=KCdk<8`W(f`(zp9PWbi?1bRE9VGs7Uad6w-<3~OpnysZDvfp`w z7CPK(>m*DklOBeUQ@(`V@1m?D~$P};1ika3!f>iHc0i#jv=2awShj1o&ea|k5 zS)V!tXok;eMo@&*E@Ij|V- z;XoieL#Vt@MjE}&ahrx9n4SxcW*(({UDf~X-I5nOWj%?yNJ>rVxUI}IvA5ZXhHUyj zY84|6JbC~R-R2d9s*YGDfjWR9u*!S`!K;#9o|(YpQ+&QStvajIVG($iCNlLEx^mnq ziJ9mst(+MtMe=sin=^`rIAM8vFX@O<9`IS~3R)r290<*YU2{a;6!PCu9~**5B`h$3 zWpbuy*qE}mqpOVSLTEv6ys{A@6r$sDY>LXjTfsN*fTf>X-#o?~JR^Ou)r)>o*=Y)jv}d$A4V`2=*_HM~}~>|~`ha?j=5 z7l*l)BqD>-@7_WOx5AJ^BhX>1aM8AsrCE`nxxM>%nTGX#qRr`7o|6Efe`0Vtk*D_B zUiAbvTlvV*=keDjgFE!NE#t!W;b+nsbmXAhRm!7%DG!?_f?6!2LkG}>AQW9xQ&rwQ z9A437qT8svRUSMgAb_Ii^Em!Pn>5~aTSL1(st_<4TIbY+iUq$~I0$k#fd=O$>yPg} zxA2L(?0EQX;2{;wqj>fo-OLVFo^Ve@u$JFnv1+o4FZ4F<&w0qJzG*VMFJj(ST#8vX zOA&=L71)Zjs$ThsN{!ka2B$;1ro27>`~qLxMFD-eAta~~%iKBdmNjp4mg z-uZxQSrd?{x}?*Y;5+0|dnKkKj%}siyj%;b*{EDUuTx+0m|$?hWBGBpVEg+-tn!nO zzf%hJar}4mm(lmtV*UNPGZbG<8}xI0F`}unp25a~shUN0w~1+Si6a!&9Kwoy5|22V zn3d%B036-w<=V$K!Q zaT9p2rmwsPYp(Ka6*qbQb3JuObae 
zhDdssH6h%3(_?{T!I|iB$)-BEpXP^w_0Jcbk;V+W)vFKt;_x2m6FRTwuH;O~t@jX_l>}j7~Mq4P`bGss`One$|KgsWCykx3~Ia!}O0W z?B6I{^=a3{eAC)QOLiP?%CPT-%kWzJNM|N@@&=&C{1&6e%&G)RI#MFsJG)z44u-<~ z;Pb#olum923jayqkL*c6JqRc-)rbE3>v5>*&lIrJs(xDz^P~QS^PdPV#4lTq<99); z`+kWVm&a9U=V=6 z+kD>R@Fv+vpc{qHezc}=!!mHNoF{z1SyO!&$?r;7(pvOxb%@Nac>^!y?AR{f$fO1u z8;?D*-)&|7ea5QZ!8n4}eM#|6F&abz9}(CljK@ZPr2}I$2KRV}{ScmX3|EhYun_pQ z)-ui#^zKZU@waKV$WLq)-I0`9H43UIB}&>Ylfk(aYUD# zT;GNJdR|}pAcE-0kTy1nf^2%Ondc_`j0q8eBTlG+u8E|H6Ov-=Y}5m=y~e5e*>@+8 zWlQ5z-+b{z7IF^T$SmT{`XsV}S|P0Rt^KT{rWJ%3bnDUJ@w{=t6p2an#*g*3_wV02 z^gY^QS40%zJ`moJ?yFI&FCK;^yM}KNR7EDF5F*m3?;+e}@&1*NS_(w%{mFLnCn1mT zu#j^f94rc|m#=WHy4CmD?gfk7)O|u+A|^O#J8;FibCRdy-c*meRV*eQOzOW z%%5Q13i^QeWvCiTw453nk@CY=tV^FoWnb)y-`g~G_x zNN+z%KD+s8+VA#okAE&KM|hiMR*HT-jt8ww#!!Npvo(Sv7lOSUr&P7kDtEV2Zhc1a z6AWsIsO!a(ta6xBO8Q1Gu{{sGqFw1()3JzFiU|%$nu7JBZeZtzZ}_O)n$XxP$Kn5ODkkXy#{Q#lPRIz#@AnOG{t?b`jDNpC7yx%excXi zG=fm9kl2DO!OssYvLW3I^eXb-`rt8^8a)GxaJGJdOk=_#PW@c_9P&uZx#5S6?6n503rY)03rY)03z^9 z5r9ZJw$9D&=1P-@1XIfMa5Z0m#`|Q*reZcy-HU9X@cyn3@0-&0@ zaDu;L^id%#En20Z8jDq_I@>*l?sVrZUB-j>jX~%YFxT6DA)+KCriaNd_BJT#zT9>Z zX+vYZsl|D-)}uqql3R0(y(S)$FvhDQ-u_-d4r240LPku!?zYCJ*87zcGW6IHn(`Kn z9VR86oiJ`~3{b_J#3`aJ^rOwfoSh%&7YVhc-hSSiFpBafn`gKcYxM#BH7HctNo zU6NSuFuA|P$Fr4&>>K<7y|#yY+p2udGu}|Ecp}s2B8}H4=nl%kH{c6jaod-+@nJ_x zOV7klOV*8keh^AQXjEFmkBisS{o!rU8ol~qL3<|Ia8M!#_6(N`8e)s_$yXV8FpRPW zn{_&6O}O{v%lI7Xq7;19({kaNknfzfJQH~txBWAb6iD7BsKMx2j|X1gB1T0nAe_4; zmjNd4Rd@T|i&aXS%DgGF4u4S|)k96JH-s_UZ_Kka<(<3IgJ%mX304A=y97udr30ul3R*J8QASwu-QM8xL7s6{f|IjT`NUOf5lTJRf|IzD9@kUrE*QQf zqc((tNx+R%8LA(Ik!w$(9jrx7xgJ}K+Z*cRK<+Pk_hHv{3kA3L#Li&x^w}1!e25ba zp8sYljj~%^-Vab=MyWEp)M4~v2#MQkmOR_{dqNseP`4SaehjXsMFEk0C6NjO6)byu4xXBQ!KkvYz0@(Db`oT#b*FVK*o|nD>AT4z z1Zm$US{JzHOcTEq;4{)GKVP!l+661)AYnjDDN_qmmm*>=bvyCc7@V-+je&zs9JgW- zi?zy2;Sib^v{gwS{A4T8h@NaCwpt<~8D=27A|CR=5ZUdfuwF#$y zK>_|9Q~YnxIsO^@$v?6l=Ro~$w4cmhVn4}K0@+XghG7XR5<~z*07L*p07T%YCh$l8 zIN#qz-2bkJX&E%U64ktfW>DfAP4`kO@9z-AOt`r@3!e0S9++SMdff 
zXY2#1!R2gEM5);fjAN)aoPGu`znj-g7QDDK1?owg0Jj?i={ z=9gA;_gXH6LD1MzaoF`JtoO>p&S)ZbsK`GGSRTtC*z>@Kk+PNKo!}X-eqVkJwQ~3< zl21BC#FoR9gwvdu9=VM(qsDbBQHQpZ;caMm_GqOi9)gz(aeu540)`O(id;S-Ef``Y z4S&(Qruf)5A0fG-hm^D5Si)0ueHIb#IecvyIczBFio=Lv4Zcn-nUp))0i~q4W)D86 zP2$?GOXa?pbpCuMrkEo8NV)5qZxw{WDY(gE0AL)qszT#LwMu2Hv!@ zc83dUm~4#G9}HlK1sWe%K5{5cy29xHHtdrwp6bTI;X1W=rjd)GhJa8pJ9F>GyKolE za9Ta#JSja34v#5~Mq0Xi^H8f5j5@r?;?EBeFm6qu1;<{`a`*h?DV1b*tz>m#g{x-i zIR+L|fx`yW6sx`wIlm9`$JQs~95k7~unQH~uG_~coH`m0=NeF}J zV|rqFSD&zQccul}F&$4FTrt@3R!=nTW9cuPkS`r)6#C2w}V4@xRz0}}7WdInMN96DPiv)W?L4ZR>Mn`SpW z%*}OVt$C_bltn57sTE03Vbja1ZP&zC*F{Zr_BoqY#AnwO^#yvPQE(y_ZqrI%)wvy+E=3o*y{#(AsCWxPcM88BFQ2~Otl04i zYT$t>TPL%DyR|Ua>uUy40LBr83~2)@dwBTX)WFTe6%ISSc+0#1ZX7FoH;D-r1!d_Z z#uxBN;re!@Buj9JJvgn~+sw)p_(?q!ZOqq+AV-s9^l>V*c&J}ac0D0!3$q=vWs1d` zoLz5Y-8b=1_)w7PTVB4dc(7VPN&2#+vv%38F`_NrN5$p!y&SOFjBpy?ENnOeK~k^# z+|NS#Yb~C4;V?WyZNz8a7WN!rk#Q@^jqfk3+eM4AR2nPJc{GNLt{YYpaP?gk^8AzA z#u@%J;9KJ@>=jjJ-S`B-Er2K1ic!)V;C>~;s3P|ulcB;*$i|%*ov09vjkrGTw37<% zsQH1B3N;k(?xr<%cw)=G7#w@Me(!x6l`9ej{W1SGDrQ2u+(r{U7ETgVf-pBnFLA0l z%SwZ0Er5rVs*rZxB7pZ1%bH~hZMuBOYpT}!Z>N5$k4jKwKm>j{0)J#r0_s6Pd8t11 z53R>J)xm^Lw*TSxDx|E=6tL5(o@VDI+7qRe$mv@JzGnp; zfe3&IfCzvHfCzvH{89uUlTX%Pc%N*(TVEW(j{9P^4ltGnWxJTkYB5+ti9bAio%>!( z71cX8EPq|`h=O3`{p_Bz+Rgj$1*7dc#B9N^{4-Yko3G(!JvgoCC!Wz@PC*V!-OgI) z7c}9)2`g$P5#v?LSgLQ?hpfq&YAL)m$D_r>hVN`j{A8I@*mQiZ!>#7N2xK@TOI$y! 
ztrme=G>wdnU^|!Zoh+WKtoZKgN@ShLH#jgqJy^B5Juw&5O`$YO5%?wJ%k9?jo^%f) zG?q^&4#t5`CZwB+H-oIC)9_7)CAa*!8Dck+OV~JGw2w+YB~s*N!&nw{cK#F`;;+Vv zXobo)5m{hHeQP>SKf0sdX%tP;CkLht-$rJp&BUv^T_;?D2PtIfODtwIlOK_V(TiO% zh5YO%#VtMEg^|)nx;8JC24xXQAqu1Cuf@wtiM=EciGdPpptD6LJGgsSXc`~u>MGMv zKVyaBWY+u(9=LpL3rTAg?`>QiL28KiGdl;5IrWA0v2IlLWFKn+#cb5SDbx(JCdhXox ze9!;RbkCi8&hv!YIcKp??YCCd-t}YEsuQ`l!|_3AO|sspugOJRU3?41+#h@Di#WA4 zaa_(^I#5j=)0<~aV;|=*@DXc2Qxx`54GqLYbZSJyHekI;w~2ZNe*fi65glABYnTOX za8t`MYn?`RZ8Ix{wapKmBJ(x|0-31xBrPOrE(SRg)UWKk_81D`+`_jr)3qe63BxEL zIR|TKgawPSAd0+3!$`H8kU)dwE?lo_e2+q*~Y7NsVnWRI;BS6`#nHC5?A7LqgTlGp4 zS44i@)YkBaTYg{exsT4&CTMB*tXj@%wv~{#I7R7qnh|6UqcMulvO#W~@KY`;s=qV? zR~TW2i*#;&6d!KWHV2VuzCqjj?JOOQYT@jsUIVf^pmehIPaZ_a=5``Rd#k;fAFP$F zM5s)lB2uVk%2z4r_{1g%UcwJZ46weG8LK^&Jd<4KTW`h1Ye7HR^f|>U2=HS@P{4N1 zbhFzwC7~ZQRjAMQq=Oh^)PuzKE7oE1Ve>hQZY{jljISf_dnH&5&lC)qjZMJm(zIWe zY}jDvG_BlbHo`YH6FK~DWNBj)^D_|`;w$0cLM3XV`b4j&0d~RQkvHOZPyLW31`y9# zBWQBIk8S#&Ppe1iaGo_)wS|3cPAjNhhG7w=f6_|lOY>Tq0d?sP$t&zRbbq43`nk=ae*Ax zAw|ROE|&N2-2t|~G&#@j*5X3|PS9$54H+6rvhHV0%pt|@1szpklw-;g950p$g7oog zWL~KmVlj_P3a7x4U=MP1!LweuG&n$D`H#BO050a=i@+bFpZxRUalYv%KZVEqm;EY& zJ^qFI$;@~3lO}(Fe)8|-(SX_j2mlBG2mlBG2>ek5e$`Jx|FV0bHxMu!5I{!x(`A(3 zv<5&;`P0>u-?RomQ~`)8f7I9v_#Oa(e;t8;6p#Dmm#_X-emDN)@ABg`#s91B5EI>c=0|x&2MzQ2@*+R^h{U%}zWVLp zzb`d@c^iu8=d1nit$={^fB7I;^!M%Tez9u`|9v}`U+ngye&6ofXwSy2l>09|-zxwmXor%t$k^EHE|L1Y%zxox_{_i9F=2w{1zuzAEznrE> z?Dy@y`PE>=@7sOzD}mVGxBKQ-1G~R(_sy@y`M*V2{{D6UPd{a5^v!gR!sPK5pA0FL z5*>NlJ)8?&HxN`X_pmp3so)uc9_VqrkofNZJJ>pYs)gdfjG)KHn&-5xt(d(;4sITyaC$nBbP-xMisnt6}P@c??r@0D{ zeziqqYeXn$jVB9bA7`xBt0yyP){|h^S$qd57bU!-SVxyrVm(AJ+3N`iYFq4cB80?i z8@~*GxaA@uD?O9;z6k4e+svzrt**5!Dp86OH<{Z$NameR{_ic8s4KA zcc_pwY2Z!!B-=gulY+yf!e|9(Ey0(A5*=+JTRFYqaVVll3msFG0*WfbvGIFlGse34 z{Zyjc1f2oKHU%Zc!G%jP=y{E(*ALYz-9KMD6T=e*tk7nYu{PRRRe!HU|=L9C;W5KM%Jj9^K+ zw53lTMP*V3#UY9i3b_o5ML>S7Knj+mC^=%#d4Eriu0W&>KZ7suyp0YSJZ;;bSn(m? 
zi*F{{ON>&y@S<@hMkdQFeN{6slgsFQazhmlFtTwdt=x^2F|fVL!YOa@t%o29G|6@) zbHUV33KY=@S602HV7MrWFXzr$KfJ$rG`p%zV>oJ-W7*}BP8W4w@iwUS^4DYW(GYZ% z(?+GIPAU&hHbZrlnW>FXAr?ycTcH~#!93crsMLPj`m&I1I~A+UaHQ8|_6*h%#6BaQ z{4N_(?NWu4dx#rc1-T@ZQ0#RS2K(J8i7xsJgRX>0T#nfAxAv6pA%##W5RrAtV&gAX z7+9Vt5ql=_ULo%hU^kOCfWNecgg9@dy|vxubgw@MHcXzVsRxF!z*$;z?@W8BkcA_p zu1RqYV&*uGbJ48)NOAWSzUy>3pF>E<$nQLuQvr;ZrIZs@erTx8rxv;#jVA(3sNE6C47<|l<#V^~Xrm;$c7-frhYFm}iC2ZhWO z>A;VI1Bq7!W7^n(<4+|wF2(ZcW(-NFSlyQgJn^j-*nRD|@+DS;BNI;xwg-ss&POHH zQ5F$%Qh?w18qT+-<7}STl328_bcf2rUlG#i-S)KTfpae($!!XVO2)FjOhC$5Zih13 zI_z$yG$ejh;Kk09{9{RqAOW)||-n(XKO?eojGdGN0fdVe(q903Rb2mlBG z2mlBG2>eG80RK^L0rvhS!dx^s83bYLQ4r6g142dZ9yxjq^RLI`FG@D%1K?E=Bv*%(V*As_2K)tc+#-acwFuqn(bR!xQq?N?8*Co!$BqrHHO z9N~q?^LD}JNe<_!>(^Z<<7B5yY(S&4lC0+keM%6+B4Di}E{j!`l(#A&80N3kQMKZm zt_qTNkrRZ>I>j61B`>(D$gWyT&MB&RCSU^?dE)~Vv=M4OIwsuUJ z*`a5E2%5M&6U4>q4})0?rh7DQ8>8+%SQE0+FS#_NChW-R&ad-13M@(_2|h}KOxArS z&U$%}?oWZM8Y)LND!{-|pcW#%Et|Z&ZX%a^%a0-2d41Zepd97;OEavV@gL)lzmi-` z5)-+0CY68@TY2CeL!=Oc!>l3^mhE$mp3nMAY4$h z15^zdLX%MG;T+3pr}e9Q>Y&YkIaG*DKMM8*sJuapb@CNCjvsYkNR@7zko-_msauiaDN3{BqxJ&sp(`!k&+o{~I z;;Q1iUFt6pUxGb4xS6~cEzohaI>HX*(q-3(LU9cdE0_jeq8w$G;hY$jY7CVb%o(u@ zKYQN!s>w5)3%o3Yp}wWH?c~KqH&flO-Q#qx=~@@aVE1hruWK$i-i}NTjM)j~TKb`h z0E;T9QS&CZ@zW4ap6Phy3MJh^@761l-KNtr+plKVK7>sJgb$*ExR!M$q)qJJ1y)9= zBmp5mTsOcIgl)JOYuh^e?+H4wWIb(BwVA9?4%mNPCfu~^qM_mIViN4VWN{~S&${1| zBPY9%Jt2H}Jw1XX0VkI^9!)V}NtXLn9~~}IM(u{6$cHj0T)4oYabX^qgm@T&IDnc- z`Kc$T7HGk;qVfXK&!aS7YrX&;2vNro2ANHulNG+7gfbWM$8om%yqZpVfjln%=>sv| z8~&rkE0sXZA%=?4_%F{ETC-Md^b4G5r3YMYC;H^^@5n?ZD*fgRN8xH(HDzKNx5-=S zN1Kq2)1V2nc@|BEhy2)99qMs@$`ekLM#)2tngzy0$%@q1iq&iq<%*)z^n)81^<4n z2&fT&0Du610Du61z#l>2SH0jjG1LIr;7^wge$yHN_25re4}Q}c01@F&7ZHa3ve!iy zFa*$ls+QpXHau?=*}72NeO*A&V)UC&{*UC&aYB^UlS3=hMCUmk03eAV_Bi){leF!W zFCZ+`d%c~g52T#zqMzgv9r2y!p|!a#T`Z@MCZg2lrM60BvFW8b)hlocqGhptz}L|t z#$YOUjY2I=%WY5c2IP$liuyQEH;uHryAOyv`si~w)(pgeXtMO~@4w>BZhN1grM|+Pca%6DMb>EC%T*BnwuU5Nq0c8eV0quHT1cQZXcf zM>snpeKVRdA!O`)aVWUl@vH=C-Y*pM6qq3 
zj$l`F4!!NBPSD%7jsO)pUXA|(BWl#wvncb0jgkxofm_I~nQRqq`Ej4?LDaZ@jGZ@G z4Yao1@c=4#HAx4SeU*Wdrh#%NE6k=~9)dH4(pv;WURXlXeIp8N z(w+FK5e!_Woag!A*-^GsOtbM*P)y}zCIoVCLw6u|&gc+Xv5v;Tco=b$LoyMW<4t2D zw5_7c*Bh26qad{mP5kuno6%Xbu@!zl=+i}SN8RZS-+c7;Dj9nV`qk;I3(oP|-HTMo zIv7;t38CE$wL8v5Hm&SQ-qvMg5;3R-^5k=ct440-gZYXnAYnl|w0Wh13y+tBJNigX z^9{@a{KvGFf~(c(6^mE|^=i|-`xQ?5hc6l0OM<7VXi{v_n#Id~RJ+A({&ZfdHCM=P zi1S^s0#iLfpAxdbYy)_oaTlRJ#H^Z^8_|k2WD8tm3(%i!nNbXQE8^*~_eH(62uU>( zv6<+#+{)$AyBMC*+CseWe){4}(cc|+b1&;u&wpx3Qi9Sycw+X*4)k7K#I4_`2~oi! zr-{1Ms?-#I%mvEgZNx07y)$lLaJPz}Ef6X1JS_8(90Tz9qCJIKcrss?r-k8`HsdXr zmscCw`=n5tsdBVY4s+5x-Yu%Q*L%@XF-k0ZspEAL<}gn#35Plp@PuX*3xs~Vq;#ZK zei061g2uHlu`UUVxG#}1j~ye|45sKbC@Jw~Y|_Z-L^1LsW(!Cj4Ih*@wX#`6g@~0x z*l#;*#}jvbd~P#_Po0`u$zF@tS!hObp(n7g(~^F^(==MDah0XGxKw>-g@lrqChbVCQ>3)PNYVh{A;RD zqsJ^!9;=A8V-`IWF+B*`Yr34@levVyBXj*Q@V7G8w=iu$Sj-VKG0%!1~D)#wOHHwf!(VSThtVH;=&8ko*pm5+TGaic08VXhxU`m`)6I zX|8)LCh>icpS#5>@LZB%MP`HbqE?NXr)l+$Hc*hdT zYFSq+oPjBvmOkd6XOlCVeA8UGv*Zk;!&jszhfWGo5Z<2TX_((y_aW|?3eU6eqO-QV z@*08LwKp1>oki$)_4TDnFOq$sJJ-28FT$rI5Ce$m+6Ny#6ktj*bdbxh{AyXgvegAHE7EgF!*h9MsWAJpB!KDdi1B`l4>TI{}L7x0{%mw9TJw6Br9Yh%sW`d z=aVy(o~?1UcBBrIEIPuigu$<&m0omTqTbsPJ5n~}c@QkT&*Zd}kGIFxxQliQxwxEd zXzihar-2E|FObXPbO*ti&lARvLe4$QQ%g1%YvCZ%a@uE^T89xk)5vqnseG>_)9BgU zTMBJ-VguGkz4_>MmS#COFpHhZg4$JgHtM-1n1=WW;_Dcq~)%6e00)sjYd%__!G0{pi&9sxpiE^k0f&l_Uv zE6+$EbUQ2vY_-d29BwoaA~T1fO5dXo3Q^INxeydxkwzxmw4=cMFtE!~0c3@z=j$6$ zY|yH^sK@t9GxROjI@a3_y@zwi4rfJUa?{bbxEwQB4o|^JF(Pe^j6`_|dC8KwALx1K zR5*p#E|MDQWxQ|31?G8w_QTu}qI0XAuaB@gE#ZgJVM8@1De_!b#LD0r;PnGje)&4Xva z&dMoCXdTB?m!@kB*WP=s<}^%$ie|#1uwtUh3tS};$K~m3SQP7|v=-1nqyz1PoJ;KY zF$7uQb6P#>t!OY98OS37OjT$CHp~$G&M&8A&w({!@q$9G+0)ftF4|=~noD0#-S+k^ zq=P+BB1xddAJ-!NwjJ>1%4l^v1HAQS2)%sgR#er!3-$3fG9n>tUy}t2caF6Nux^I6 zsK&W_O^|EUQB~B);-+82F%7O)W^vAww8|P9DGH6L`USxa>nP;DJFmypJ44FiAe@^m zFK0T3!tUy}vPUU{J8E7g`Z~%F9^}|&WKHjGvD5eCuo!#plS=L6<*g?nQZA^qu0|^= zF<-+8lky2T(ZNe~%bK%KWJwMbIO;C)BK>JG1wKV$k(IJPGF2S>UT5gv0&!!?|@!9+N?PpN9uU 
zderDhD*V}-@of+|q>9MviX9m`+Qp4`-~M*W^=A&;fbRwn_#YAYW0r6KA!!oSKmROc z{8N{23tas^-fzpdHCX;uNBOV6rT@zt`|7`M_ig$1Lgeq;eOtc$!^H2~eOtagzxVrg z-2lZWZkV1_aQk{!?#Dfb05~6Zl8zT))XM-^$;n z5rq1&cHlrzf5~hK@^|SegARXI7LtEjVfpLp@6(BXQ%Zg{`BM-6kuLX*!?*IgJpQ1x zia)i1z>ojFsYg)1q!$GR0SUVNxdDEv>Q82WdHp?q%x@q4x8)zh*oge zsjC07{_>5(xAOmu#~;#Pv?%{6Jbu5wd>co6U6 z3(6VbjcSY#?##lpUK_Y7YrF_@dQvN0?P=*;x!a%e(%B-NT7(OUYSBC^iW8#s0uoyD z1-!kKL>zZJxegZ-_jOkhp+(u>Ie*a;qd^r(u)FK3n$cT#Zwl|&+%9aW5ia$T`h;#BQ3Nw@^hBGngKtpa5M`w6m%mYc{E19e%NGc_bW>< zB0U#0p(M#J4N}gim^*<~`A33mzTVK8yd5Us``}XN8XoUQn#%gRh8s}_CeXA6vPO4$_#K@5re8FEGZTqv!zb72N4SOsisc~hyZ$^1N8 zHA=^Od&qktq>x@=WxXpC!ssgwXfvd~lbN;{CYt5QsTJ67SJMVV)58-<8r@+P7g1^I zW5+eLc<7P>|18|^ihw*M*glIlI7)tmc&d@d3rvL+A@)JO_NCDTvz$b?z=LH>WY7Vr zte->>G*7zAuN7%$7o9%Ep^W9wB4tul2c>$Ll^)4V3xfuK^o3&_zWF6fS+!(9@&fhE z0PW{4;Trm}+f2 z?7KwOD3i%Gj6G4YGrYhCb1hab$UI(g8ub2_H~cM#1dU}ba#6=!UpfQP7ph*A11=rQ zcEi#-K5@ER4s5)w%!uUjgz$aG{BCMh?!t6cAfeKx@QlJ&Q zIZvB6>Hx+`9Ax`MT`1;KjHw@nQr%D1nl%sf_=at?cchJO>@iHt5&@Ne7?uxm?~bo@ zKaePrE30O(70)x!9HZEOU$@pwnB0TiJz>Lmp~q+_m-Wn2u*?1hEF-Lq>r-5EEeWs? 
zI-4iP?(`vh86uBHwnyKWJq=L0qfFZfHA@TcmGK=_QHCJSJ*;z3#obnxdV6fZBCjYrz}`O|#Epn%p)cBOV6 z#cmD=;1+3W)lX!IG|{qB{Y|sgmOM2jcVAy{G0e`R+6Pese`t7I^$GvLZ)pf}G$Vq` z_96Y20rCqL^jta1b=FF1U7f{()+AQDp)T~8A}d2y+Domgfq!gmx$HBxA}Z>V4DL1s zl&;0TN+17y9@$3@mB+Hnfxy-c#FA_yU46{h+~wGM77`ieRtGoUkG^SN&)~T;CxwP) z>?jmSKD_PQb#f4wgGZi-&o>lpI3-LL@{OP0jbM$*4xMik8WB;J+%y%`p(s{|98P}I zb6zfYU;nnpBB4bGZZwbQnY(W~$7L4%#wvo}hjKN+;ml!Jg;<)GL8~vYu3^5~>Rwfk z3CJ=Qc_U{bH3S0AJ}J39oLe7Phr_PKR&}hOn@IRHGVAaBCk<=g-vGfgen>p7iBVJkC=3)&<}2W=LY`!oG4G1&aIo=A zC9}gr5LgWo!$+@+=apr?o`7_TafAXD^3E_1mlk+wUNb5Bix&jhjlz*s%n%r2!!|_m zhi&25@#VKT&w~ti(#+myeTR@AeRD}It#ylK#oAqVrCZE#&v<)RJJaOf_hKCa(=B-1 zYG5ii#>0PF!VHnX*fNrSN@EhJM2_5N$O$%69JkzQ%NwI2XSKe-9LWJnBR5FK67H={ z??YTYi7Z8yM3rQ?fNxZS{w$(CfZ(}ZLZnuF!W&nzn))TR4;>oHoLPk4$u7nD<2r)y zAl)z+%Xtf>9hc~U%qlx$x>r%yB#5(QE*sh=VgIv2(HYZ#j?+WbkotVK?*i;a=pcn; znrpbGUpmoIT$z2VaxDMugZz&1GYBc;Jt_zMpdZxLzk|_b^2# zuefF);cIiB^4Al6B5n>=l1p)TOT2*4$`YT)~nsw^^#vV9H)2XhM zOd+y*^2m9fSMRf)f_Rs!*8+_b=o(3%SegE)M}qIg3v~U>to^UGX?*iW`Khw{2cD?wxJmDY|C z^RiWfwZ?aab#t%gq8o;6Jfg)(p(5?Ecmp4S3Ea5w@?^M~lUDO<0}fzR8NyK_Bw&u_ zhDz^DA4ZiI94l>JsTPpr?!wqY>Jc(h2Wk!`f#xozW=*i-Iy=XeiexK&a0?NR2OB4D zeZw*Cd5g?_Mx#SNa0NX%T1K-f;<#bH!#lWxY1leMr{**AWH(f@pg(Ej-d>s&RmO=4 zZz)0(ym=X|kSY9nR!@YF$Y;94>6Y?r)hZD(o<(dss~NB9ZUobxN`UqU<6D~w)oZ9NT!PCeV|#Y#6zd(TuVoM{=(8+ zFe91PAOVC{0(ae>D6@RoTeJirV0_cVksd2Wm2KvS*S#a#^YvEql<=4MA^Ou3XIK-Q zrDBVkdM#A+QM@&Rte`@YBl;CCR5TwlAgFRowmk5=}2`IZp_Md@qi*xZ* z=knUgP@YI5WhSs~Yx5Oq2(obWa;Fr@f&fD#v2XsM0a+C;b8RX?W{K%l;qY9!=OGBB zqg=xRy@T|AX&0U+i|u?zrb_Vr0UDR;kFM~O8_-)bC?=;!oda*TKOX15z!okkYr%PS ze$huoB-wM3Y!4g~O`}ynKpn zz?H9H=ez^s#hgftQWIw88TgJs{3^~X>Ajh1n>uM253O2OnrL5TakOmKWUynkN#eBk zMc-O#drCkXQ&kcY%kvb|Ok6LcGRBy46>86#xG#9}pkNkssk)oTA}nZ@YJd+xG(=!n zoW`I8dvPq~Qg)j@3qfq**Ds@9HDPxISL?H%a2#*Y2`m{bDkyBo6>LwT>#22#RXt?1 z`6F5b@hh{ypDToR>qy>$Fl@()c5on^_EHsaD0A%t3+Og*Zls{MSHdE?Wgg^>-D4zh zRtKDf#@OHXs$tl~b-U&Emt`1w3RK?Sd=NLZpF1mCH$OZgU=qR9M%5{?cWPuNcC)1r 
zrI#6(kqhFPWOX}Kv9&*VJTo&=VV+Z6{RJ5*3t#~l5kE03el1BrxSSBkc2Hr6n6Glq*Ygx zD2we$XK4MWd*dIk#Bl0m)mgzVjy{t7;s#IB>hM=rIUB+PJ9-fiQDzoWuVmXy zi}K#0@GkUSmSq}HFk3F$28Ro%vtq<%J9|5RHWnPilfgbxmp;;WIXA%QKpYN|p+9G_ zWuc`xPYOd+&Ud*mni_n0!^ybn6w5B`8-FC**4lMj5?muxKZcj~%d-FD7RVuqe!NNX7?> z6elIm-mw|vpDE5>a?qZUfCGI3VYI}8pL)RFyUT`?-3?-k81DnoX27k!roOX6b z=~5JP8mg+#BsORGfiI0=8_;Jqs#{Hc!m5Pu~>K5Wh zQ5loIVH(#l_{WVVg`)9Z9ln9W&i1S90z9%(GA6&{>phv~HE0Pn z3x~Rfm-b-21Td~VsgAf9WRb+fB;&;zo(Xvdz)-h21;|X5r3S#JSpiw zs!*uhWa`p=m=6#35>Cbw*JV-@#ZiVVI9DNR%1HdKp_-PsRCy#KiK=+;^~{1H%=}DG zmzsR=Yw|L)SdPm&F8i3=Vj7ALQ5f~Q^#w=}mP=3)lIM zHPQA1FS!%{{DPqw4S!J;4*C*|DQ3G3i4ADPww>f#vQkZ6p^Xs|FR#qi%_~#)Q!$3f z@f_9!p`qza87$9H5yiI(gS8Z4;p!|H^73` z$)Xay_8_=7SU+Ze;weETQsbZx#c$x9Dj@Jok`zGi>exdYeTrzO<8FrRahrtH(DXdw z#FvFK9o;liNNTx{+^5YyXI~Ah8U5Im&P{Cs6G?%Ntz0MQGe1t^Hl!I;Ma&$0;b$Kk z$JoyyVVH@hIH?6rNk-RwxW@M8d|(UpIf_%RAF`)8?8{q?tVXFsOpScxKm?AtU&i+wc7zKk;M1HR9238ZCb zm`Ef=?tv2us8xYEdObqSn9TT2;Y}Fuw5a6^Oy&_}L=_k_3s>xtcl6}P(}fB~vgX$F zp>{zzq=KA`afr8pd-wuN4v(N&iqH6TcSX6K?VvV9MjWg2#ey zK`M5A8FozM zv@S31AmzF>*Di%rVn}zkb=%5zP*%Z?;;{>j!V28h7|FI#GlCMGFU(9(&?T!J4|bq7 z-AgQsCT$eG)bew695FYtA~Ii z-5f$!mDa?cqkY-pjIyUji-wkbsaW}dUt0v%LbSmG%Z(PhW2{Yv=Ax&PY7Hv>tk@hF z9MlI%3+so-*ybIEkBJJ$P#DrD;LF*{^)RLjJ_@FMw+Y@sWE9CfJetMn*R*?D4Gr>$ zvllE)S`_xGiU11zgU(>BYI`qg z_EliG=$V5RW=&F_ylqO@!f^Y-#Hxf-EP`i7GSLplhxW0?I++K2hyjm_v$wWMVWVu7 z9W$JB?AelBcdgyQ$m=Jp)n8FwrM)bQP?wlOgJ9`04)JdVcOl^O58Fg!8AisNsW`u!GR3Lfr9h} z|Hxn<^OcSujYx;d&2ZnX{Y~pu&j=Xwn~kQW7%u7iz^%fNBZ}DhGS^RxB`oc)V-OxM zVm(rsnJT_y=d?8;;DP49@0T4~wBGG*ia(%XL@2s^slK3|R>eHcPtRh4ex@}6+65Zv zHKn*}w}Eu3qk3b$6@aZ|*a7A(aGjwr;_c$8wh&zofoulLUr)!q*4C!R*S^C+s2UAZmMejBd{qI&T+0 zZRS+{D6@JnkxmAQlcC*l?(1Y_1y)CN2a7mxdiQJs5o~X=_?b4#LY&#WE}AA6w{T5N zr4QUG)wb+lTt~5OD`&+k9Kh`Z$VV~!v$yZ__!NC=#*i)4`Gm~ug9@E3gW*4r>6zS# zrClQPL#l|UeR3Q*qozzjbUiNfgY*>`;iq{l{o$Y2QVs5*lVgL0fc8mSI`N7CXqfHnI_cCi^ z@P<9Oa!;y|Iq$ENOzV2|l@J3~(mxpJ7Y;sKXiJG!Gho&#)F6fj0KdWiphk*m^Wl)O zM9}w=8CbiG&j6OC|AX5a1$bhAwOAYqaD+za!l^YOK`pu=r2%FGtU0LpmG5JM^^WL@ 
z7RsVYeVoAfB{+#qn$;8^E48XWDdlU|WD=&l;t)b+?xCqP9&?8Ks|I&yd6WPG+)WSy32P zw+w?FyE+hK;eW8cG;F--lm8I2qoUX=rL7PnH56^W_MUXU42l+{=i1_^aJjZeXG^Y7 zZYEAXRJ6m^O+InWV1%$g7BWgbvbPf5)9wYb9vKmp1+w=nsijCJuG9JP9rv)Q3m^0f zvKe2p%$|IcT_l}=`$FbQ|Bt(#Lr9lsrX3Hyj8^6Fb%H1;!9pK%1cWiPbmvW9+?^7& zeqwL{hB8(xKzizmjf9Klt_T+(KDU_wPB}qY@pzRNE4YTTg!i_1ZowXC)dUtw$=Tkp zEweyuI<_j0xZ67?xg`$#%w5tyCPrUMqFx{!#^(ppBbq)^>d0QPcJBO7!BAv+_KY#T z(22rSCdo-T**+WtEx1gr#a~5_UeMtzs(pu3^K3G}9^R^1jB3=lclz8(MLl4X;bxEs zs;{*e=T6H()rqG;ufGf59UgNdkq@*YpL~7Fd=gxQ6YgScssb~}ZKVD-;r^V^0UPzv z&gKHNllB)k!0BsP3=Ds3s62}wJ=RsO?G4u0Qa(b$+z?w*n#q(FO)r!b!YrCD0U3oR z6O+*KHT>pF^}E9E=TA=SkI>~$db%6=6G%+Eim20qM5Y1Qp#|Fx?Q|p%8nT{PEAxnq zGB)~PY-cKya*`nQjaBNS>tLS=*Npr#La_>=!iye6Cb*ncN!Q*`B&H)|vLg z5zy&}-fmA5Lf^Vai{B=c`;vQnJ=ThfahM4vW;0W(@g-!~+Zo-n+GNuZcbpeYP9qM4 zK&Jw8@p>GXaFsU~CqYD-VnU;KmYvEqB3dp|pY@Q((>Vd@M&$L{TQTlr%5>v1lI9M( zO=kYdyU7upY3lh;?sYs1kYgYHmft{(LKa*a8)lPXQo(Um!NkAM7_mc?*IH+X`e1?- z1G?#Y;yl%(Yv))zY0{63SAw0$Ft+J|9(cA3bV)K8-g`^R)6~vJq5%PR%gWyk7C@T7 zrI!BFv{C4s5vAqB&@(I*d@ZbugAM+;562k+E;ne#1t!7!D4zuk|8blj|lV20`L}CX#anwum731*MH-y`kr1;`W?L>j1iz0{2MnXplSdD z00IC400IC4|3U)4>IJ`vLjlMJ|L0|c-*^K+J@`Mb9{k1|03yQwc@aVKFFA+>fI)!% zC@rDdJ9Hn5VDT}V)e#1{bFWKburM26GOZ1kNy$)%$5QiR7qV{LFvjEk2H{wNZ|KG@ z;c%I!Kdxq5@6HUQqE(o}i2|a?MOe0%>uUoRbX$#Z*!LeC-STz~*TBJYrE6 zJj73@nHWwDr}1d$Vgs2Xtq~U50u44~%vN>J=mG6@aZ*`X#GT|pPZXBVOHxml46DA^ zFB99~e8xX+W!3~E+YGu4(uFu^fTs;yg@Y)Thv|_ZT0Slzvh59X`0;K}msg*DE|BPc z<-LsFCw<)qp*2(4*C)L5SOX4QEuv|8b$y8Oy+Z>fhmf(o|Xf3F#`z@h^bZbT4To|rmvDBBA}s0I`ws=svFrHiW};f^PLKG|Akc10L9r$fTGt^RR=>T*zt*0qE5 zg6-FtwAzFxHZtih(-CvBx$VG_L(A#D($xKidE>JQmBTCwbfQp?X%~tEU`NCo*@MV1XowogageOF$g}SxEOYiGF?;1|e?><+#sRl__#Dlmb=tTyX z3_)e~?H=oPRi*cxOr*N?dAtXg=b>9w5WDcv(cQ>sp1qmguo2imn2hpiy~qrD|GZ3( zj&Dqkc+iRAV2UYQTs45%C!i^1&lP}PdI4QI%~}Fgoc6l7sCu?g^>ksHs}@IDQ}rC8 zjWBueUN;Wtp-0t}#ThDg1z4#361bc-<+P@UN*5H|#WXzzjzwz<@f~h$s8Jh~2236& zvz!`;s~^7l<7mF4-Q|NQk|5?QYpaL5S7qsrwHob}#FZq`*Cpb3wOB zgKh0D51*7vuXc>q3x{3Km^Gb}t6Dtm4W*@LeIwD+XYx6Mlhp9js5=rtWxNZgdghuN 
z1PPJF*aVw(M)B@hD0jLA>`S`sV`f!RV`x54KtI$ibEHt_l&7>9V);mr=#5;IZ(g~BRaX*-zy-{@5bu$D+8dQXiy3pRbfX9a1JX^ z2>P?cA^BQ&%h#&LJLa1ho>3E!KDizE^HRNZCcuJ-i&6D>;gJHM6O=Ur{Vp9b z$q2w;|p7GQlE1*8kv4uTU7P3#w*s{2sHFhW1zlXkuCD(j& znvwFHz;IR?nYlE-dw`{vktpBa6s!lGoDi7*2+K@xWoYycNx!vH$U-}sV;YJIGq2QM zT=Av~qv`}QqwL*bNUKxxK;D1cAZjvDd(cG(p_PrW)x;~4WRR-?`! zks`-;@DrC-B9%nC2c4c_@g?<8PJl z!KmcywmB3~RLP0Ub5gxyy&qT&Lvf#yw@(Oqm_rH0$|FaVZs$}iJ*vxU^Jf#e$?RyJ zEj3w%C1wy-OjoQ%YpD6H3rxV&a@(TlZ2ad;m{TjL&i-iDaSInTG#R<@?^@&Q!Q& z_ntj!-ToM4L-DSo#HbnJOPZM|+1yKN>x$7$Sn8F3qlN!^VpQgBC>8z=?nH{N$o5zN z4G95BVtV@~@uMGG96RuIKtbw415r6?Q>l$xx_R0To&9tGi5Cjp@A3XiDYy3eM@$wg z?AfPHo~hoL?%39Z<@K>I7~Gd3PnPadEXY&Gc2R5;6;JD?eDXZCn$SGtF!oSV15VTs zWJE=);upr{$1MbF9Mrs3U-wuTU3nXB88lCgtz;m`xjibaeDYBh!txhgmt^d^-}j@> zg?VE*2jRxU)!Lq$txC9-fgj+=M5Yr(LClgsK+^3xn>a$EyU~5TlPJWlpz4&vc~k4n z!{Z`tjCtw2^_cS|quVS-cT{}m&456n;rL;uU^(xlfr~SN*SKwNtNv{}FnmGXx&eP? zw26(YE_K)|W9q^Ovhm$*1)Ov?6d+k)!Tk{iIqL2Rk0+NP=+3%?Ci=;a@bOTwu6s|` z4wD_{yAqO%*$@G!&^^3lnn1aXSE7rBQH1vBPdhaDu#93hTEfUwS_!Zxfk4S%GF&?u zR@;qRkVvf03?wl%v-^vVJrLwzeS8NdZZ{E-zN7FFIGk7KbZnQoP;|)E_=#;okngG) zjKF4(xfQUkS)d}wK&uRc7^>$N>#$Pj8@*_k?38}ogbwmAY0!WE@#7l!U(#>`1OGJ# z_fK=zZ_A&*o96bq^c)~Q77s|QC z6^Zo>NkNU9#08@swb;?ef`FmY5lp5C7I(u)kue|%1FajN@t`x+l&WWKvsA6mJ!x=) zwI~X>@6fr7B#+H>j7dBVdzPfm=M#j_(7sFCZHB3QzdB5ppIZ=(90rcmI5aF9zLUH= z>e<(u`flb1Yn((E_SHN5fH0ViH^rtaKiHgKi)&h#H+mU$)tiPjVY)?IfWybJx7O-Z zAh3^(R?QT*93!kQ?3~@m&~`Fq`GkK0;*v-^VF>9cf*>u}d`!LmVR_mEB_v4|4w0I( zA2UOo3p{n7-#=Kt#)JXCt&~MWu#Wu8tcfuLf%toPm#50W+3M!YJl7be(-65YL1$%B zTvV$d>;ZM~4sJVY@&o*#(_RAw)e1Z~I~m)TngJ5=guWSZ(_=b@7I7v+5bC@)$Qs6NgrXMoL*DS@`CL`C7|w*JY-$ zTl?$FRX)%UzH=*_^ghh-0ZTl3dw7e8nkRq}uK%%lRV>!5EKMa;r$hmn-9GMwwGZ|H zzq~ixT?skTNH6!XFkQ!G`SbAJFR@y-Au80_47^e5V(sHNTBYiAuI9VI>%=(I3p+Jm z$b8MVSF3(N7n<2e&U?d3TG($->bK6flIO3~S@sn&U9-;J=mp>NYlv$xx6CC1R~buXyG(FITyH{R~T$Cgv?nSRz6*sU47sTsXcJfS@?GAj7#av9$6Zx z&ssXq&`y@kJxc0FTTdUH9v{?dnf|)y<{%$in3^PQNn07!fB0kFC^u 
zthHjFY+Z!rj$`IO`ACN9G8%V}IdFP|&AeWs^?`}2VAQ?;duW=(0@eFH4W1|p_@g{%%~}RYN{|a+Q~iR(`ZQ>lr7W42o=gPT{vZ zY?w$CXxw2p^nv}Dqzg;HYaSxLbrcW99$>lG6J~k0gsyQ}LMZP71a(7hRM|ig#F;a) zioiJr2BfE?mc~6LEa1hdlMKBjOvSw}_cg1xtlR1G8ryyhgTpizN8Hl=MG1!MjET~# z7mKmui4~+1B6+2a6DBYqs1H1&xM$q1E8Gjq9!I+Os4OTo`V57Wi)`>mJ2#qKCB8wL zb$qs^g50F-X%zGpI%;`KRGElRmT@&Q;q{=Fw6dHJI83v0(-;HScB)aTwbSVbky^Mh zh3ZN@~PwO);RalJJ~x|)kDF}2my-?b(JKANMm*;ptm5P!Jb!~0T#n& z%ui&KLZ7JizkNbrcS5V4kxM+XGQ}Z<5gnE=O{9;B`0fFIKVPK!5Xi5Z5U@MkMZ5$I zgk2&h+=Y@aCAhT9YoE2WbPngHaG+iT4{jRpxY(K4jZ)2+D(4gGmIDP&KFx3S_SPAJbYxRD39ZDAO7umQ zc}9gHw(e5sw6PnO_W8Ubc8@q7XlkOzfioJ48-=g$0E3m1oiTPo)fd#G%h)*=Kfg@4 z4I7#zeRXc1ZN4p^f-vqaiC4^VS_NuHuwji-wZivfTR9{DAv#JarDPSs?T>wJV#0=~ zZX_EY}U-le$xM|J!HM8qlN@puroBNi3u) zYQ{jFrgoZ68Tyn=0)tdwNazLKxT$i_6fr7!kq?3^?U&R>*BKV_)2=C;$}wt@DYP)S zd(n(;RD`ChYT5g4<&*|8wcsZx)Wt9U9Mnn-#tzyRD>LbLnTyQK=?k&8I}p(H;I`QP zL_J?HNp7_I#-JNV%A>3K2(bj^W+r&nfJRo{FI&dca934N`#3l98LpJ*r)BoZ|frWtmc@FfKQjcFNRe=8HHp>zwznO)WzuMscZ|gyV{=b?s{d+#?Cx@TyAM*G+ z)#Lp79&r82{|Ci)KkA2ofq*1zehdEE)nC*8w5Wdx&YcVn%ch}yUEgM z>ilSRe`&R8f5vHl#A!AECI0zm{PU0a=ilOVKjU;i;&i{o8GgnYe#9Ami!=R!%M1U!3F+5gJNrN2AuA!t_v>f0)Q`S8f&Dy#f4lQWKf)y1 zf1c;{BQJ34&-1c=3P@ACdg7KhJ~vkq1ZdH~++cYvAAe9Q%2_{%rs7 zy8Xw0u1BT+hWOX6{y%@N|Abw4cOd}k5^NXtr8{6D7eZli8fAWH)W3VZb4$Jj68$w# zeh#|j!o_g{z15+V%Q>8PQiYRAX!5QiP?r;Z8YMk`ad*0RA?aoJSu=r$YzhAm4SAiS zgcSB#yK4IYaFJ`S9wve$QLl9`TkMlH^j12ymqoZLEuaAFZ7MWW$3(T+Jd)qp+GaKA zMyXipP)s|dwNbf1e@t&+zBYhh6|XJwr7I}bNgbb4w{p>_SVcZmc?_uy^o;+lEl9lj z$5+IYhN4M#o4sA6MQ4_fUfg;vHl_nJMAw5Nvu~v|+2cl^y@~m;JMe~SUWu2YZ!`ncS8B%glADs9R-_RQcf-$X| zU}~;S{OnWUd^N4O=MMv}dPO3sMPC-_pHMN+5=;G{J74q!3er$PoK5*}PJ1EGkHo^u zWt;+*S8uliYqK-(uOA;>eLxQ?fqR5=tqIes3120(3JOLF)Tz%~Od>7CF!0YmqF zbu2cpEXJN7K+~W(ddBz2$S9_tmXf>{8B4bai9Ec=BoSbWpzE9HXp%ocoEOISM>RS7 zfX7)3#KoY+Nx!Vhhx8(;mNuMma9RCQJid<;zlFo2F(P8ue>?H0<3Wl;K3{gS^;#=O z<<%tPBTW}_p?mc;FVVhAh;-;#)V4l9Mj}#sUIO3p%Gg@i#OcXeq$B*sw|C$n`_d8F zDDpc~>{5cCTVPt=$6L$?RGb;)NmjyRt2GYJUSf6tMX0wgJONzJcm>iD#mQoHQ 
z>MNBzm0reB=!*zLm7YVbzBHc5YGW*ao{EbO)fJ1>QP$e!lGlJ|#zZl0l`Ag~wpMb( z#M1?_*lroFnH*Yx0=Usw{ugycua14&Wx?9gNy4jaPx+CCBe|H8A@|7{hRZqob~sT} z_dZM7JY_O&N>w`Wn-YgyG#LdWrZxZPBXJ@SQ{lVNdQk)h5v9vwiLzN>!&(}#uXV5+!5ZyFAM@ zpn=Uo)4U=;fXPNIuK*&vx+FK$IifsoXAZ)&q4dQCl1+1DEB0P|9)MkE#s`;_7Q9$A z$0DJc%euQZFWQR9=(3M-aV9VCS1;##<^Pzh7l}5TPR?RTvyr}~ogZ-jM7HMZ69qaU zvOfm;+}LF0CvDkI!5X}1EK z4o-O9iFgAI59X8H2*1_-{61{VsDN-I13{uZd53jO```=gAp!ybU<;%_Cs{HN^!Anv zcym>7V$UrfBU#|~%03uBk6fwbr>0e4c_-wgTLA!c75*(-=b2Hpofwn8ua!7{oFIFi zT<7`ts^V<-e2XEh#is8lA&I?w=&sT;YJY8tozu7_n{{$xXqN;(bPqQuyk~x`J;5(v5Rymz|p13 zprDO3`QOpZ3`QN&X3Q@Yi{?TMdonQ(9cO)oBD*OTk>6FEi(s&=8?!7ith6NbjtXB) zlBTvzggLE0skD*0b9mA;$e&q#z+9wkc(A#YM9&59E^ofhD^L;G7<5x4za3|2Q^&8a zi))17EF|bJt4lkaEgxNZtjx?cx8y(kcuyVfx-V>MsJO^yrCS5k$ZXtC#oJh)exdkj z_>c)l9!LgjJ74Slk(^@36dW~%vr8k(5k(0QMD|@PU@U@7DTr2t-%TmO6wBp`-JCBwS#p%lJ59`YH7;s3VnS9+AM4Z5^QI-~ zr($*}-roTuujM?j&VMHJuv05`0rns`p|r@SX1TNb^r=j)6PaZB($@6(GH!XKGa^xXWWarx4MY&P5QWoM60n$) zuOLQJED6t;a9MB5GL`j~2t>46J@GY~U=|;&H5TjZTvP#Ui+st$P^2kr`uG(?wis^) zp@)O+b!eNkY2l#i)Y8qFWV4KeTES(d#a_5mV14sg3l;f*%VkPu z>qb%wKpbT};IMp9U{1$0r3=pRr(AHT%iNiwKXE*dp zHxX=tg4%uFu8PKqyWbX&D_ZzOQ9P8=bnRyveKFBTDFEO7_Uq_Ehf~;)?Q-nDuzjzaftp4byzhSRZ zVPMy9v7oO6=kj5~G^WRCu*x4}qZJoOQbiM*u^Dbu#$aD^oK26Lq%%SEtyLV3v zTRZHmtD{lm2)4!d*MhdlzWKpMrr)ejcp zG$)y|l1lv?!yWvpQWeSTB_WAZyu|B|`_<+0b}a=Hmcyzn?*s;pUA$pSL~-+@ktn~c z8KS7lSZyS2?|_>i6JG=<@kGrlMk57%um#)^OxOJ^yOeJFfln`P2!Q}oC0f($a6}lT z+-b#kMmrj?)LaE~u(qVY>_7%B-#^tyxHwJa5eh|8ztZCYWx=L+$i-SB(K-e~T${P%9O zaPN}NC-3~j&C43(Fh!N|xhiQRoZ#jsedKpc)T?gn_NlQj9*{7-IjXHfcfB$x)kdE(sbXt?So#4t)T7@YOOEtvW%(A-B}Z8N zh_kT<)1p~3b1vq3MS+gom$E2M3Y2~rhJ~T%uKL#3LBP;vnN@U55vMiV0?9Bgv1gzpb5i%A(X4a=3Wk}f zR2ei1Ew1}6`ZL1_u2pcwi#vY0*d;TLGG=Xq6%{#SMtEXDR9$vks9`9xET%m~9Vslj zb!DKnUr!x{=$W1()UhM}BqC}$4Ymo|`(oM(R`TIYowbuBl}c&2kgp$DHF9!Z<2@?l z4ozKJrw+^cH9T7TH^N*NV@bQsljvX*jfdi-n#7`+mCl6r*Q^3I1nwZ1rYl}sIp{e) zUt8nKQzGD-%Z7zD_ZC9Z5mBzh)2SRC&Ya6WX*K!wW)TTb(HL=YU+yC~bGT%s9kk|^ z_?L!#;L_PmRnPMgIG1ko$Ym|Wt6!Q~6EIGKux 
z-{`DS9i|0-3&$L<@5PT;mxzJKjZc-Z_ATSE>AGSO0aWkj3tO)7l{uG$L9k#fQ3q3y zI#l3hNWM*JHP>VUukG@dCpg9ToIA%k2(B5ToAA`}KJ5Lxu(!cNyGiC;n3GYB|CxN4 zLb_=8yD1;Y6n6S>YMJ0#%XyBWl@=BJRKuoM9XN{3;8BQtas!&9ONG%r&T--r2kh_& zTCh`PqocR;(*-77%_-O;KwSrZnL4CVYsh;;GDJCE(&qpOIPNtN(JIlZ9{2T?1Mw$! z$FK`G3JMRI8;J8rzrA8!Mut&%Mo$r!J{6l|i(8Mc1dYG!T@kw5(r&Kkk0_$Jl+?vY z;7lTb+?~`OZSM;;8An2l-n#?*kH#NgMPxCv*1Yu z-DpmgB>daAd4%9LkKIY&m-XH_qtqdImT^(6RMx1yVPlEecArn( zZj0t$`vExF@i&QGFH?vgpHIiM0+~b@FY3e46Hl;yE?XPl&wePT zQ}`oaaQO$m;7iDxFZidL`n{dZ8-X_hZv@^5yb<^h6ZnfS_)#?q2#5smW)0q~!9Um< z{BAV9xq~-%@DFwezZ;Ej2I0*h{7*FqPpAI#0KlRaKAL0JH)BKAPH0hM+tzTz!Q~a9zLL7G8E<^ z)Q9JxgQof(-iwi-wPWY&&;AiLXtjFuU4xJbL^@Edg z&_0y_g!6P$peHX<2l=Eq@Gz{K)i(rbHdIKuj`N3-idjIvYJj=*T~|Uk*A9gkzkJ(X z9)?Gy;%!Yix^KM;g5_ZEEe}B>5O5syi22>2vfi(Ky{C`e2RO#cU6384Mbe#BtceQ` z<3p1Tbh(D3+^>4Vxmu%fuuWZ#45H>8#$W7mDz65;XwlB=0%iA4>#a|N_H}PVXR?2w z5y{rX3Pg(&D2USW*ZP=eixhiPk1io-cjg+MLY$OkRp5$~Wn0mZc;K`JXO)B6FFcE; zwResTr)1Rx>CZlgJ?kjB$46iF`og@Zoc9Z>l08={NuESXg*DyPshz#E~GI3)#reQWaL{91( zy|;7ieco|Y$QQlkowfw#3dG2tlT-pliflPydu~nvQw8PcHwaZi++*K72dpAjywYWe z0g+&<9-=O7n0TE?<2I|Y&hZ|;hte@#1|V73;S{WGQi#UVt!-}OMrbfbKeUx6Au6m1 z^81W{iui=W1o4rEDc1iwkXtlu2$S3-2M;jnidti)dabE@#gwFb2(i#s%ZgrV$Wh}%oO>IdgO(8 zFZMseL+Takijk$3SCD&DNj{D_0vcP*oVq{0m{nWr6b*&VDuV%GiqnB&iV%i|mJ{_6 z44NyQ?;NA^YWRb#^)M=W2~mfwVpd^Yh0Qj2#F?}Jg+Jutk2o_D>jbcl<Mi@kAlG~(aB$&Jg^+-7~^-4F(o+0A(Vy53=$-IESFpac{3WjxGP z-k+G`!3NFrqqcS(T=iuY$nqd9g;sA!1`MW*3ad;p*o*KdTF7)nS2+1Zn=a6&(=s6q zoPzdOMw}qqS!rQ-dA1|PS8?|+NT@vJ$5;I0nk`Y6=;fC~+wZxgaT`o%Wk#KGhMUdn!kCw1ssNc%p3 zxVmy^P3}#u7V#=U4;LTG3e7iN#F+=;f{KZXE@_s)C=u5QUznQuiExFn->~Pp=yIji;FC@s% z;f7&{N4PoVe=u|GDeNZK$L!^yiX-`qy|hSSL}obK;|_D(>_HH&WDp@{kvLG)Bz|0< zW)FjiiFZiG_nCstTr1tN3Xa(G%7|XHwROIk6NJy7{>FgT>kv;Xf{SViR_R*o^L~eA zj|*wZyxHI(u|70?Rfkdey9Q{w~9_JZ=MAsZ7arf7-o7cqLf!kJkQRGt*=p&HM3X>M9;xr+BHJ}5qQvZoliA4C&3@iRR$_BFUbHg(00@lvu`A>efePs@nH z1C*=#*w^LfFT|O!%T6k3)-EUj=`^muxd=i!qFA+ZodYt9-b-_9Vs2r6H#thAuOYTw zE9rn;njzVT5mkIB)Rb173L@6v=2H`O*bWs!=pD9|Si#EK0$T?ZtaMQPQ3vVLeFlwu 
zkpMio?VbIuytRrIt&u|@Uz2{yg}0SOKS})8w*U9aq9E`;UkUVcWf7CfpXYh~=w{;f zpXX)$$YU`6>)#Ol2>+$|xw5FX`_J=!t}Md&B{D!wJ%ZcbJ=Si% z9*x2yR!)7lDl9`uj7foiv*eB?Dplg7tR6vxi77Rg;ePQNPeq1S-(*t7v3X1O;PF)5 zaEzrZuu+L^TuQOQmzyXuHP! z*oC}{OYjmf=2M51a;h?Zq+T2rpzI7+RG%4o@*$KB<0#rQ?y+1|N zhM>=)KQ+iH3clnVg+^fzO!vH%XP$uD?!zM-ayye-GvhFL8i>1Xf)m-k=M;pBI`KF_Jljs zyE(_#ku}Q8lm{>=pVzQO_-S9Tp*S~(&Xe-k6OG@BDAh7zZ?7P)@{7PPQ|I3*xucSO z2h{rtOSG6$@-4L~Xa;-v>AlUhtDA zkaa$RY`)t_AU}PvpVvhj3m`&Uh2!3p7>JP|2An6DLaULXNY9 z=mxQ50eUCw1Lll_<;`;yaGKjVY6zws_r2eT=%n$?+a98Do~U)S(&4Hg=2>Rj#~~oT z*8g&;H3@FY14;dt$)&fCnhhiW;eooIzSQ8r+Q zQ~?tdWzi09-$mqS@=e5o6>Vbc1E^L2C2(W8sd>8~vs9c!!|~LcXjxr~dul*JXR-N& z+9XbebCnH?bdK)$-?Qjm4&;B@eMs75LHIJmphKj{zHBM2_})Qx-Cj-G=@)`oRk+9~4>jO%Tzde@ zGZPhp$b1Z>KmwjIFZByQ$KeoMqy#P2Ce)E6(763^rlGX-S5-5N*4{7VG(jsu5A&6B zF)&o$A+OA-9nWNPxcCF7TMx45*J^hvzjO%_Y9L-vnFl2<$eLH_m=%L+)G$q=Ue%-R zyh7>pI1r2e^0`y+$Of6`?g3--kt8g3bk69`@|%(ZAU`cqt-t9}gU2Z!UxiGa?xPW1 z%)5>N&Pt-3`N??`<7Cd<)|1h8@(UzC;Z?8=XVKm*_{lLS?v7x?^XH?}BSAP7Ye_iK z0rq!HVv;TObNBV!j8nM^M0u`60^h#Zpg*q+0B#ZMQo&~&KA54T!=R~oDWyDrokPQI`Ji|JtQHZr6|F9>2^pa?YdABEN$lpCi^I z6NmUSp4G9$5i0pv(*Tx_488Qz^2kDMfR%L896TpUPbS23@gR!eTA z0_<^*g4TaNh9bh9`6kQsJWo`KM|D%WIiDdHq_j$mz>8}AD&7OZ@X32yX@g~Soxbg`7hb~m`J>qlPU&Y$OH{m7&J z^4F&ANBA#I(T}6vo-LmxDhti=prS?UzSu`s?#}GmHN=r?==Efj0tg z1l|a|5%}jL@E5cAyL!?$zxd`C|G|FoccbymGQL^Hf3Riz-DrGsjc=~;AM6^7f4!Fi zP8J|2z(0?2{PS(nFMiCsezt$O-SXdTlb%2Ho8&No`2E!F|MzXu|2+l!lf%#U4|)9K zx1WZc`Rxt(wX1)rS^Hc5|GC=={N(Vn{eR=}cTv(ayZ;3|em|Dqj^7Bp5qKl;KbgP; z29GQoKxPV5rY{>LBT@h{cH^4+{+UbtH{NTxzV$t?z78RidiW&vGVpma3lC6~0V3)g z6I1E!yPBIDL`WamO{1#=uZ7lpqQ8R!2sPNWJwxxA^FtSyJ}B|r%}a@Ux;9H> z8(FBLKYBi8;!eFl>D(r_+5lBBm2jZc^FUvp3@&wcKN3%5h$hECt0F*z#bW#N7gIlG zeLyHWtQRbM%+bh*a&sQ~lnNPSIa&1L_3FmX|7v}UVpClAgJl${ zZY``F#<*sQb-w}e+FsF2Nr|0P^=_lHza8x?1zL8hGB+jkkxGT?aN^Yj8h{H(Rrs#< zg8bnw?uU4%t6(hE^356Z@jP*Ekm6nNywS6j%)b1QiUbO8?b@pOFRw~TU0QR1@!877 z0K=T{+3r*cm={cWJpLE#y}Nc<6&IK3_LyYI!A{M|RFUPgn*xyx6l}%c#Kk6pEZ;4J}x8YocNo{OX%k^saMBNEm4r7<&$rX(e-c@qv 
zYJ<1Urt-jBvEX0Cpw-LejWrcs4u&!@hgYsDM5?Kj^B#(&c!3B=$&Rq_2zSc&_?+;- z9BZwhnz(ki^5Ihc6VmKHSi$Opd?x&I6Be>V6YLo^$NYtF#EpkR<`{IoBlRn5^A3eR zI_RU;sz}JtG*a7!w=)1vUJ%<3KZWX}&!P|<4?E=Q>y(V!B-!aERB12Hh`mFLqab~D zw1p?Rc_B-6o%`Tb zY2xjMzr5Vljj@%33hXC$+N7Q;1|RsDJ=-+MBbkM4;VVQl+oera%sBuQ{yNUtKsMOZCRPIUuCC!SL6=8S2Y;XC zLV@G%t}KAmkPk^)oXaMf#*8qj7=C4mJUve9hl>gN%_@3 zkEW#&j;s%+GzgNsK7H+u*>!DRGaz^k_le4wF6rZB?`l>AX}!T9E5`XljewJbA{rJqe1waPX60 z!$UB-Rds9O_ms$=*vB+hHCfx<9EuVNHzPY2r^N|9j_jgEwA1m^TXTJGf6o&8KA@em zN?&hTr{I<;vR3OX(87v}8T)0oQSFeeWk+Sb_kh1o!L-L(>In;YV_PMwL&wrB{ce9niS zJ{yh%@dU?HpqP_`1|BPG zcY7vh)`7AWO=p=ZO3|K`$Eod?^gP#2YD5S05VVTz*ZjKi+Fyy_l-|SkURT?6a^wg% z7R?W+Rc@Vgw8t@(;YF_n67K6J8K<$kKxD$S)uKb_kZy_>h^jj2Q;mBhE;z8b&RI~@ zF1rCrFpj%@^v&1bwLJ|-ONY|-y;5w?ZR~2^9q*L(ii!gNsuKb+Y7e3(Tjr)F5pF=w z4N^iV86?Ouuc$q{dU|BHW!%wTrySjy?_>#wqdq5!Q-5e7!@#p8OwpOROwp2O6Wx=j z-^bY6=>j|EZG&L(f^J$?HHLis;V`g%b1<*n)~FFoP^{4caLl~ z2mRpcVPqXi6N%sOZCN3A739Ur|j7z78mu|bQ?)7Vrge$1+u2ze+lxU^C zKCX|=dg4CZ48tO?7EoJbg*lpWnea*x!9~&dJr`n1r`3|1n=2aT7lcyT=gn8U2iL7| z$!?US*&aT_K>Kwj%3>9E>rc)X3}Y%re^ewA|5twGU+0pXD}P-i^79+^U%tx!YwFw4 z8-X_hZv@^5yb*XK@J~em!sPeM&(`?Rxes0@WXSC6(HD5mlvk@M(swI;2wrW=)M0!?#`D?-G|y<$RUR2qzvt)ONJeS z&Nk_O=@pAt*xZ&T_`1#YRsBnA|0Tz)#H5Zm--j>=C+kwZ=M9V!OyS8*fSNdu^3B)N zdcHf%t(($W{HQq9G2Y2B{l~sWa+v|S_D3)8YquO=09As4kxE8;uT{`@njOyHGP)^> zdPUlfU|C|NRdPI2KY#JW&*2H5H3Cb~zzPa=Z?4XpaHJsU5T{Mt(`$x`k7O<|CSE}- z;UZ>7Y7?G<*q>fN(b@kZ*#W(idS4PTSDttc%~(jvovR#}{5riUeyBxL@p8sEe?BH? 
zWQEQ-CU9JPZ7mjJtSD`iKu3XTl28z)&kdSNBe<-UO=IX_z$%PD=?aUh9ZZbC6&x20 zSQwlp&H%B6NEwzzaaqH16FDr0^q|Mv-U9vXfBVoLUr!)#T;wrG#Q;Zq+AO$-5^B{h z=S-2*Fn5nwo_o4C5RJT8+Vaqf>GUBQjxqb4eB_#obUMjR%hs#gc!+~kwz)9wPA$!q zk~pIa#?HAn-((Tubmo`O{nUnl&qjNLAO$Mownh)(#?g`SWhi$-Vgf_( zEl161EgwLB#^rs=rP0XjzWukm{_sy`qw%y8qsnq0n~1_IQ4T8wmFUTo2jVK#ZJNIE z(&D_6pFscyDMauXW^q36js=Y5D#xKzC6|P%t)a3rcoMO^$`3j+rxAE0bQ*#ZB9>?| z<4>18-Sk_Zil@J3?2x&OEG;6pEOWueJ?l>xp)P-a&>hsRVsxK z-Ktqb-oBtT`sX+--HgDz*FdIcQl9|dVnqZwQp7Q+&D@A}%*9g!>DPnjKBQRX5c`NR zZp{gh!|;p5kPNUvK?Scdg*FfZ$4UCETns|*t3xZLZ^){nuFIdf`hjx6=K^a?rK?X$ zbgl(JEYDSq419sSr|NEj#nT)%&*NB~MV3m+(H5)BjoLbV%uWLpy{MmK2bvv{8sbJC zE>_x7_X^5k)l$o*TUHILU`yb=* zgEF_Qs6~Pyv!n^;`NU(FTTj0ZFbl~%hBQ!Nt6;W7L36HsR_aj1*7)8n89+REX*k@K zage}P6bNffggdZV6bF8|>vVg=l`hxu+6rq&0sBSCv=d#J-dG?C>T7nQ!@%bfCm`vi zxgG+<8ZTIHnEm}Sv-ZT#mHoRPj9#B433ykT%kLIbDqWee{CYwV`zcH1FY0_pfd*5N z4Q6jrXl95hg@hw=23*Xt*R1gtHg~-TpF0mox1DXUU&9mtC^m6GzBiU$ZS^Ha7)~8d ze<8755thZP$!}pj!J_PCb;M!p=|h3Rd(`#gWxCAU2!O{5@u3^-GNq1J!!2tXkF>nF z@=Ew5EO61wRdN~(4Od&0%^;}BgsFb;k&A1PQS6u7ly9GZpVFMzZ2Nfs{9<_LD&fvl z>DB{b^v(9YFHq$X+E}IRT(X;n$)PZWITlY;eiR=NT!u0F0XFSk$LkF zS^k*HO{+Ro9M&7~0(8cA!Jr==eb=q}sdi$GIQ3}9Eh325nP-YrOO)ObOsNmW;e_lP z)fYuK^=Y*jy5UbV7_3>|Cz#k*ey@ruRvpw?-SF>_c}SzrTe|L8K}5QndYw{Lb9Ysx zG0#8huPA>_L<->1tnzm<{p^mzRV~wXkCNEdI}90_fE)*~$*IP(-sN3B0s1h5CBt+1HhwjcYjmI707)K!7v$EzuR^$TW!k z-q;N34FEt`fH%AIzs2tSA3GqYi~s-t literal 0 HcmV?d00001 diff --git a/tests/make_regression_tdata.py b/tests/make_regression_tdata.py new file mode 100644 index 00000000..03deb422 --- /dev/null +++ b/tests/make_regression_tdata.py @@ -0,0 +1,69 @@ +""" +Script to create data used for regression testing. + +""" + +import numpy as np +from numpy import random +import h5py + +import bitshuffle +from bitshuffle import h5 +from h5py import h5z + +BLOCK_SIZE = 64 # Smallish such that datasets have many blocks but are small. 
+COMP_LVL = 10 # ZSTD compression level +FILTER_PIPELINE = [h5.H5FILTER] +FILTER_OPTS = [ + [(BLOCK_SIZE, h5.H5_COMPRESS_LZ4)], + [(BLOCK_SIZE, h5.H5_COMPRESS_ZSTD, COMP_LVL)], +] + +OUT_FILE = "tests/data/regression_%s.h5" % bitshuffle.__version__ + +DTYPES = ["a1", "a2", "a3", "a4", "a6", "a8", "a10"] + +f = h5py.File(OUT_FILE, "w") +g_orig = f.create_group("origional") +g_comp_lz4 = f.create_group("compressed") +g_comp_zstd = f.create_group("compressed_zstd") + +for dtype in DTYPES: + for rep in ["a", "b", "c"]: + dset_name = "%s_%s" % (dtype, rep) + dtype = np.dtype(dtype) + n_elem = 3 * BLOCK_SIZE + random.randint(0, BLOCK_SIZE) + shape = (n_elem,) + chunks = shape + data = random.randint(0, 255, n_elem * dtype.itemsize) + data = data.astype(np.uint8).view(dtype) + + g_orig.create_dataset(dset_name, data=data) + + # Create LZ4 compressed data + h5.create_dataset( + g_comp_lz4, + bytes(dset_name, "utf-8"), + shape, + dtype, + chunks=chunks, + filter_pipeline=FILTER_PIPELINE, + filter_flags=(h5z.FLAG_MANDATORY,), + filter_opts=FILTER_OPTS[0], + ) + g_comp_lz4[dset_name][:] = data + + # Create ZSTD compressed data + h5.create_dataset( + g_comp_zstd, + bytes(dset_name, "utf-8"), + shape, + dtype, + chunks=chunks, + filter_pipeline=FILTER_PIPELINE, + filter_flags=(h5z.FLAG_MANDATORY,), + filter_opts=FILTER_OPTS[1], + ) + g_comp_zstd[dset_name][:] = data + +f.close() diff --git a/tests/test_ext.py b/tests/test_ext.py new file mode 100644 index 00000000..b2577c0d --- /dev/null +++ b/tests/test_ext.py @@ -0,0 +1,627 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + +import unittest +import time + +import numpy as np +from numpy import random + +from bitshuffle import ext, __zstd__ + + +# If we are doing timeings by what factor to increase workload. +# Remember to change `ext.REPEATC`. +TIME = 0 +# TIME = 8 # 8kB blocks same as final blocking. 
+BLOCK = 1024 + + +TEST_DTYPES = [ + np.uint8, + np.uint16, + np.int32, + np.uint64, + np.float32, + np.float64, + np.complex128, +] +TEST_DTYPES += [b"a3", b"a5", b"a6", b"a7", b"a9", b"a11", b"a12", b"a24", b"a48"] + + +class TestProfile(unittest.TestCase): + def setUp(self): + n = 1024 # bytes. + if TIME: + n *= TIME + # Almost random bits, but now quite. All bits exercised (to fully test + # transpose) but still slightly compresible. + self.data = random.randint(0, 200, n).astype(np.uint8) + self.fun = ext.copy + self.check = None + self.check_data = None + self.case = "None" + + def tearDown(self): + """Performs all tests and timings.""" + if TIME: + reps = 10 + else: + reps = 1 + delta_ts = [] + try: + for ii in range(reps): + t0 = time.time() + out = self.fun(self.data) + delta_ts.append(time.time() - t0) + except RuntimeError as err: + if len(err.args) > 1 and (err.args[1] == -11) and not ext.using_SSE2(): + return + if len(err.args) > 1 and (err.args[1] == -12) and not ext.using_AVX2(): + return + else: + raise + delta_t = min(delta_ts) + size_i = self.data.size * self.data.dtype.itemsize + size_o = out.size * out.dtype.itemsize + size = max([size_i, size_o]) + speed = ext.REPEAT * size / delta_t / 1024**3 # GB/s + if TIME: + print("%-20s: %5.2f s/GB, %5.2f GB/s" % (self.case, 1.0 / speed, speed)) + if self.check is not None: + ans = self.check(self.data).view(np.uint8) + self.assertTrue(np.all(ans == out.view(np.uint8))) + if self.check_data is not None: + ans = self.check_data.view(np.uint8) + self.assertTrue(np.all(ans == out.view(np.uint8))) + + def test_00_copy(self): + self.case = "copy" + self.fun = ext.copy + self.check = lambda x: x + + def test_01a_trans_byte_elem_scal_16(self): + self.case = "byte T elem scal 16" + self.data = self.data.view(np.int16) + self.fun = ext.trans_byte_elem_scal + self.check = trans_byte_elem + + def test_01b_trans_byte_elem_scal_32(self): + self.case = "byte T elem scal 32" + self.data = self.data.view(np.int32) + 
self.fun = ext.trans_byte_elem_scal + self.check = trans_byte_elem + + def test_01c_trans_byte_elem_scal_64(self): + self.case = "byte T elem scal 64" + self.data = self.data.view(np.int64) + self.fun = ext.trans_byte_elem_scal + self.check = trans_byte_elem + + def test_01d_trans_byte_elem_16(self): + self.case = "byte T elem SSE 16" + self.data = self.data.view(np.int16) + self.fun = ext.trans_byte_elem_SSE + self.check = trans_byte_elem + + def test_01e_trans_byte_elem_32(self): + self.case = "byte T elem SSE 32" + self.data = self.data.view(np.float32) + self.fun = ext.trans_byte_elem_SSE + self.check = trans_byte_elem + + def test_01f_trans_byte_elem_64(self): + self.case = "byte T elem SSE 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_byte_elem_SSE + self.check = trans_byte_elem + + def test_01g_trans_byte_elem_128(self): + self.case = "byte T elem SSE 128" + self.data = self.data.view(np.complex128) + self.fun = ext.trans_byte_elem_SSE + self.check = trans_byte_elem + + def test_01h_trans_byte_elem_96(self): + self.case = "byte T elem SSE 96" + n = self.data.size // 128 * 96 + dt = np.dtype( + [(str("a"), np.int32), (str("b"), np.int32), (str("c"), np.int32)] + ) + self.data = self.data[:n].view(dt) + self.fun = ext.trans_byte_elem_SSE + self.check = trans_byte_elem + + def test_01i_trans_byte_elem_80(self): + self.case = "byte T elem SSE 80" + n = self.data.size // 128 * 80 + dt = np.dtype( + [ + (str("a"), np.int16), + (str("b"), np.int16), + (str("c"), np.int16), + (str("d"), np.int16), + (str("e"), np.int16), + ] + ) + self.data = self.data[:n].view(dt) + self.fun = ext.trans_byte_elem_SSE + self.check = trans_byte_elem + + def test_03a_trans_bit_byte(self): + self.case = "bit T byte scal 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_bit_byte_scal + self.check = trans_bit_byte + + def test_03d_trans_bit_byte_SSE(self): + self.case = "bit T byte SSE 64" + self.data = self.data.view(np.float64) + self.fun = 
ext.trans_bit_byte_SSE + self.check = trans_bit_byte + + def test_03f_trans_bit_byte_AVX(self): + self.case = "bit T byte AVX 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_bit_byte_AVX + self.check = trans_bit_byte + + def test_03g_trans_bit_byte_AVX_32(self): + self.case = "bit T byte AVX 32" + self.data = self.data.view(np.float32) + self.fun = ext.trans_bit_byte_AVX + self.check = trans_bit_byte + + def test_04a_trans_bit_elem_AVX(self): + self.case = "bit T elem AVX 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_bit_elem_AVX + self.check = trans_bit_elem + + def test_04b_trans_bit_elem_AVX_128(self): + self.case = "bit T elem AVX 128" + self.data = self.data.view(np.complex128) + self.fun = ext.trans_bit_elem_AVX + self.check = trans_bit_elem + + def test_04c_trans_bit_elem_AVX_32(self): + self.case = "bit T elem AVX 32" + self.data = self.data.view(np.float32) + self.fun = ext.trans_bit_elem_AVX + self.check = trans_bit_elem + + def test_04d_trans_bit_elem_AVX_16(self): + self.case = "bit T elem AVX 16" + self.data = self.data.view(np.int16) + self.fun = ext.trans_bit_elem_AVX + self.check = trans_bit_elem + + def test_04e_trans_bit_elem_64(self): + self.case = "bit T elem scal 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_bit_elem_scal + self.check = trans_bit_elem + + def test_04f_trans_bit_elem_SSE_32(self): + self.case = "bit T elem SSE 32" + self.data = self.data.view(np.float32) + self.fun = ext.trans_bit_elem_SSE + self.check = trans_bit_elem + + def test_04g_trans_bit_elem_SSE_64(self): + self.case = "bit T elem SSE 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_bit_elem_SSE + self.check = trans_bit_elem + + def test_06a_untrans_bit_elem_16(self): + self.case = "bit U elem SSE 16" + pre_trans = self.data.view(np.int16) + self.data = trans_bit_elem(pre_trans) + self.fun = ext.untrans_bit_elem_SSE + self.check_data = pre_trans + + def 
test_06b_untrans_bit_elem_128(self): + self.case = "bit U elem SSE 128" + pre_trans = self.data.view(np.complex128) + self.data = trans_bit_elem(pre_trans) + self.fun = ext.untrans_bit_elem_SSE + self.check_data = pre_trans + + def test_06c_untrans_bit_elem_32(self): + self.case = "bit U elem SSE 32" + pre_trans = self.data.view(np.float32) + self.data = trans_bit_elem(pre_trans) + self.fun = ext.untrans_bit_elem_SSE + self.check_data = pre_trans + + def test_06d_untrans_bit_elem_32(self): + self.case = "bit U elem AVX 32" + pre_trans = self.data.view(np.float32) + self.data = trans_bit_elem(pre_trans) + self.fun = ext.untrans_bit_elem_AVX + self.check_data = pre_trans + + def test_06e_untrans_bit_elem_64(self): + self.case = "bit U elem SSE 64" + pre_trans = self.data.view(np.float64) + self.data = trans_bit_elem(pre_trans) + self.fun = ext.untrans_bit_elem_SSE + self.check_data = pre_trans + + def test_06f_untrans_bit_elem_64(self): + self.case = "bit U elem AVX 64" + pre_trans = self.data.view(np.float64) + self.data = trans_bit_elem(pre_trans) + self.fun = ext.untrans_bit_elem_AVX + self.check_data = pre_trans + + def test_06g_untrans_bit_elem_64(self): + self.case = "bit U elem scal 64" + pre_trans = self.data.view(np.float64) + self.data = trans_bit_elem(pre_trans) + self.fun = ext.untrans_bit_elem_scal + self.check_data = pre_trans + + def test_07a_trans_byte_bitrow_64(self): + self.case = "byte T row scal 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_byte_bitrow_scal + + def test_07b_trans_byte_bitrow_SSE_64(self): + self.case = "byte T row SSE 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_byte_bitrow_SSE + self.check = ext.trans_byte_bitrow_scal + + def test_07c_trans_byte_bitrow_AVX_64(self): + self.case = "byte T row AVX 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_byte_bitrow_AVX + self.check = ext.trans_byte_bitrow_scal + + def test_08a_shuffle_bit_eight_scal_64(self): + self.case = 
"bit S eight scal 64" + self.data = self.data.view(np.float64) + self.fun = ext.shuffle_bit_eightelem_scal + + def test_08b_shuffle_bit_eight_SSE_64(self): + self.case = "bit S eight SSE 64" + self.data = self.data.view(np.float64) + self.fun = ext.shuffle_bit_eightelem_SSE + self.check = ext.shuffle_bit_eightelem_scal + + def test_08c_shuffle_bit_eight_AVX_32(self): + self.case = "bit S eight AVX 32" + self.data = self.data.view(np.float32) + self.fun = ext.shuffle_bit_eightelem_AVX + self.check = ext.shuffle_bit_eightelem_scal + + def test_08d_shuffle_bit_eight_AVX_64(self): + self.case = "bit S eight AVX 64" + self.data = self.data.view(np.float64) + self.fun = ext.shuffle_bit_eightelem_AVX + self.check = ext.shuffle_bit_eightelem_scal + + def test_08e_shuffle_bit_eight_AVX_16(self): + self.case = "bit S eight AVX 16" + self.data = self.data.view(np.int16) + self.fun = ext.shuffle_bit_eightelem_AVX + self.check = ext.shuffle_bit_eightelem_scal + + def test_08f_shuffle_bit_eight_AVX_128(self): + self.case = "bit S eight AVX 128" + self.data = self.data.view(np.complex128) + self.fun = ext.shuffle_bit_eightelem_AVX + self.check = ext.shuffle_bit_eightelem_scal + + def test_09a_trans_bit_elem_scal_64(self): + self.case = "bit T elem scal 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_bit_elem_scal + self.check = trans_bit_elem + + def test_09b_trans_bit_elem_SSE_64(self): + self.case = "bit T elem SSE 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_bit_elem_SSE + self.check = trans_bit_elem + + def test_09c_trans_bit_elem_AVX_64(self): + self.case = "bit T elem AVX 64" + self.data = self.data.view(np.float64) + self.fun = ext.trans_bit_elem_AVX + self.check = trans_bit_elem + + def test_09d_untrans_bit_elem_scal_64(self): + self.case = "bit U elem scal 64" + pre_trans = self.data.view(np.float64) + self.data = trans_bit_elem(pre_trans) + self.fun = ext.untrans_bit_elem_scal + self.check_data = pre_trans + + def 
test_09e_untrans_bit_elem_SSE_64(self): + self.case = "bit U elem SSE 64" + pre_trans = self.data.view(np.float64) + self.data = trans_bit_elem(pre_trans) + self.fun = ext.untrans_bit_elem_SSE + self.check_data = pre_trans + + def test_09f_untrans_bit_elem_AVX_64(self): + self.case = "bit U elem AVX 64" + pre_trans = self.data.view(np.float64) + self.data = trans_bit_elem(pre_trans) + self.fun = ext.untrans_bit_elem_AVX + self.check_data = pre_trans + + def test_10a_bitshuffle_64(self): + self.case = "bitshuffle 64" + self.data = self.data.view(np.float64) + self.fun = lambda x: ext.bitshuffle(x, BLOCK) + + def test_10b_bitunshuffle_64(self): + self.case = "bitunshuffle 64" + pre_trans = self.data.view(np.float64) + self.data = ext.bitshuffle(pre_trans, BLOCK) + self.fun = lambda x: ext.bitunshuffle(x, BLOCK) + self.check_data = pre_trans + + def test_10c_compress_64(self): + self.case = "compress 64" + self.data = self.data.view(np.float64) + self.fun = lambda x: ext.compress_lz4(x, BLOCK) + + def test_10d_decompress_64(self): + self.case = "decompress 64" + pre_trans = self.data.view(np.float64) + self.data = ext.compress_lz4(pre_trans, BLOCK) + self.fun = lambda x: ext.decompress_lz4( + x, pre_trans.shape, pre_trans.dtype, BLOCK + ) + self.check_data = pre_trans + + @unittest.skipUnless(__zstd__, "ZSTD support not included") + def test_10c_compress_z64(self): + self.case = "compress zstd 64" + self.data = self.data.view(np.float64) + self.fun = lambda x: ext.compress_zstd(x, BLOCK) + + @unittest.skipUnless(__zstd__, "ZSTD support not included") + def test_10d_decompress_z64(self): + self.case = "decompress zstd 64" + pre_trans = self.data.view(np.float64) + self.data = ext.compress_zstd(pre_trans, BLOCK) + self.fun = lambda x: ext.decompress_zstd( + x, pre_trans.shape, pre_trans.dtype, BLOCK + ) + self.check_data = pre_trans + + +""" +Commented out to prevent nose from finding them. 
+class TestDevCases(unittest.TestCase): + + def deactivated_test_trans_byte_bitrow_AVX(self): + d = np.arange(256, dtype=np.uint32) + #d = ext.trans_bit_elem(d) + t = ext.trans_byte_bitrow_AVX(d).view(np.uint8) + t1 = ext.trans_byte_bitrow_SSE(d).view(np.uint8) + t.shape = (32, 32) + t1.shape = (32, 32) + #print t[:20,:18] + self.assertTrue(np.all(t == t1)) + + def deactivated_test_untrans_bit_elem(self): + d = np.arange(32, dtype=np.uint16) + #d = random.randint(0, 2**7, 256).astype(np.uint16) + d1 = ext.trans_bit_elem(d) + #print d + t = ext.untrans_bit_elem_AVX(d1) + #t1 = ext.untrans_bit_byte_scal(d1) + #print np.reshape(d1.view(np.uint8), (16, 4)) + #print np.reshape(t1.view(np.uint8), (2, 32)) + #print np.reshape(t2.view(np.uint8), (32, 2)) + #print np.reshape(t.view(np.uint8), (32, 2)) + + def deactivated_test_trans_bit_byte(self): + d = np.arange(16, dtype=np.uint16) + t = ext.trans_bit_byte_scal(d) + #print t + t1 = trans_bit_byte(d) + #print t1 + self.assertTrue(np.all(t == t1)) + + def deactivated_test_trans_byte_bitrow_SSE(self): + d = np.arange(256, dtype = np.uint8) + t = ext.trans_byte_bitrow_scal(d) + #print np.reshape(t, (32, 8)) + t1 = ext.trans_byte_bitrow_SSE(d) + #print np.reshape(t1, (32, 8)) + self.assertTrue(np.all(t == t1)) + + def deactivated_test_trans_byte_elem_SSE(self): + d = np.empty(16, dtype=([('a', 'u4'), ('b', 'u4'), ('c', 'u4')])) + d['a'] = np.arange(16) * 1 + d['b'] = np.arange(16) * 2 + d['c'] = np.arange(16) * 3 + #print d.dtype.itemsize + #print np.reshape(d.view(np.uint8), (16, 12)) + t1 = ext.trans_byte_elem_SSE(d) + #print np.reshape(t1.view(np.uint8), (12, 16)) + t0 = trans_byte_elem(d) + #print np.reshape(t0.view(np.uint8), (12, 16)) + self.assertTrue(np.all(t0.view(np.uint8) == t1.view(np.uint8))) + + def deactivated_test_bitshuffle(self): + d = np.arange(128, dtype=np.uint16) + t1 = ext.bitshuffle(d) + #print t1 + t2 = ext.bitunshuffle(t1) + #print t2 + self.assertTrue(np.all(t2.view(np.uint8) == d.view(np.uint8))) 
+""" + + +class TestOddLengths(unittest.TestCase): + def setUp(self): + self.reps = 10 + self.nmax = 128 * 8 + # self.nmax = 4 * 8 # XXX + self.fun = ext.copy + self.check = lambda x: x + + def test_trans_bit_elem_SSE(self): + self.fun = ext.trans_bit_elem_SSE + self.check = trans_bit_elem + + def test_untrans_bit_elem_SSE(self): + self.fun = lambda x: ext.untrans_bit_elem_SSE(ext.trans_bit_elem(x)) + self.check = lambda x: x + + def test_trans_bit_elem_AVX(self): + self.fun = ext.trans_bit_elem_AVX + self.check = trans_bit_elem + + def test_untrans_bit_elem_AVX(self): + self.fun = lambda x: ext.untrans_bit_elem_SSE(ext.trans_bit_elem(x)) + self.check = lambda x: x + + def test_trans_bit_elem_scal(self): + self.fun = ext.trans_bit_elem_scal + self.check = trans_bit_elem + + def test_untrans_bit_elem_scal(self): + self.fun = lambda x: ext.untrans_bit_elem_scal(ext.trans_bit_elem(x)) + self.check = lambda x: x + + def test_trans_byte_elem_SSE(self): + self.fun = ext.trans_byte_elem_SSE + self.check = trans_byte_elem + + def tearDown(self): + try: + for dtype in TEST_DTYPES: + itemsize = np.dtype(dtype).itemsize + nbyte_max = self.nmax * itemsize + dbuf = random.randint(0, 255, nbyte_max).astype(np.uint8) + dbuf = dbuf.view(dtype) + for ii in range(self.reps): + n = random.randint(0, self.nmax // 8, 1)[0] * 8 + data = dbuf[:n] + out = self.fun(data).view(np.uint8) + ans = self.check(data).view(np.uint8) + self.assertTrue(np.all(out == ans)) + except RuntimeError as err: + if len(err.args) > 1 and (err.args[1] == -11) and not ext.using_SSE2(): + return + if len(err.args) > 1 and (err.args[1] == -12) and not ext.using_AVX2(): + return + else: + raise + + +class TestBitShuffleCircle(unittest.TestCase): + """Ensure that final filter is circularly consistant for any data type and + any length buffer.""" + + def test_circle(self): + nmax = 100000 + reps = 20 + for dtype in TEST_DTYPES: + itemsize = np.dtype(dtype).itemsize + nbyte_max = nmax * itemsize + dbuf = 
random.randint(0, 255, nbyte_max).astype(np.uint8) + dbuf = dbuf.view(dtype) + for ii in range(reps): + n = random.randint(0, nmax, 1)[0] + data = dbuf[:n] + shuff = ext.bitshuffle(data) + out = ext.bitunshuffle(shuff) + self.assertTrue(out.dtype is data.dtype) + self.assertTrue(np.all(data.view(np.uint8) == out.view(np.uint8))) + + def test_circle_with_compression(self): + nmax = 100000 + reps = 20 + for dtype in TEST_DTYPES: + itemsize = np.dtype(dtype).itemsize + nbyte_max = nmax * itemsize + dbuf = random.randint(0, 255, nbyte_max).astype(np.uint8) + dbuf = dbuf.view(dtype) + for ii in range(reps): + n = random.randint(0, nmax, 1)[0] + data = dbuf[:n] + shuff = ext.compress_lz4(data) + out = ext.decompress_lz4(shuff, data.shape, data.dtype) + self.assertTrue(out.dtype is data.dtype) + self.assertTrue(np.all(data.view(np.uint8) == out.view(np.uint8))) + + @unittest.skipUnless(__zstd__, "ZSTD support not included") + def test_circle_with_zstd_compression(self): + nmax = 100000 + reps = 20 + for dtype in TEST_DTYPES: + itemsize = np.dtype(dtype).itemsize + nbyte_max = nmax * itemsize + dbuf = random.randint(0, 255, nbyte_max).astype(np.uint8) + dbuf = dbuf.view(dtype) + for ii in range(reps): + n = random.randint(0, nmax, 1)[0] + data = dbuf[:n] + shuff = ext.compress_zstd(data) + out = ext.decompress_zstd(shuff, data.shape, data.dtype) + self.assertTrue(out.dtype is data.dtype) + self.assertTrue(np.all(data.view(np.uint8) == out.view(np.uint8))) + + +# Python implementations for checking results. 
+ + +def trans_byte_elem(arr): + dtype = arr.dtype + itemsize = dtype.itemsize + in_buf = arr.flat[:].view(np.uint8) + nelem = in_buf.size // itemsize + in_buf.shape = (nelem, itemsize) + + out_buf = np.empty((itemsize, nelem), dtype=np.uint8) + for ii in range(nelem): + for jj in range(itemsize): + out_buf[jj, ii] = in_buf[ii, jj] + return out_buf.flat[:].view(dtype) + + +def trans_bit_byte(arr): + n = arr.size + dtype = arr.dtype + itemsize = dtype.itemsize + bits = np.unpackbits(arr.view(np.uint8)) + bits.shape = (n * itemsize, 8) + # We have to reverse the order of the bits both for unpacking and packing, + # since we want to call the least significant bit the first bit. + bits = bits[:, ::-1] + bits_shuff = (bits.T).copy() + bits_shuff.shape = (n * itemsize, 8) + bits_shuff = bits_shuff[:, ::-1] + arr_bt = np.packbits(bits_shuff.flat[:]) + return arr_bt.view(dtype) + + +def trans_bit_elem(arr): + n = arr.size + dtype = arr.dtype + itemsize = dtype.itemsize + bits = np.unpackbits(arr.view(np.uint8)) + bits.shape = (n * itemsize, 8) + # We have to reverse the order of the bits both for unpacking and packing, + # since we want to call the least significant bit the first bit. 
+ bits = bits[:, ::-1].copy() + bits.shape = (n, itemsize * 8) + bits_shuff = (bits.T).copy() + bits_shuff.shape = (n * itemsize, 8) + bits_shuff = bits_shuff[:, ::-1] + arr_bt = np.packbits(bits_shuff.flat[:]) + return arr_bt.view(dtype) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_h5filter.py b/tests/test_h5filter.py new file mode 100644 index 00000000..2dbb2c3f --- /dev/null +++ b/tests/test_h5filter.py @@ -0,0 +1,138 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + +import unittest +import os +import glob + +import numpy as np +import h5py +import pytest +from h5py import h5z + +from bitshuffle import h5, __zstd__ + + +os.environ["HDF5_PLUGIN_PATH"] = "" + + +class TestFilter(unittest.TestCase): + def test_filter(self): + shape = (32 * 1024 + 783,) + chunks = (4 * 1024 + 23,) + dtype = np.int64 + data = np.arange(shape[0]) + fname = "tmp_test_filters.h5" + f = h5py.File(fname, "w") + h5.create_dataset( + f, + b"range", + shape, + dtype, + chunks, + filter_pipeline=(32008, 32000), + filter_flags=(h5z.FLAG_MANDATORY, h5z.FLAG_MANDATORY), + filter_opts=None, + ) + f["range"][:] = data + + f.close() + + f = h5py.File(fname, "r") + d = f["range"][:] + self.assertTrue(np.all(d == data)) + f.close() + + def test_with_block_size(self): + shape = (128 * 1024 + 783,) + chunks = (4 * 1024 + 23,) + dtype = np.int64 + data = np.arange(shape[0]) + fname = "tmp_test_filters.h5" + f = h5py.File(fname, "w") + h5.create_dataset( + f, + b"range", + shape, + dtype, + chunks, + filter_pipeline=(32008, 32000), + filter_flags=(h5z.FLAG_MANDATORY, h5z.FLAG_MANDATORY), + filter_opts=((680,), ()), + ) + f["range"][:] = data + + f.close() + # os.system('h5dump -H -p tmp_test_filters.h5') + + f = h5py.File(fname, "r") + d = f["range"][:] + self.assertTrue(np.all(d == data)) + f.close() + + def test_with_lz4_compression(self): + shape = (128 * 1024 + 783,) + chunks = (4 * 1024 + 23,) + dtype = np.int64 + data = 
np.arange(shape[0]) + fname = "tmp_test_filters.h5" + f = h5py.File(fname, "w") + h5.create_dataset( + f, + b"range", + shape, + dtype, + chunks, + filter_pipeline=(32008,), + filter_flags=(h5z.FLAG_MANDATORY,), + filter_opts=((0, h5.H5_COMPRESS_LZ4),), + ) + f["range"][:] = data + + f.close() + # os.system('h5dump -H -p tmp_test_filters.h5') + + f = h5py.File(fname, "r") + d = f["range"][:] + self.assertTrue(np.all(d == data)) + f.close() + + @pytest.mark.skipif( + __zstd__ is False, + reason="Bitshuffle has not been built with ZSTD support.", + ) + def test_with_zstd_compression(self): + shape = (128 * 1024 + 783,) + chunks = (4 * 1024 + 23,) + compression_lvl = 10 + dtype = np.int64 + data = np.arange(shape[0]) + fname = "tmp_test_filters.h5" + f = h5py.File(fname, "w") + h5.create_dataset( + f, + b"range", + shape, + dtype, + chunks, + filter_pipeline=(32008,), + filter_flags=(h5z.FLAG_MANDATORY,), + filter_opts=((0, h5.H5_COMPRESS_ZSTD, compression_lvl),), + ) + f["range"][:] = data + + f.close() + # os.system('h5dump -H -p tmp_test_filters.h5') + + f = h5py.File(fname, "r") + d = f["range"][:] + self.assertTrue(np.all(d == data)) + f.close() + + def tearDown(self): + files = glob.glob("tmp_test_*") + for f in files: + os.remove(f) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_h5plugin.py b/tests/test_h5plugin.py new file mode 100644 index 00000000..001fa9da --- /dev/null +++ b/tests/test_h5plugin.py @@ -0,0 +1,66 @@ +from __future__ import absolute_import, division, print_function, unicode_literals +import unittest +import os +import glob + +import numpy as np +import h5py +import pytest +from subprocess import Popen, PIPE, STDOUT + +import bitshuffle + + +plugin_dir = os.path.join(os.path.dirname(bitshuffle.__file__), "plugin") +os.environ["HDF5_PLUGIN_PATH"] = plugin_dir + + +H5VERSION = h5py.h5.get_libversion() +if H5VERSION[0] < 1 or ( + H5VERSION[0] == 1 + and (H5VERSION[1] < 8 or (H5VERSION[1] == 8 and H5VERSION[2] < 11)) +): 
+ H51811P = False +else: + H51811P = True + + +class TestFilterPlugins(unittest.TestCase): + @pytest.mark.skipif( + "CIBUILDWHEEL" in os.environ, + reason="Can't build dynamic HDF5 plugin into bitshuffle wheel.", + ) + def test_plugins(self): + if not H51811P: + return + shape = (32 * 1024,) + chunks = (4 * 1024,) + dtype = np.int64 + data = np.arange(shape[0]) + fname = "tmp_test_filters.h5" + f = h5py.File(fname, "w") + dset = f.create_dataset( + "range", shape=shape, dtype=dtype, chunks=chunks, compression=32008 + ) + dset[:] = data + f.close() + + # Make sure the filters are working outside of h5py by calling h5dump + h5dump = Popen(["h5dump", fname], stdout=PIPE, stderr=STDOUT) + stdout, nothing = h5dump.communicate() + err = h5dump.returncode + self.assertEqual(err, 0) + + f = h5py.File(fname, "r") + d = f["range"][:] + self.assertTrue(np.all(d == data)) + f.close() + + def tearDown(self): + files = glob.glob("tmp_test_*") + for f in files: + os.remove(f) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_regression.py b/tests/test_regression.py new file mode 100644 index 00000000..bb9febc4 --- /dev/null +++ b/tests/test_regression.py @@ -0,0 +1,46 @@ +""" +Test that data encoded with earlier versions can still be decoded correctly. 
+ +""" + +from __future__ import absolute_import, division, print_function + +import pathlib +import unittest + +import numpy as np +import h5py +from bitshuffle import __zstd__ + +from packaging import version + +TEST_DATA_DIR = pathlib.Path(__file__).parent / "data" + +OUT_FILE_TEMPLATE = "regression_%s.h5" + +VERSIONS = ["0.1.3", "0.4.0"] + + +class TestAll(unittest.TestCase): + def test_regression(self): + for rev in VERSIONS: + file_name = TEST_DATA_DIR / (OUT_FILE_TEMPLATE % rev) + f = h5py.File(file_name, "r") + g_orig = f["original"] + g_comp = f["compressed"] + + for dset_name in g_comp.keys(): + self.assertTrue(np.all(g_comp[dset_name][:] == g_orig[dset_name][:])) + + # Only run ZSTD comparison on versions >= 0.4.0 and if ZSTD support + # has been built into bitshuffle + if version.parse(rev) >= version.parse("0.4.0") and __zstd__: + g_comp_zstd = f["compressed_zstd"] + for dset_name in g_comp_zstd.keys(): + self.assertTrue( + np.all(g_comp_zstd[dset_name][:] == g_orig[dset_name][:]) + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/zstd b/zstd new file mode 160000 index 00000000..18d02cbf --- /dev/null +++ b/zstd @@ -0,0 +1 @@ +Subproject commit 18d02cbf2e0654de08093094f1a77cfd231f11d7 From 7a4843f7d4fc097fbd4082fbe92ff7c152dcb020 Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Tue, 8 Nov 2022 11:45:44 +0100 Subject: [PATCH 6/7] Patch bitshuffle to build on Windows Ref: https://github.com/kiyo-masui/bitshuffle/pull/122 --- src/bitshuffle/src/bitshuffle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bitshuffle/src/bitshuffle.c b/src/bitshuffle/src/bitshuffle.c index a8ef0b5c..ba5cde3a 100644 --- a/src/bitshuffle/src/bitshuffle.c +++ b/src/bitshuffle/src/bitshuffle.c @@ -182,7 +182,7 @@ int64_t bshuf_decompress_zstd_block(ioc_chain *C_ptr, tmp_buf = malloc(size * elem_size); if (tmp_buf == NULL) return -1; - nbytes = ZSTD_decompress(tmp_buf, size * elem_size, in + 4, nbytes_from_header); + nbytes = 
ZSTD_decompress(tmp_buf, size * elem_size, (void *)((char *) in + 4), nbytes_from_header); CHECK_ERR_FREE_LZ(nbytes, tmp_buf); if (nbytes != size * elem_size) { free(tmp_buf); From f84f88e7e56d394605a51769d6971ff33c6c3f06 Mon Sep 17 00:00:00 2001 From: Thomas VINCENT Date: Tue, 8 Nov 2022 11:50:22 +0100 Subject: [PATCH 7/7] Add reference of patch --- doc/information.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/information.rst b/doc/information.rst index 20c13248..8dcd19b1 100644 --- a/doc/information.rst +++ b/doc/information.rst @@ -50,7 +50,7 @@ HDF5 filters and compression libraries HDF5 compression filters and compression libraries sources were obtained from: * LZ4 plugin (commit d48f960) and lz4 (v1.9.3): https://github.com/nexusformat/HDF5-External-Filter-Plugins and https://github.com/Blosc/c-blosc/tree/v1.21.1/internal-complibs/lz4-1.9.3 -* bitshuffle plugin (0.4.2) and zstd (v1.5.0): https://github.com/kiyo-masui/bitshuffle and https://github.com/Blosc/c-blosc/tree/v1.21.1/internal-complibs/zstd-1.5.0 +* bitshuffle plugin (0.4.2 + patch `PR #122 `_) and zstd (v1.5.0): https://github.com/kiyo-masui/bitshuffle and https://github.com/Blosc/c-blosc/tree/v1.21.1/internal-complibs/zstd-1.5.0 * bzip2 plugin (from PyTables v3.7.0) and bzip2 (v1.0.8): https://github.com/PyTables/PyTables/, https://sourceware.org/git/bzip2.git * hdf5-blosc plugin (v1.0.0), c-blosc (v1.21.1) and snappy (v1.1.9): https://github.com/Blosc/hdf5-blosc, https://github.com/Blosc/c-blosc and https://github.com/google/snappy * FCIDECOMP plugin (v1.0.2) and CharLS (branch 1.x-master SHA1 ID: 25160a42fb62e71e4b0ce081f5cb3f8bb73938b5):