refactor: move to z85base91 package

JarbasHiveMind · Jan 8, 2025 · 2ad8118 · 2ad8118
1 parent 89f4b2c
commit 2ad8118
Show file tree

Hide file tree

Showing 15 changed files with 73 additions and 594 deletions.
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
diff --git a/hivemind_bus_client/encodings/__init__.py b/hivemind_bus_client/encodings/__init__.py
@@ -1,3 +1,10 @@
-from hivemind_bus_client.encodings.z85b import Z85B
-from hivemind_bus_client.encodings.z85p import Z85P
-from hivemind_bus_client.encodings.b91 import B91
+from z85base91 import Z85B, Z85P, B91
+import warnings
+
+# Deprecation warning
+warnings.warn(
+    "Importing from hivemind_bus_client.encodings is deprecated and will be removed in a future release. "
+    "Please update your code to use the new package 'z85base91'",
+    DeprecationWarning,
+    stacklevel=2,
+)
diff --git a/hivemind_bus_client/encodings/b91.py b/hivemind_bus_client/encodings/b91.py
@@ -1,100 +1,10 @@
-from typing import Union
-
-
-class B91:
-    ALPHABET = [
-        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
-        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
-        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
-        'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
-        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '!', '#', '$',
-        '%', '&', '(', ')', '*', '+', ',', '.', '/', ':', ';', '<', '=',
-        '>', '?', '@', '[', ']', '^', '_', '`', '{', '|', '}', '~', '"'
-    ]
-
-    DECODE_TABLE = {char: idx for idx, char in enumerate(ALPHABET)}
-
-    @classmethod
-    def decode(cls, encoded_data: Union[str, bytes], encoding: str = "utf-8") -> bytes:
-        """
-        Decodes a Base91-encoded string into its original binary form.
-
-        Args:
-            encoded_data (Union[str, bytes]): Base91-encoded input data. If `bytes`, it is decoded to text using `encoding`.
-            encoding (str): The encoding used to decode `encoded_data` when it is provided as bytes. Default is 'utf-8'.
-
-        Returns:
-            bytes: The decoded binary data.
-
-        Raises:
-            ValueError: If the input contains invalid Base91 characters.
-        """
-        if isinstance(encoded_data, bytes):
-            encoded_data = encoded_data.decode(encoding)
-
-        v = -1
-        b = 0
-        n = 0
-        out = bytearray()
-
-        for char in encoded_data:
-            if char not in cls.DECODE_TABLE:
-                raise ValueError(f"Invalid Base91 character: {char}")
-            c = cls.DECODE_TABLE[char]
-            if v < 0:
-                v = c
-            else:
-                v += c * 91
-                b |= v << n
-                n += 13 if (v & 8191) > 88 else 14
-                while n >= 8:
-                    out.append(b & 255)
-                    b >>= 8
-                    n -= 8
-                v = -1
-
-        if v >= 0:
-            out.append((b | v << n) & 255)
-
-        return bytes(out)
-
-    @classmethod
-    def encode(cls, data: Union[bytes, str], encoding: str = "utf-8") -> bytes:
-        """
-        Encodes binary data into a Base91-encoded string.
-
-        Args:
-            data (Union[bytes, str]): Input binary data to encode. If `str`, it is encoded using `encoding`.
-            encoding (str): The encoding used to convert a `str` input to bytes and to encode the returned Base91 text. Default is 'utf-8'.
-
-        Returns:
-            bytes: The Base91-encoded data.
-        """
-        if isinstance(data, str):
-            data = data.encode(encoding)
-
-        b = 0
-        n = 0
-        out = []
-
-        for byte in data:
-            b |= byte << n
-            n += 8
-            if n > 13:
-                v = b & 8191
-                if v > 88:
-                    b >>= 13
-                    n -= 13
-                else:
-                    v = b & 16383
-                    b >>= 14
-                    n -= 14
-                out.append(cls.ALPHABET[v % 91])
-                out.append(cls.ALPHABET[v // 91])
-
-        if n:
-            out.append(cls.ALPHABET[b % 91])
-            if n > 7 or b > 90:
-                out.append(cls.ALPHABET[b // 91])
-
-        return ''.join(out).encode(encoding)
+from z85base91 import B91
+import warnings
+
+# Deprecation warning
+warnings.warn(
+    "Importing from hivemind_bus_client.encodings is deprecated and will be removed in a future release. "
+    "Please update your code to use the new package 'z85base91'",
+    DeprecationWarning,
+    stacklevel=2,
+)
diff --git a/hivemind_bus_client/encodings/benchmark.py b/hivemind_bus_client/encodings/benchmark.py
@@ -179,7 +179,7 @@ def save_detailed_results_to_markdown(results: dict, filename: str):
 @click.command()
 @click.option("--sizes", default="10,100,1000,5000,10000,50000", help="Data sizes to benchmark, comma-separated.")
 @click.option("--weights", default="0.5,0.5", help="Weights for performance and bandwidth, comma-separated.")
-@click.option("--iterations", default=20, help="Number of iterations to average results.")
+@click.option("--iterations", default=1000, help="Number of iterations to average results.")
 def main(sizes: str, weights: str, iterations: int):
     global performance_weight, bandwidth_weight
 
@@ -258,3 +258,19 @@ def main(sizes: str, weights: str, iterations: int):
 
 if __name__ == "__main__":
     main()
+
+#
+
+
+
+# Benchmark Results (new):
+# Encoding             Avg Encoding Time    Avg Decoding Time    Avg Size Increase    Performance  Bandwidth  Aggregate
+# ==============================================================================================================
+# JSON-B64             0.000001             0.000004             1.38                 100.00       81.64      90.82
+# JSON-URLSAFE-B64     0.000002             0.000005             1.38                 73.23        81.64      77.43
+# JSON-B64-stdlib      0.000009             0.000009             1.38                 27.29        81.64      54.46
+# JSON-B91             0.001880             0.002634             1.24                 1.00         100.00     50.50
+# JSON-Z85B            0.001361             0.001661             1.26                 1.05         97.90      49.47
+# JSON-Z85P            0.001241             0.001487             1.31                 1.07         91.12      46.09
+# JSON-B32             0.000679             0.001196             1.60                 1.15         53.26      27.20
+# JSON-HEX             0.000008             0.000008             2.00                 30.74        1.00       15.87
diff --git a/hivemind_bus_client/encodings/z85b.py b/hivemind_bus_client/encodings/z85b.py
@@ -1,108 +1,10 @@
-"""
-Python implementation of Z85b base-85 encoding.
-
-Z85b is a variation of the ZMQ RFC 32 Z85 base-85 encoding with the following differences:
-1. Little-endian encoding (to facilitate alignment with lower byte indices).
-2. No requirement for a multiple of 4/5 length.
-3. `Z85B.decode()` eliminates whitespace from the input.
-4. `Z85B.decode()` raises `Z85DecodeError` if invalid characters are encountered.
-
-This file is a derivative work of https://gist.github.com/minrk/6357188?permalink_comment_id=2366506#gistcomment-2366506
-
-Copyright (c) 2013 Brian Granger, Min Ragan-Kelley
-Distributed under the terms of the New BSD License.
-"""
-import re
-import struct
-from typing import Union
-
-from hivemind_bus_client.exceptions import Z85DecodeError
-
-
-class Z85B:
-    # Z85CHARS is the base 85 symbol table
-    Z85CHARS = bytearray(b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#")
-
-    # Z85MAP maps integers in [0, 84] to the appropriate character in Z85CHARS
-    Z85MAP = {char: idx for idx, char in enumerate(Z85CHARS)}
-
-    # Powers of 85 for encoding/decoding
-    _85s = [85 ** i for i in range(5)]
-
-    # Padding lengths for encoding and decoding
-    _E_PADDING = [0, 3, 2, 1]
-    _D_PADDING = [0, 4, 3, 2, 1]
-
-    @classmethod
-    def encode(cls, data: Union[str, bytes], encoding: str = "utf-8") -> bytes:
-        """
-        Encode raw bytes into Z85b format.
-
-        Args:
-            data (Union[str, bytes]): Input data to encode.
-            encoding (str): The encoding to use if `data` is provided as a string. Default is 'utf-8'.
-
-        Returns:
-            bytes: Z85b-encoded bytes.
-        """
-        if isinstance(data, str):
-            data = data.encode(encoding)
-        data = bytearray(data)
-        padding = cls._E_PADDING[len(data) % 4]
-        data += b'\x00' * padding
-        nvalues = len(data) // 4
-
-        # Pack the raw bytes into little-endian 32-bit integers
-        values = struct.unpack(f'<{nvalues}I', data)
-        encoded = bytearray()
-
-        for value in values:
-            for offset in cls._85s:
-                encoded.append(cls.Z85CHARS[(value // offset) % 85])
-
-        # Remove padding characters from the encoded output
-        if padding:
-            encoded = encoded[:-padding]
-        return bytes(encoded)
-
-    @classmethod
-    def decode(cls, encoded_data: Union[str, bytes], encoding: str = "utf-8") -> bytes:
-        """
-        Decode Z85b-encoded bytes into raw bytes.
-
-        Args:
-            encoded_data (Union[str, bytes]): Z85b-encoded data.
-            encoding (str): The encoding to use if `encoded_data` is provided as a string. Default is 'utf-8'.
-
-        Returns:
-            bytes: Decoded raw bytes.
-
-        Raises:
-            Z85DecodeError: If invalid characters are encountered during decoding.
-        """
-        # Normalize input by removing whitespace
-        encoded_data = bytearray(re.sub(rb'\s+', b'',
-                                        encoded_data if isinstance(encoded_data, bytes)
-                                        else encoded_data.encode(encoding)))
-        padding = cls._D_PADDING[len(encoded_data) % 5]
-        nvalues = (len(encoded_data) + padding) // 5
-
-        values = []
-        for i in range(0, len(encoded_data), 5):
-            value = 0
-            for j, offset in enumerate(cls._85s):
-                try:
-                    value += cls.Z85MAP[encoded_data[i + j]] * offset
-                except IndexError:
-                    break  # End of input reached
-                except KeyError as e:
-                    raise Z85DecodeError(f"Invalid byte code: {e.args[0]!r}")
-            values.append(value)
-
-        # Unpack the values back into raw bytes
-        decoded = struct.pack(f'<{nvalues}I', *values)
-
-        # Remove padding from the decoded output
-        if padding:
-            decoded = decoded[:-padding]
-        return decoded
+from z85base91 import Z85B
+import warnings
+
+# Deprecation warning
+warnings.warn(
+    "Importing from hivemind_bus_client.encodings is deprecated and will be removed in a future release. "
+    "Please update your code to use the new package 'z85base91'",
+    DeprecationWarning,
+    stacklevel=2,
+)