Skip to content

Commit

Permalink
refactor: move to z85base91 package
Browse files Browse the repository at this point in the history
  • Loading branch information
JarbasAl committed Jan 8, 2025
1 parent 89f4b2c commit 2ad8118
Show file tree
Hide file tree
Showing 15 changed files with 73 additions and 594 deletions.
58 changes: 0 additions & 58 deletions .github/workflows/unit_tests.yml

This file was deleted.

13 changes: 10 additions & 3 deletions hivemind_bus_client/encodings/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
from hivemind_bus_client.encodings.z85b import Z85B
from hivemind_bus_client.encodings.z85p import Z85P
from hivemind_bus_client.encodings.b91 import B91
from z85base91 import Z85B, Z85P, B91
import warnings

# Runs at import time: flag this legacy import path as deprecated and point
# users at the standalone 'z85base91' package. stacklevel=2 attributes the
# warning to the importing code rather than to this module.
warnings.warn(
    message=(
        "Importing from hivemind_bus_client.encodings is deprecated and will be removed in a future release. "
        "Please update your code to use the new package 'z85base91'"
    ),
    category=DeprecationWarning,
    stacklevel=2,
)
110 changes: 10 additions & 100 deletions hivemind_bus_client/encodings/b91.py
Original file line number Diff line number Diff line change
@@ -1,100 +1,10 @@
from typing import Union


class B91:
    """
    Base91 codec exposing `encode` and `decode` as classmethods.

    Input bytes are treated as a little-endian bit stream. The encoder
    consumes 13 or 14 bits at a time and emits each group as two symbols
    (low digit first) from a 91-character alphabet.
    """

    # The 91 output symbols; a symbol's index in this list is its numeric value.
    ALPHABET = [
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
        'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '!', '#', '$',
        '%', '&', '(', ')', '*', '+', ',', '.', '/', ':', ';', '<', '=',
        '>', '?', '@', '[', ']', '^', '_', '`', '{', '|', '}', '~', '"'
    ]

    # Reverse lookup: symbol -> numeric value (0..90).
    DECODE_TABLE = {char: idx for idx, char in enumerate(ALPHABET)}

    @classmethod
    def decode(cls, encoded_data: Union[str, bytes], encoding: str = "utf-8") -> bytes:
        """
        Decodes Base91-encoded data into its original binary form.

        Args:
            encoded_data (Union[str, bytes]): Base91-encoded input. `bytes`
                (or `bytearray`) input is first decoded to text with `encoding`.
            encoding (str): Text encoding used to decode bytes input.
                Default is 'utf-8'.

        Returns:
            bytes: The decoded binary data.

        Raises:
            ValueError: If the input contains a character outside the Base91 alphabet.
        """
        if isinstance(encoded_data, (bytes, bytearray)):
            encoded_data = encoded_data.decode(encoding)

        table = cls.DECODE_TABLE
        v = -1  # value of the symbol pair being assembled; -1 means "no pending symbol"
        b = 0   # bit accumulator (least-significant bits are emitted first)
        n = 0   # number of valid bits currently held in `b`
        out = bytearray()

        for char in encoded_data:
            c = table.get(char)
            if c is None:
                raise ValueError(f"Invalid Base91 character: {char}")
            if v < 0:
                # First symbol of a pair: low base-91 digit.
                v = c
            else:
                # Second symbol: high base-91 digit completes the value.
                v += c * 91
                b |= v << n
                # Mirrors the encoder: values whose low 13 bits exceed 88 carry
                # 13 bits, all others carry 14 bits.
                n += 13 if (v & 8191) > 88 else 14
                while n >= 8:
                    out.append(b & 255)
                    b >>= 8
                    n -= 8
                v = -1

        # A trailing unpaired symbol carries the final partial byte.
        if v >= 0:
            out.append((b | v << n) & 255)

        return bytes(out)

    @classmethod
    def encode(cls, data: Union[bytes, str], encoding: str = "utf-8") -> bytes:
        """
        Encodes binary data into Base91.

        Args:
            data (Union[bytes, str]): Input data to encode. `str` input is
                first converted to bytes with `encoding`.
            encoding (str): Text encoding used to convert `str` input to bytes
                and to convert the Base91 text to the returned bytes.
                Default is 'utf-8'.

        Returns:
            bytes: The Base91 text, encoded with `encoding`.
        """
        if isinstance(data, str):
            data = data.encode(encoding)

        alphabet = cls.ALPHABET
        b = 0  # bit accumulator
        n = 0  # number of valid bits currently held in `b`
        out = []

        for byte in data:
            b |= byte << n
            n += 8
            if n > 13:
                # Prefer a 13-bit group; fall back to 14 bits when the 13-bit
                # value is small enough that 14 bits still fit in two symbols.
                v = b & 8191
                if v > 88:
                    b >>= 13
                    n -= 13
                else:
                    v = b & 16383
                    b >>= 14
                    n -= 14
                out.append(alphabet[v % 91])
                out.append(alphabet[v // 91])

        # Flush leftover bits: one symbol, or two when they do not fit in one.
        if n:
            out.append(alphabet[b % 91])
            if n > 7 or b > 90:
                out.append(alphabet[b // 91])

        return ''.join(out).encode(encoding)
from z85base91 import B91
import warnings

# Runs at import time: this module now only re-exports B91 from 'z85base91',
# so warn that the legacy import path is deprecated. stacklevel=2 attributes
# the warning to the importing code rather than to this module.
warnings.warn(
    message=(
        "Importing from hivemind_bus_client.encodings is deprecated and will be removed in a future release. "
        "Please update your code to use the new package 'z85base91'"
    ),
    category=DeprecationWarning,
    stacklevel=2,
)
18 changes: 17 additions & 1 deletion hivemind_bus_client/encodings/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ def save_detailed_results_to_markdown(results: dict, filename: str):
@click.command()
@click.option("--sizes", default="10,100,1000,5000,10000,50000", help="Data sizes to benchmark, comma-separated.")
@click.option("--weights", default="0.5,0.5", help="Weights for performance and bandwidth, comma-separated.")
@click.option("--iterations", default=20, help="Number of iterations to average results.")
@click.option("--iterations", default=1000, help="Number of iterations to average results.")
def main(sizes: str, weights: str, iterations: int):
global performance_weight, bandwidth_weight

Expand Down Expand Up @@ -258,3 +258,19 @@ def main(sizes: str, weights: str, iterations: int):

if __name__ == "__main__":
main()

#



# Benchmark Results (new):
# Encoding Avg Encoding Time Avg Decoding Time Avg Size Increase Performance Bandwidth Aggregate
# ==============================================================================================================
# JSON-B64 0.000001 0.000004 1.38 100.00 81.64 90.82
# JSON-URLSAFE-B64 0.000002 0.000005 1.38 73.23 81.64 77.43
# JSON-B64-stdlib 0.000009 0.000009 1.38 27.29 81.64 54.46
# JSON-B91 0.001880 0.002634 1.24 1.00 100.00 50.50
# JSON-Z85B 0.001361 0.001661 1.26 1.05 97.90 49.47
# JSON-Z85P 0.001241 0.001487 1.31 1.07 91.12 46.09
# JSON-B32 0.000679 0.001196 1.60 1.15 53.26 27.20
# JSON-HEX 0.000008 0.000008 2.00 30.74 1.00 15.87
118 changes: 10 additions & 108 deletions hivemind_bus_client/encodings/z85b.py
Original file line number Diff line number Diff line change
@@ -1,108 +1,10 @@
"""
Python implementation of Z85b base-85 encoding.
Z85b is a variation of the ZMQ RFC 32 Z85 base-85 encoding with the following differences:
1. Little-endian encoding (to facilitate alignment with lower byte indices).
2. No requirement for a multiple of 4/5 length.
3. `Z85B.decode()` eliminates whitespace from the input.
4. `Z85B.decode()` raises a clear exception if invalid characters are encountered.
This file is a derivative work of https://gist.github.com/minrk/6357188?permalink_comment_id=2366506#gistcomment-2366506
Copyright (c) 2013 Brian Granger, Min Ragan-Kelley
Distributed under the terms of the New BSD License.
"""
import re
import struct
from typing import Union

from hivemind_bus_client.exceptions import Z85DecodeError


class Z85B:
    """
    Z85b codec exposing `encode` and `decode` as classmethods.

    Every 4 input bytes are read as one little-endian 32-bit integer and
    written as 5 base-85 symbols, least-significant digit first. Inputs whose
    length is not a multiple of 4 are zero-padded before encoding and the
    corresponding trailing symbols are dropped from the output.
    """

    # Z85CHARS is the base 85 symbol table
    Z85CHARS = bytearray(b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#")

    # Z85MAP maps each symbol's byte value to its index in [0, 84]
    Z85MAP = {char: idx for idx, char in enumerate(Z85CHARS)}

    # Powers of 85 for encoding/decoding
    _85s = [85 ** i for i in range(5)]

    # Padding lengths for encoding and decoding
    _E_PADDING = [0, 3, 2, 1]
    _D_PADDING = [0, 4, 3, 2, 1]

    @classmethod
    def encode(cls, data: Union[str, bytes], encoding: str = "utf-8") -> bytes:
        """
        Encode raw bytes into Z85b format.

        Args:
            data (Union[str, bytes]): Input data to encode.
            encoding (str): The encoding to use if `data` is provided as a string. Default is 'utf-8'.

        Returns:
            bytes: Z85b-encoded bytes.
        """
        if isinstance(data, str):
            data = data.encode(encoding)
        data = bytearray(data)
        # Zero-pad to a whole number of 32-bit words.
        padding = cls._E_PADDING[len(data) % 4]
        data += b'\x00' * padding
        nvalues = len(data) // 4

        # Pack the raw bytes into little-endian 32-bit integers
        values = struct.unpack(f'<{nvalues}I', data)
        chars = cls.Z85CHARS
        powers = cls._85s
        encoded = bytearray()

        for value in values:
            # Emit the five base-85 digits, least-significant first.
            encoded.extend(chars[(value // power) % 85] for power in powers)

        # Remove padding characters from the encoded output
        if padding:
            encoded = encoded[:-padding]
        return bytes(encoded)

    @classmethod
    def decode(cls, encoded_data: Union[str, bytes], encoding: str = "utf-8") -> bytes:
        """
        Decode Z85b-encoded bytes into raw bytes.

        Args:
            encoded_data (Union[str, bytes]): Z85b-encoded data. Whitespace is ignored.
            encoding (str): The encoding to use if `encoded_data` is provided as a string. Default is 'utf-8'.

        Returns:
            bytes: Decoded raw bytes.

        Raises:
            Z85DecodeError: If invalid characters are encountered during decoding,
                or if a symbol group decodes to a value that does not fit in 32 bits.
        """
        if isinstance(encoded_data, (bytes, bytearray)):
            raw = bytes(encoded_data)
        else:
            raw = encoded_data.encode(encoding)

        # Normalize input by removing whitespace
        symbols = re.sub(rb'\s+', b'', raw)
        padding = cls._D_PADDING[len(symbols) % 5]

        z85map = cls.Z85MAP
        powers = cls._85s
        values = []
        for start in range(0, len(symbols), 5):
            value = 0
            # A trailing partial group lacks its most-significant symbols;
            # zip() stops early so the missing digits count as zero.
            for symbol, power in zip(symbols[start:start + 5], powers):
                digit = z85map.get(symbol)
                if digit is None:
                    raise Z85DecodeError(f"Invalid byte code: {symbol!r}")
                value += digit * power
            # Five symbols can represent up to 85**5 - 1, which exceeds a
            # 32-bit word; report that as a decode error instead of letting
            # struct.pack raise struct.error.
            if value > 0xFFFFFFFF:
                raise Z85DecodeError(
                    f"Invalid symbol group at offset {start}: value {value} exceeds 32 bits")
            values.append(value)

        # Unpack the values back into raw bytes
        decoded = struct.pack(f'<{len(values)}I', *values)

        # Remove padding from the decoded output
        if padding:
            decoded = decoded[:-padding]
        return decoded
from z85base91 import Z85B
import warnings

# Runs at import time: this module now only re-exports Z85B from 'z85base91',
# so warn that the legacy import path is deprecated. stacklevel=2 attributes
# the warning to the importing code rather than to this module.
warnings.warn(
    message=(
        "Importing from hivemind_bus_client.encodings is deprecated and will be removed in a future release. "
        "Please update your code to use the new package 'z85base91'"
    ),
    category=DeprecationWarning,
    stacklevel=2,
)
Loading

0 comments on commit 2ad8118

Please sign in to comment.