Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement extend for cached_test_function_ir #4159

Merged
merged 8 commits into from
Nov 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
RELEASE_TYPE: patch

This patch updates some internals around how we determine an input is too large to finish generating.
74 changes: 74 additions & 0 deletions hypothesis-python/src/hypothesis/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import binascii
import json
import os
import struct
import sys
import warnings
from collections.abc import Iterable
Expand All @@ -27,6 +28,7 @@

from hypothesis.configuration import storage_directory
from hypothesis.errors import HypothesisException, HypothesisWarning
from hypothesis.internal.conjecture.data import IRType
from hypothesis.utils.conventions import not_set

__all__ = [
Expand Down Expand Up @@ -671,3 +673,75 @@ def move(self, src: bytes, dest: bytes, value: bytes) -> None:

def delete(self, key: bytes, value: bytes) -> None:
    """Reject the deletion: this method never removes anything.

    Both ``key`` and ``value`` are ignored. The call always raises
    ``RuntimeError`` carrying ``self._read_only_message`` (presumably because
    the enclosing database is read-only -- confirm against the class header,
    which is outside this excerpt).
    """
    message = self._read_only_message
    raise RuntimeError(message)


def ir_to_bytes(ir: Iterable[IRType], /) -> bytes:
    """Pack a sequence of IR values into one bytestring (see ir_from_bytes).

    Each value becomes a metadata byte ``ttt_sssss`` (3-bit type tag, 5-bit
    size), optionally followed by a big-endian uint16 size, then the payload:

    * tag 0 -- bool; the value lives in the low bit, there is no payload.
    * tag 1 -- float, as 8 bytes of big-endian IEEE-754 double.
    * tag 2 -- int, as big-endian two's complement.
    * tag 3 -- bytes, verbatim.
    * tag 4 -- str, as UTF-8 with lone surrogates passed through.

    A size field of ``0b11111`` means the real payload length follows as a
    uint16, so ``struct.pack`` raises ``struct.error`` for payloads longer
    than 65535 bytes.
    """
    # A hand-rolled format may look odd, but the data is just a flat run of
    # scalars, and off-the-shelf formats such as protobuf or msgpack cope badly
    # with e.g. NaNs carrying unusual bit patterns or strings that are not
    # valid UTF-8.
    extended_size = 0b11111
    out = bytearray()
    for value in ir:
        # bool must be tested before int, since bool is an int subclass.
        if isinstance(value, bool):
            out.append(1 if value else 0)
            continue

        if isinstance(value, float):
            kind = 1
            payload = struct.pack("!d", value)
        elif isinstance(value, int):
            kind = 2
            # One byte more than bit_length // 8 always leaves room for the
            # sign bit; signed=True is what makes negative ints round-trip.
            width = 1 + value.bit_length() // 8
            payload = value.to_bytes(width, "big", signed=True)
        elif isinstance(value, bytes):
            kind = 3
            payload = value
        else:
            assert isinstance(value, str)
            kind = 4
            # surrogatepass rather than surrogateescape: per the review note
            # on this change, surrogateescape eventually errors on generated
            # strings.
            payload = value.encode(errors="surrogatepass")

        header = kind << 5
        length = len(payload)
        if length >= extended_size:
            # Too long for the 5-bit field: flag it and spell the size out.
            out.append(header | extended_size)
            out += struct.pack("!H", length)
        else:
            out.append(header | length)
        out += payload

    return bytes(out)


def ir_from_bytes(buffer: bytes, /) -> list[IRType]:
    """Deserialize a bytestring to a list of IR elements. Inverts ir_to_bytes.

    The buffer is a concatenation of records. Each record starts with a
    metadata byte ``ttt_sssss`` (3-bit type tag, 5-bit size):

    * tag 0 -- bool; any nonzero size bits decode to True, no payload follows.
    * tag 1 -- float; payload must be exactly 8 bytes (big-endian double).
    * tag 2 -- int; payload is big-endian two's complement.
    * tag 3 -- bytes; payload is returned verbatim.
    * tag 4 -- str; payload is UTF-8 decoded with ``surrogatepass``.

    A size field of ``0b11111`` means the real payload length follows the
    metadata byte as a big-endian uint16.

    Raises:
        ValueError: if a payload is shorter than its declared size, a float
            record is not 8 bytes long, or the tag is not one of 0-4.
            (``UnicodeDecodeError`` from an undecodable str payload is a
            ``ValueError`` subclass.)
        struct.error: if the buffer ends inside a uint16 size field.
    """
    parts: list[IRType] = []
    idx = 0
    while idx < len(buffer):
        tag = buffer[idx] >> 5
        size = buffer[idx] & 0b11111
        idx += 1

        if tag == 0:
            # Booleans carry their value inline in the size bits.
            parts.append(bool(size))
            continue
        if size == 0b11111:
            (size,) = struct.unpack_from("!H", buffer, offset=idx)
            idx += 2
        chunk = buffer[idx : idx + size]
        # Slicing past the end silently yields a short chunk, so a truncated
        # buffer would otherwise decode to a wrong value rather than fail.
        if len(chunk) != size:
            raise ValueError(
                f"truncated payload at offset {idx}: "
                f"expected {size} bytes, got {len(chunk)}"
            )
        idx += size

        # Explicit raises rather than asserts, so that malformed input is
        # still rejected when assertions are disabled (python -O).
        if tag == 1:
            if size != 8:
                raise ValueError("expected float64")
            parts.extend(struct.unpack("!d", chunk))
        elif tag == 2:
            parts.append(int.from_bytes(chunk, "big", signed=True))
        elif tag == 3:
            parts.append(chunk)
        elif tag == 4:
            parts.append(chunk.decode(errors="surrogatepass"))
        else:
            raise ValueError(f"unknown element tag {tag}")
    return parts
Loading
Loading