Skip to content

Commit

Permalink
extract alphabet size logic
Browse files Browse the repository at this point in the history
  • Loading branch information
tybug committed Sep 5, 2024
1 parent d2fda29 commit 5bc3849
Showing 1 changed file with 32 additions and 37 deletions.
69 changes: 32 additions & 37 deletions hypothesis-python/src/hypothesis/internal/conjecture/datatree.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,30 @@ def _repr_pretty_(self, p, cycle):
MAX_CHILDREN_EFFECTIVELY_INFINITE = 100_000


def _count_distinct_strings(*, alphabet_size, min_size, max_size):
# We want to estimate if we're going to have more children than
# MAX_CHILDREN_EFFECTIVELY_INFINITE, without computing a potentially
# extremely expensive pow. We'll check if the number of strings in
# the largest string size alone is enough to put us over this limit.
# We'll also employ a trick of estimating against log, which is cheaper
# than computing a pow.
#
# x = max_size
# y = alphabet_size
# n = MAX_CHILDREN_EFFECTIVELY_INFINITE
#
# x**y > n
# <=> log(x**y) > log(n)
# <=> y * log(x) > log(n)
definitely_too_large = max_size * math.log(alphabet_size) > math.log(
MAX_CHILDREN_EFFECTIVELY_INFINITE
)
if definitely_too_large:
return MAX_CHILDREN_EFFECTIVELY_INFINITE

return sum(alphabet_size**k for k in range(min_size, max_size + 1))


def compute_max_children(ir_type, kwargs):
if ir_type == "integer":
min_value = kwargs["min_value"]
Expand Down Expand Up @@ -176,16 +200,9 @@ def compute_max_children(ir_type, kwargs):
return 1
return 2
elif ir_type == "bytes":
min_size = kwargs["min_size"]
max_size = kwargs["max_size"]

definitely_too_large = max_size * math.log(2**8) > math.log(
MAX_CHILDREN_EFFECTIVELY_INFINITE
return _count_distinct_strings(
alphabet_size=2**8, min_size=kwargs["min_size"], max_size=kwargs["max_size"]
)
if definitely_too_large:
return MAX_CHILDREN_EFFECTIVELY_INFINITE

return sum(2 ** (8 * k) for k in range(min_size, max_size + 1))
elif ir_type == "string":
min_size = kwargs["min_size"]
max_size = kwargs["max_size"]
Expand All @@ -196,36 +213,14 @@ def compute_max_children(ir_type, kwargs):
# Only possibility is the empty string.
return 1

# We want to estimate if we're going to have more children than
# MAX_CHILDREN_EFFECTIVELY_INFINITE, without computing a potentially
# extremely expensive pow. We'll check if the number of strings in
# the largest string size alone is enough to put us over this limit.
# We'll also employ a trick of estimating against log, which is cheaper
# than computing a pow.
#
# x = max_size
# y = len(intervals)
# n = MAX_CHILDREN_EFFECTIVELY_INFINITE
#
# x**y > n
# <=> log(x**y) > log(n)
# <=> y * log(x) > log(n)

# avoid math.log(1) == 0 and incorrectly failing the below estimate,
# even when we definitely are too large.
if len(intervals) == 1:
definitely_too_large = max_size > MAX_CHILDREN_EFFECTIVELY_INFINITE
else:
definitely_too_large = max_size * math.log(len(intervals)) > math.log(
MAX_CHILDREN_EFFECTIVELY_INFINITE
)

if definitely_too_large:
# avoid math.log(1) == 0 and incorrectly failing our effectively_infinite
# estimate, even when we definitely are too large.
if len(intervals) == 1 and max_size > MAX_CHILDREN_EFFECTIVELY_INFINITE:
return MAX_CHILDREN_EFFECTIVELY_INFINITE

# number of strings of length k, for each k in [min_size, max_size].
return sum(len(intervals) ** k for k in range(min_size, max_size + 1))

return _count_distinct_strings(
alphabet_size=len(intervals), min_size=min_size, max_size=max_size
)
elif ir_type == "float":
min_value = kwargs["min_value"]
max_value = kwargs["max_value"]
Expand Down

0 comments on commit 5bc3849

Please sign in to comment.