diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
new file mode 100644
index 0000000000..d76173578e
--- /dev/null
+++ b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,5 @@
+RELEASE_TYPE: patch
+
+This release improves our distribution of generated values for all strategies, by doing a better job of tracking which values we have generated before and avoiding generating them again.
+
+For example, ``st.lists(st.integers())`` previously generated ~5 each of ``[]`` and ``[0]`` in 100 examples. In this release, ``[]`` and ``[0]`` are each generated ~1-2 times.
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py
index bc850254b9..1f56d2ccf3 100644
--- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py
+++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py
@@ -30,6 +30,7 @@
     Set,
     Tuple,
     Type,
+    TypedDict,
     TypeVar,
     Union,
 )
@@ -163,6 +164,8 @@ def structural_coverage(label: int) -> StructuralCoverageTag:
 
 FLOAT_INIT_LOGIC_CACHE = LRUReusedCache(4096)
 
+DRAW_STRING_DEFAULT_MAX_SIZE = 10**10  # "arbitrarily large"
+
 
 class Example:
     """Examples track the hierarchical structure of draws from the byte stream,
@@ -794,6 +797,34 @@ def as_result(self) -> "_Overrun":
 MAX_DEPTH = 100
 
 
+class IntegerKWargs(TypedDict):
+    min_value: Optional[int]
+    max_value: Optional[int]
+    weights: Optional[Sequence[float]]
+    shrink_towards: int
+
+
+class FloatKWargs(TypedDict):
+    min_value: float
+    max_value: float
+    allow_nan: bool
+    smallest_nonzero_magnitude: float
+
+
+class StringKWargs(TypedDict):
+    intervals: IntervalSet
+    min_size: int
+    max_size: Optional[int]
+
+
+class BytesKWargs(TypedDict):
+    size: int
+
+
+class BooleanKWargs(TypedDict):
+    p: float
+
+
 class DataObserver:
     """Observer class for recording the behaviour of a
     ConjectureData object, primarily used for tracking
@@ -810,18 +841,34 @@ def conclude_test(
         Note that this is called after ``freeze`` has completed.
         """
 
-    def draw_bits(self, n_bits: int, *, forced: bool, value: int) -> None:
-        """Called when ``draw_bits`` is called on on the
-        observed ``ConjectureData``.
-        * ``n_bits`` is the number of bits drawn.
-        *  ``forced`` is True if the corresponding
-           draw was forced or ``False`` otherwise.
-        * ``value`` is the result that ``draw_bits`` returned.
-        """
-
     def kill_branch(self) -> None:
         """Mark this part of the tree as not worth re-exploring."""
 
+    def draw_integer(
+        self, value: int, *, was_forced: bool, kwargs: IntegerKWargs
+    ) -> None:
+        pass
+
+    def draw_float(
+        self, value: float, *, was_forced: bool, kwargs: FloatKWargs
+    ) -> None:
+        pass
+
+    def draw_string(
+        self, value: str, *, was_forced: bool, kwargs: StringKWargs
+    ) -> None:
+        pass
+
+    def draw_bytes(
+        self, value: bytes, *, was_forced: bool, kwargs: BytesKWargs
+    ) -> None:
+        pass
+
+    def draw_boolean(
+        self, value: bool, *, was_forced: bool, kwargs: BooleanKWargs
+    ) -> None:
+        pass
+
 
 @dataclass_transform()
 @attr.s(slots=True)
@@ -995,7 +1042,7 @@ def draw_integer(
             assert min_value is not None
             assert max_value is not None
 
-            sampler = Sampler(weights)
+            sampler = Sampler(weights, observe=False)
             gap = max_value - shrink_towards
 
             forced_idx = None
@@ -1023,7 +1070,7 @@ def draw_integer(
                 probe = shrink_towards + self._draw_unbounded_integer(
                     forced=None if forced is None else forced - shrink_towards
                 )
-                self._cd.stop_example(discard=max_value < probe)
+                self._cd.stop_example()
             return probe
 
         if max_value is None:
@@ -1034,7 +1081,7 @@ def draw_integer(
                 probe = shrink_towards + self._draw_unbounded_integer(
                     forced=None if forced is None else forced - shrink_towards
                 )
-                self._cd.stop_example(discard=probe < min_value)
+                self._cd.stop_example()
             return probe
 
         return self._draw_bounded_integer(
@@ -1091,7 +1138,7 @@ def draw_float(
                     assert pos_clamper is not None
                     clamped = pos_clamper(result)
                 if clamped != result and not (math.isnan(result) and allow_nan):
-                    self._cd.stop_example(discard=True)
+                    self._cd.stop_example()
                     self._cd.start_example(DRAW_FLOAT_LABEL)
                     self._draw_float(forced=clamped)
                     result = clamped
@@ -1113,7 +1160,7 @@ def draw_string(
         forced: Optional[str] = None,
     ) -> str:
         if max_size is None:
-            max_size = 10**10  # "arbitrarily large"
+            max_size = DRAW_STRING_DEFAULT_MAX_SIZE
 
         assert forced is None or min_size <= len(forced) <= max_size
 
@@ -1129,6 +1176,7 @@ def draw_string(
             max_size=max_size,
             average_size=average_size,
             forced=None if forced is None else len(forced),
+            observe=False,
         )
         while elements.more():
             forced_i: Optional[int] = None
@@ -1264,7 +1312,7 @@ def _draw_bounded_integer(
             probe = self._cd.draw_bits(
                 bits, forced=None if forced is None else abs(forced - center)
             )
-            self._cd.stop_example(discard=probe > gap)
+            self._cd.stop_example()
 
         if above:
             result = center + probe
@@ -1356,7 +1404,7 @@ def permitted(f):
         ]
         nasty_floats = [f for f in NASTY_FLOATS + boundary_values if permitted(f)]
         weights = [0.2 * len(nasty_floats)] + [0.8] * len(nasty_floats)
-        sampler = Sampler(weights) if nasty_floats else None
+        sampler = Sampler(weights, observe=False) if nasty_floats else None
 
         pos_clamper = neg_clamper = None
         if sign_aware_lte(0.0, max_value):
@@ -1465,6 +1513,19 @@ def __repr__(self):
             ", frozen" if self.frozen else "",
         )
 
+    # A bit of explanation of the `observe` argument in our draw_* functions.
+    #
+    # There are two types of draws: sub-ir and super-ir. For instance, some ir
+    # nodes use `many`, which in turn calls draw_boolean. But some strategies
+    # also use many, at the super-ir level. We don't want to write sub-ir draws
+    # to the DataTree (and consequently use them when computing novel prefixes),
+    # since they are fully recorded by writing the ir node itself.
+    # But super-ir draws are not included in the ir node, so we do want to write
+    # these to the tree.
+    #
+    # `observe` formalizes this distinction. The draw will only be written to
+    # the DataTree if observe is True.
+
     def draw_integer(
         self,
         min_value: Optional[int] = None,
@@ -1474,6 +1535,7 @@ def draw_integer(
         weights: Optional[Sequence[float]] = None,
         shrink_towards: int = 0,
         forced: Optional[int] = None,
+        observe: bool = True,
     ) -> int:
         # Validate arguments
         if weights is not None:
@@ -1494,13 +1556,18 @@ def draw_integer(
         if forced is not None and max_value is not None:
             assert forced <= max_value
 
-        return self.provider.draw_integer(
-            min_value=min_value,
-            max_value=max_value,
-            weights=weights,
-            shrink_towards=shrink_towards,
-            forced=forced,
-        )
+        kwargs: IntegerKWargs = {
+            "min_value": min_value,
+            "max_value": max_value,
+            "weights": weights,
+            "shrink_towards": shrink_towards,
+        }
+        value = self.provider.draw_integer(**kwargs, forced=forced)
+        if observe:
+            self.observer.draw_integer(
+                value, was_forced=forced is not None, kwargs=kwargs
+            )
+        return value
 
     def draw_float(
         self,
@@ -1514,6 +1581,7 @@ def draw_float(
         # width: Literal[16, 32, 64] = 64,
         # exclude_min and exclude_max handled higher up,
         forced: Optional[float] = None,
+        observe: bool = True,
     ) -> float:
         assert smallest_nonzero_magnitude > 0
         assert not math.isnan(min_value)
@@ -1523,13 +1591,18 @@ def draw_float(
             assert allow_nan or not math.isnan(forced)
             assert math.isnan(forced) or min_value <= forced <= max_value
 
-        return self.provider.draw_float(
-            min_value=min_value,
-            max_value=max_value,
-            allow_nan=allow_nan,
-            smallest_nonzero_magnitude=smallest_nonzero_magnitude,
-            forced=forced,
-        )
+        kwargs: FloatKWargs = {
+            "min_value": min_value,
+            "max_value": max_value,
+            "allow_nan": allow_nan,
+            "smallest_nonzero_magnitude": smallest_nonzero_magnitude,
+        }
+        value = self.provider.draw_float(**kwargs, forced=forced)
+        if observe:
+            self.observer.draw_float(
+                value, kwargs=kwargs, was_forced=forced is not None
+            )
+        return value
 
     def draw_string(
         self,
@@ -1538,19 +1611,44 @@ def draw_string(
         min_size: int = 0,
         max_size: Optional[int] = None,
         forced: Optional[str] = None,
+        observe: bool = True,
     ) -> str:
         assert forced is None or min_size <= len(forced)
-        return self.provider.draw_string(
-            intervals, min_size=min_size, max_size=max_size, forced=forced
-        )
 
-    def draw_bytes(self, size: int, *, forced: Optional[bytes] = None) -> bytes:
+        kwargs: StringKWargs = {
+            "intervals": intervals,
+            "min_size": min_size,
+            "max_size": max_size,
+        }
+        value = self.provider.draw_string(**kwargs, forced=forced)
+        if observe:
+            self.observer.draw_string(
+                value, kwargs=kwargs, was_forced=forced is not None
+            )
+        return value
+
+    def draw_bytes(
+        self,
+        # TODO move to min_size and max_size here.
+        size: int,
+        *,
+        forced: Optional[bytes] = None,
+        observe: bool = True,
+    ) -> bytes:
         assert forced is None or len(forced) == size
         assert size >= 0
 
-        return self.provider.draw_bytes(size, forced=forced)
+        kwargs: BytesKWargs = {"size": size}
+        value = self.provider.draw_bytes(**kwargs, forced=forced)
+        if observe:
+            self.observer.draw_bytes(
+                value, kwargs=kwargs, was_forced=forced is not None
+            )
+        return value
 
-    def draw_boolean(self, p: float = 0.5, *, forced: Optional[bool] = None) -> bool:
+    def draw_boolean(
+        self, p: float = 0.5, *, forced: Optional[bool] = None, observe: bool = True
+    ) -> bool:
         # Internally, we treat probabilities lower than 1 / 2**64 as
         # unconditionally false.
         #
@@ -1561,7 +1659,13 @@ def draw_boolean(self, p: float = 0.5, *, forced: Optional[bool] = None) -> bool
         if forced is False:
             assert p < (1 - 2 ** (-64))
 
-        return self.provider.draw_boolean(p, forced=forced)
+        kwargs: BooleanKWargs = {"p": p}
+        value = self.provider.draw_boolean(**kwargs, forced=forced)
+        if observe:
+            self.observer.draw_boolean(
+                value, kwargs=kwargs, was_forced=forced is not None
+            )
+        return value
 
     def as_result(self) -> Union[ConjectureResult, _Overrun]:
         """Convert the result of running this test into
@@ -1735,9 +1839,15 @@ def freeze(self) -> None:
         self.buffer = bytes(self.buffer)
         self.observer.conclude_test(self.status, self.interesting_origin)
 
-    def choice(self, values: Sequence[T], *, forced: Optional[T] = None) -> T:
+    def choice(
+        self,
+        values: Sequence[T],
+        *,
+        forced: Optional[T] = None,
+        observe: bool = True,
+    ) -> T:
         forced_i = None if forced is None else values.index(forced)
-        i = self.draw_integer(0, len(values) - 1, forced=forced_i)
+        i = self.draw_integer(0, len(values) - 1, forced=forced_i, observe=observe)
         return values[i]
 
     def draw_bits(self, n: int, *, forced: Optional[int] = None) -> int:
@@ -1774,7 +1884,6 @@ def draw_bits(self, n: int, *, forced: Optional[int] = None) -> int:
         buf = bytes(buf)
         result = int_from_bytes(buf)
 
-        self.observer.draw_bits(n, forced=forced is not None, value=result)
         self.__example_record.draw_bits(n, forced)
 
         initial = self.index
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/datatree.py b/hypothesis-python/src/hypothesis/internal/conjecture/datatree.py
index d82ed3ca67..4669ca4d39 100644
--- a/hypothesis-python/src/hypothesis/internal/conjecture/datatree.py
+++ b/hypothesis-python/src/hypothesis/internal/conjecture/datatree.py
@@ -8,17 +8,38 @@
 # v. 2.0. If a copy of the MPL was not distributed with this file, You can
 # obtain one at https://mozilla.org/MPL/2.0/.
 
+import itertools
+import math
+from typing import TYPE_CHECKING, List, Literal, Optional, Union
+
 import attr
 
 from hypothesis.errors import Flaky, HypothesisException, StopTest
+from hypothesis.internal import floats as flt
 from hypothesis.internal.compat import int_to_bytes
 from hypothesis.internal.conjecture.data import (
+    BooleanKWargs,
+    BytesKWargs,
     ConjectureData,
     DataObserver,
+    FloatKWargs,
+    IntegerKWargs,
     Status,
-    bits_to_bytes,
+    StringKWargs,
 )
-from hypothesis.internal.conjecture.junkdrawer import IntList
+from hypothesis.internal.floats import count_between_floats, float_to_int, int_to_float
+
+if TYPE_CHECKING:
+    from typing import TypeAlias
+else:
+    TypeAlias = object
+
+IRType: TypeAlias = Union[int, str, bool, float, bytes]
+IRKWargsType: TypeAlias = Union[
+    IntegerKWargs, FloatKWargs, StringKWargs, BytesKWargs, BooleanKWargs
+]
+# this would be "IRTypeType", but that's just confusing.
+IRLiteralType: TypeAlias = Literal["integer", "string", "boolean", "float", "bytes"]
 
 
 class PreviouslyUnseenBehaviour(HypothesisException):
@@ -51,12 +72,15 @@ class Branch:
     """Represents a transition where multiple choices can be made as to what
     to drawn."""
 
-    bit_length = attr.ib()
+    kwargs = attr.ib()
+    ir_type = attr.ib()
     children = attr.ib(repr=False)
 
     @property
     def max_children(self):
-        return 1 << self.bit_length
+        max_children = compute_max_children(self.ir_type, self.kwargs)
+        assert max_children > 0
+        return max_children
 
 
 @attr.s(slots=True, frozen=True)
@@ -67,61 +91,275 @@ class Conclusion:
     interesting_origin = attr.ib()
 
 
+# The number of max children where, beyond this, it is practically impossible
+# for hypothesis to saturate / explore all children nodes in a reasonable time
+# frame. We use this to bail out of expensive max children computations early,
+# where the numbers involved are so large that we know they will be larger than
+# this number.
+#
+# Note that it's ok for us to underestimate the number of max children of a node
+# by using this. We just may think the node is exhausted when in fact it has more
+# possible children to be explored. This has the potential to finish generation
+# early due to exhausting the entire tree, but that is quite unlikely: (1) the
+# number of examples would have to be quite high, and (2) the tree would have to
+# contain only one or two nodes, or generate_novel_prefix would simply switch to
+# exploring another non-exhausted node.
+#
+# Also note that we may sometimes compute max children above this value. In other
+# words, this is *not* a hard maximum on the computed max children. It's the point
+# where further computation is not beneficial - but sometimes doing that computation
+# unconditionally is cheaper than estimating against this value.
+#
+# The one case where this may be detrimental is fuzzing, where the throughput of
+# examples is so high that it really may saturate important nodes. We'll cross
+# that bridge when we come to it.
+MAX_CHILDREN_EFFECTIVELY_INFINITE = 100_000
+
+
+def compute_max_children(ir_type, kwargs):
+    from hypothesis.internal.conjecture.data import DRAW_STRING_DEFAULT_MAX_SIZE
+
+    if ir_type == "integer":
+        min_value = kwargs["min_value"]
+        max_value = kwargs["max_value"]
+        weights = kwargs["weights"]
+
+        if min_value is None and max_value is None:
+            # full 128 bit range.
+            return 2**128 - 1
+        if min_value is not None and max_value is not None:
+            # count between min/max value.
+            n = max_value - min_value + 1
+            # remove any values with a zero probability of being drawn (weight=0).
+            if weights is not None:
+                n -= sum(weight == 0 for weight in weights)
+            return n
+
+        # hard case: only one bound was specified. Here we probe either upwards
+        # or downwards with our full 128 bit generation, but only half of these
+        # (plus one for the case of generating zero) result in a probe in the
+        # direction we want. ((2**128 - 1) // 2) + 1 == 2 ** 127
+        assert (min_value is None) ^ (max_value is None)
+        return 2**127
+    elif ir_type == "boolean":
+        p = kwargs["p"]
+        # probabilities of 0 or 1 (or effectively 0 or 1) only have one choice.
+        if p <= 2 ** (-64) or p >= (1 - 2 ** (-64)):
+            return 1
+        return 2
+    elif ir_type == "bytes":
+        return 2 ** (8 * kwargs["size"])
+    elif ir_type == "string":
+        min_size = kwargs["min_size"]
+        max_size = kwargs["max_size"]
+        intervals = kwargs["intervals"]
+
+        if max_size is None:
+            max_size = DRAW_STRING_DEFAULT_MAX_SIZE
+
+        if len(intervals) == 0:
+            # Special-case the empty alphabet to avoid an error in math.log(0).
+            # Only possibility is the empty string.
+            return 1
+
+        # We want to estimate if we're going to have more children than
+        # MAX_CHILDREN_EFFECTIVELY_INFINITE, without computing a potentially
+        # extremely expensive pow. We'll check if the number of strings in
+        # the largest string size alone is enough to put us over this limit.
+        # We'll also employ a trick of estimating against log, which is cheaper
+        # than computing a pow.
+        #
+        # x = max_size
+        # y = len(intervals)
+        # n = MAX_CHILDREN_EFFECTIVELY_INFINITE
+        #
+        #     y**x > n
+        # <=> log(y**x)  > log(n)
+        # <=> x * log(y) > log(n)
+
+        # avoid math.log(1) == 0 and incorrectly failing the below estimate,
+        # even when we definitely are too large.
+        if len(intervals) == 1:
+            definitely_too_large = max_size > MAX_CHILDREN_EFFECTIVELY_INFINITE
+        else:
+            definitely_too_large = max_size * math.log(len(intervals)) > math.log(
+                MAX_CHILDREN_EFFECTIVELY_INFINITE
+            )
+
+        if definitely_too_large:
+            return MAX_CHILDREN_EFFECTIVELY_INFINITE
+
+        # number of strings of length k, for each k in [min_size, max_size].
+        return sum(len(intervals) ** k for k in range(min_size, max_size + 1))
+
+    elif ir_type == "float":
+        return count_between_floats(kwargs["min_value"], kwargs["max_value"])
+
+    raise NotImplementedError(f"unhandled ir_type {ir_type}")
+
+
+# In theory, this is a strict superset of the functionality of compute_max_children;
+#
+#   assert len(list(all_children(ir_type, kwargs))) == compute_max_children(ir_type, kwargs)
+#
+# In practice, we maintain two distinct implementations for efficiency and space
+# reasons. If you just need the number of children, it is cheaper to use
+# compute_max_children than to reify the list of children (only to immediately
+# throw it away).
+def all_children(ir_type, kwargs):
+    if ir_type == "integer":
+        min_value = kwargs["min_value"]
+        max_value = kwargs["max_value"]
+        weights = kwargs["weights"]
+        # it's a bit annoying (but completely feasible) to implement the cases
+        # other than "both sides bounded" here. We haven't needed to yet because
+        # in practice we don't struggle with unbounded integer generation.
+        assert min_value is not None
+        assert max_value is not None
+
+        if weights is None:
+            yield from range(min_value, max_value + 1)
+        else:
+            # skip any values with a corresponding weight of 0 (can never be drawn).
+            for weight, n in zip(weights, range(min_value, max_value + 1)):
+                if weight == 0:
+                    continue
+                yield n
+
+    if ir_type == "boolean":
+        p = kwargs["p"]
+        if p <= 2 ** (-64):
+            yield False
+        elif p >= (1 - 2 ** (-64)):
+            yield True
+        else:
+            yield from [False, True]
+    if ir_type == "bytes":
+        size = kwargs["size"]
+        yield from (int_to_bytes(i, size) for i in range(2 ** (8 * size)))
+    if ir_type == "string":
+        min_size = kwargs["min_size"]
+        max_size = kwargs["max_size"]
+        intervals = kwargs["intervals"]
+
+        size = min_size
+        while size <= max_size:
+            for ords in itertools.product(intervals, repeat=size):
+                yield "".join(chr(n) for n in ords)
+            size += 1
+    if ir_type == "float":
+
+        def floats_between(a, b):
+            for n in range(float_to_int(a), float_to_int(b) + 1):
+                yield int_to_float(n)
+
+        min_value = kwargs["min_value"]
+        max_value = kwargs["max_value"]
+
+        if flt.is_negative(min_value):
+            if flt.is_negative(max_value):
+                # if both are negative, have to invert order
+                yield from floats_between(max_value, min_value)
+            else:
+                yield from floats_between(-0.0, min_value)
+                yield from floats_between(0.0, max_value)
+        else:
+            yield from floats_between(min_value, max_value)
+
+
 @attr.s(slots=True)
 class TreeNode:
-    """Node in a tree that corresponds to previous interactions with
-    a ``ConjectureData`` object according to some fixed test function.
-
-    This is functionally a variant patricia trie.
-    See https://en.wikipedia.org/wiki/Radix_tree for the general idea,
-    but what this means in particular here is that we have a very deep
-    but very lightly branching tree and rather than store this as a fully
-    recursive structure we flatten prefixes and long branches into
-    lists. This significantly compacts the storage requirements.
-
-    A single ``TreeNode`` corresponds to a previously seen sequence
-    of calls to ``ConjectureData`` which we have never seen branch,
-    followed by a ``transition`` which describes what happens next.
     """
+    A node, or collection of directly descended nodes, in a DataTree.
+
+    We store the DataTree as a radix tree (https://en.wikipedia.org/wiki/Radix_tree),
+    which means that nodes that are the only child of their parent are collapsed
+    into their parent to save space.
+
+    Conceptually, you can unfold a single TreeNode storing n values in its lists
+    into a sequence of n nodes, each a child of the last. In other words,
+    (kwargs[i], values[i], ir_types[i]) corresponds to the single node at index
+    i.
+
+    Note that if a TreeNode represents a choice (i.e. the nodes cannot be compacted
+    via the radix tree definition), then its lists will be empty and it will
+    store a `Branch` representing that choice in its `transition`.
+
+    Examples
+    --------
+
+    Consider sequentially drawing a boolean, then an integer.
+
+            data.draw_boolean()
+            data.draw_integer(1, 3)
+
+    If we draw True and then 2, the tree may conceptually look like this.
+
+                      ┌──────┐
+                      │ root │
+                      └──┬───┘
+                      ┌──┴───┐
+                      │ True │
+                      └──┬───┘
+                      ┌──┴───┐
+                      │  2   │
+                      └──────┘
+
+    But since 2 is the only child of True, we will compact these nodes and store
+    them as a single TreeNode.
+
+                      ┌──────┐
+                      │ root │
+                      └──┬───┘
+                    ┌────┴──────┐
+                    │ [True, 2] │
+                    └───────────┘
+
+    If we then draw True and then 3, True will have multiple children and we
+    can no longer store this compacted representation. We would call split_at(0)
+    on the [True, 2] node to indicate that we need to add a choice at the node
+    at index 0 (True).
 
-    # Records the previous sequence of calls to ``data.draw_bits``,
-    # with the ``n_bits`` argument going in ``bit_lengths`` and the
-    # values seen in ``values``. These should always have the same
-    # length.
-    bit_lengths = attr.ib(factory=IntList)
-    values = attr.ib(factory=IntList)
-
-    # The indices of of the calls to ``draw_bits`` that we have stored
-    # where  ``forced`` is not None. Stored as None if no indices
-    # have been forced, purely for space saving reasons (we force
-    # quite rarely).
-    __forced = attr.ib(default=None, init=False)
-
-    # What happens next after observing this sequence of calls.
-    # Either:
+                      ┌──────┐
+                      │ root │
+                      └──┬───┘
+                      ┌──┴───┐
+                    ┌─┤ True ├─┐
+                    │ └──────┘ │
+                  ┌─┴─┐      ┌─┴─┐
+                  │ 2 │      │ 3 │
+                  └───┘      └───┘
+    """
+
+    # The kwargs, value, and ir_types of the nodes stored here. These always
+    # have the same length. The values at index i belong to node i.
+    kwargs: List[IRKWargsType] = attr.ib(factory=list)
+    values: List[IRType] = attr.ib(factory=list)
+    ir_types: List[IRLiteralType] = attr.ib(factory=list)
+
+    # The indices of nodes which had forced values.
     #
-    # * ``None``, indicating we don't know yet.
-    # * A ``Branch`` object indicating that there is a ``draw_bits``
-    #   call that we have seen take multiple outcomes there.
-    # * A ``Conclusion`` object indicating that ``conclude_test``
-    #   was called here.
-    transition = attr.ib(default=None)
-
-    # A tree node is exhausted if every possible sequence of
-    # draws below it has been explored. We store this information
-    # on a field and update it when performing operations that
-    # could change the answer.
+    # Stored as None if no indices have been forced, purely for space saving
+    # reasons (we force quite rarely).
+    __forced: Optional[set] = attr.ib(default=None, init=False)
+
+    # What happens next after drawing these nodes. (conceptually, "what is the
+    # child/children of the last node stored here").
     #
-    # A node may start exhausted, e.g. because it it leads
-    # immediately to a conclusion, but can only go from
-    # non-exhausted to exhausted when one of its children
-    # becomes exhausted or it is marked as a conclusion.
+    # One of:
+    # - None (we don't know yet)
+    # - Branch (we have seen multiple possible outcomes here)
+    # - Conclusion (ConjectureData.conclude_test was called here)
+    # - Killed (this branch is valid and may even have children, but should not
+    #   be explored when generating novel prefixes)
+    transition: Union[None, Branch, Conclusion, Killed] = attr.ib(default=None)
+
+    # A tree node is exhausted if every possible sequence of draws below it has
+    # been explored. We only update this when performing operations that could
+    # change the answer.
     #
-    # Therefore we only need to check whether we need to update
-    # this field when the node is first created in ``split_at``
-    # or when we have walked a path through this node to a
-    # conclusion in ``TreeRecordingObserver``.
-    is_exhausted = attr.ib(default=False, init=False)
+    # See also TreeNode.check_exhausted.
+    is_exhausted: bool = attr.ib(default=False, init=False)
 
     @property
     def forced(self):
@@ -130,17 +368,21 @@ def forced(self):
         return self.__forced
 
     def mark_forced(self, i):
-        """Note that the value at index ``i`` was forced."""
+        """
+        Note that the draw at node i was forced.
+        """
         assert 0 <= i < len(self.values)
         if self.__forced is None:
             self.__forced = set()
         self.__forced.add(i)
 
     def split_at(self, i):
-        """Splits the tree so that it can incorporate
-        a decision at the ``draw_bits`` call corresponding
-        to position ``i``, or raises ``Flaky`` if that was
-        meant to be a forced node."""
+        """
+        Splits the tree so that it can incorporate a decision at the draw call
+        corresponding to the node at position i.
+
+        Raises Flaky if node i was forced.
+        """
 
         if i in self.forced:
             inconsistent_generation()
@@ -150,26 +392,58 @@ def split_at(self, i):
         key = self.values[i]
 
         child = TreeNode(
-            bit_lengths=self.bit_lengths[i + 1 :],
+            ir_types=self.ir_types[i + 1 :],
+            kwargs=self.kwargs[i + 1 :],
             values=self.values[i + 1 :],
             transition=self.transition,
         )
-        self.transition = Branch(bit_length=self.bit_lengths[i], children={key: child})
+        self.transition = Branch(
+            kwargs=self.kwargs[i], ir_type=self.ir_types[i], children={key: child}
+        )
         if self.__forced is not None:
             child.__forced = {j - i - 1 for j in self.__forced if j > i}
             self.__forced = {j for j in self.__forced if j < i}
         child.check_exhausted()
+        del self.ir_types[i:]
         del self.values[i:]
-        del self.bit_lengths[i:]
-        assert len(self.values) == len(self.bit_lengths) == i
+        del self.kwargs[i:]
+        assert len(self.values) == len(self.kwargs) == len(self.ir_types) == i
 
     def check_exhausted(self):
-        """Recalculates ``self.is_exhausted`` if necessary then returns
-        it."""
+        """
+        Recalculates is_exhausted if necessary, and then returns it.
+
+        A node is exhausted if:
+        - Its transition is Conclusion or Killed
+        - It has the maximum number of children (i.e. we have found all of its
+          possible children), and all its children are exhausted
+
+        Therefore, we only need to compute this for a node when:
+        - We first create it in split_at
+        - We set its transition to either Conclusion or Killed
+          (TreeRecordingObserver.conclude_test or TreeRecordingObserver.kill_branch)
+        - We exhaust any of its children
+        """
+
         if (
+            # a node cannot go from is_exhausted -> not is_exhausted.
             not self.is_exhausted
-            and len(self.forced) == len(self.values)
+            # if we don't know what happens after this node, we don't have
+            # enough information to tell if it's exhausted.
             and self.transition is not None
+            # if there are still any nodes left which are the only child of their
+            # parent (len(self.values) > 0), then this TreeNode must be not
+            # exhausted, unless all of those nodes were forced.
+            #
+            # This is because we maintain an invariant of only adding nodes to
+            # DataTree which have at least 2 possible values, so we know that if
+            # they do not have any siblings, we still have more choices to
+            # discover.
+            #
+            # (We actually *do* currently add single-valued nodes to the tree,
+            # but immediately split them into a transition to avoid falsifying
+            # this check. This is a bit of a hack.)
+            and len(self.forced) == len(self.values)
         ):
             if isinstance(self.transition, (Conclusion, Killed)):
                 self.is_exhausted = True
@@ -181,16 +455,159 @@ def check_exhausted(self):
 
 
 class DataTree:
-    """Tracks the tree structure of a collection of ConjectureData
-    objects, for use in ConjectureRunner."""
+    """
+    A DataTree tracks the structured history of draws in some test function,
+    across multiple ConjectureData objects.
+
+    This information is used by ConjectureRunner to generate novel prefixes of
+    this tree (see generate_novel_prefix). A novel prefix is a sequence of draws
+    which the tree has not seen before, and therefore the ConjectureRunner has
+    not generated as an input to the test function before.
+
+    DataTree tracks the following:
+
+    - Draws, at the ir level (with some ir_type, e.g. "integer")
+      - ConjectureData.draw_integer()
+      - ConjectureData.draw_float()
+      - ConjectureData.draw_string()
+      - ConjectureData.draw_boolean()
+      - ConjectureData.draw_bytes()
+    - Test conclusions (with some Status, e.g. Status.VALID)
+      - ConjectureData.conclude_test()
+
+    A DataTree is — surprise — a *tree*. A node in this tree is either a draw with
+    some value, a test conclusion with some Status, or a special `Killed` value,
+    which denotes that further draws may exist beyond this node but should not be
+    considered worth exploring when generating novel prefixes. A node is a leaf
+    iff it is a conclusion or Killed.
+
+    A branch from node A to node B indicates that we have previously seen some
+    sequence (a, b) of draws, where a and b are the values in nodes A and B.
+    Similar intuition holds for conclusion and Killed nodes.
+
+    Examples
+    --------
+
+    To see how a DataTree gets built through successive sets of draws, consider
+    the following code that calls through to some ConjectureData object `data`.
+    The first call can be either True or False, and the second call can be any
+    integer in the range [1, 3].
+
+        data.draw_boolean()
+        data.draw_integer(1, 3)
+
+    To start, the corresponding DataTree object is completely empty.
+
+                      ┌──────┐
+                      │ root │
+                      └──────┘
+
+    We happen to draw True and then 2 in the above code. The tree tracks this.
+    (2 also connects to a child Conclusion node with Status.VALID since it's the
+    final draw in the code. I'll omit Conclusion nodes in diagrams for brevity.)
+
+                      ┌──────┐
+                      │ root │
+                      └──┬───┘
+                      ┌──┴───┐
+                      │ True │
+                      └──┬───┘
+                      ┌──┴───┐
+                      │  2   │
+                      └──────┘
+
+    This is a very boring tree so far! But now we happen to draw False and
+    then 1. This causes a split in the tree. Remember, DataTree tracks history
+    over all invocations of a function, not just one. The end goal is to know
+    what invocations haven't been tried yet, after all.
+
+                      ┌──────┐
+                  ┌───┤ root ├───┐
+                  │   └──────┘   │
+               ┌──┴───┐        ┌─┴─────┐
+               │ True │        │ False │
+               └──┬───┘        └──┬────┘
+                ┌─┴─┐           ┌─┴─┐
+                │ 2 │           │ 1 │
+                └───┘           └───┘
+
+    If we were to ask DataTree for a novel prefix at this point, it might
+    generate any of (True, 1), (True, 3), (False, 2), or (False, 3).
+
+    Note that the novel prefix stops as soon as it generates a novel node. For
+    instance, if we had generated a novel prefix back when the tree was only
+    root -> True -> 2, we could have gotten any of (True, 1), (True, 3), or
+    (False). But we could *not* have gotten (False, n), because both False and
+    n were novel at that point, and we stop at the first novel node — False.
+
+    I won't belabor this example. Here's what the tree looks like when fully
+    explored:
+
+                      ┌──────┐
+               ┌──────┤ root ├──────┐
+               │      └──────┘      │
+            ┌──┴───┐              ┌─┴─────┐
+         ┌──┤ True ├──┐       ┌───┤ False ├──┐
+         │  └──┬───┘  │       │   └──┬────┘  │
+       ┌─┴─┐ ┌─┴─┐  ┌─┴─┐   ┌─┴─┐  ┌─┴─┐   ┌─┴─┐
+       │ 1 │ │ 2 │  │ 3 │   │ 1 │  │ 2 │   │ 3 │
+       └───┘ └───┘  └───┘   └───┘  └───┘   └───┘
+
+    You could imagine much more complicated trees than this arising in practice,
+    and indeed they do. In particular, the tree need not be balanced or 'nice'
+    like the tree above. For instance,
+
+        b = data.draw_boolean()
+        if b:
+            data.draw_integer(1, 3)
+
+    results in a tree with the entire right part lopped off, and False leading
+    straight to a conclusion node with Status.VALID. As another example,
+
+        n = data.draw_integer()
+        assume(n >= 3)
+        data.draw_string()
+
+    results in a tree with the 0, 1, and 2 nodes leading straight to a
+    conclusion node with Status.INVALID, and the rest branching off into all
+    the possibilities of draw_string.
+
+    Notes
+    -----
+
+    The above examples are slightly simplified and are intended to convey
+    intuition. In practice, there are some implementation details to be aware
+    of.
+
+    - In draw nodes, we store the kwargs used in addition to the value drawn.
+      E.g. the node corresponding to data.draw_float(min_value=1.0, max_value=1.5)
+      would store {"min_value": 1.0, "max_value": 1.5, ...} (default values for
+      other kwargs omitted).
+
+      The kwargs parameters have the potential to change both the range of
+      possible outputs of a node, and the probability distribution within that
+      range, so we need to use these when drawing in DataTree as well. We draw
+      values using these kwargs when (1) generating a novel value for a node
+      and (2) choosing a random child when traversing the tree.
+
+    - For space efficiency, rather than tracking the full tree structure, we
+      store DataTree as a radix tree. This is conceptually equivalent (radix
+      trees can always be "unfolded" to the full tree) but it means the internal
+      representation may differ in practice.
+
+      See TreeNode for more information.
+    """
 
     def __init__(self):
         self.root = TreeNode()
+        self._children_cache = {}
 
     @property
     def is_exhausted(self):
-        """Returns True if every possible node is dead and thus the language
-        described must have been fully explored."""
+        """
+        Returns True if every node is exhausted, and therefore the tree has
+        been fully explored.
+        """
         return self.root.is_exhausted
 
     def generate_novel_prefix(self, random):
@@ -201,26 +618,43 @@ def generate_novel_prefix(self, random):
         for it to be uniform at random, but previous attempts to do that
         have proven too expensive.
         """
+
         assert not self.is_exhausted
         novel_prefix = bytearray()
 
-        def append_int(n_bits, value):
-            novel_prefix.extend(int_to_bytes(value, bits_to_bytes(n_bits)))
+        def append_buf(buf):
+            novel_prefix.extend(buf)
 
         current_node = self.root
         while True:
             assert not current_node.is_exhausted
-            for i, (n_bits, value) in enumerate(
-                zip(current_node.bit_lengths, current_node.values)
+            for i, (ir_type, kwargs, value) in enumerate(
+                zip(current_node.ir_types, current_node.kwargs, current_node.values)
             ):
                 if i in current_node.forced:
-                    append_int(n_bits, value)
+                    if ir_type == "float":
+                        value = int_to_float(value)
+                    (_value, buf) = self._draw(
+                        ir_type, kwargs, forced=value, random=random
+                    )
+                    append_buf(buf)
                 else:
+                    attempts = 0
                     while True:
-                        k = random.getrandbits(n_bits)
-                        if k != value:
-                            append_int(n_bits, k)
+                        if attempts <= 10:
+                            (v, buf) = self._draw(ir_type, kwargs, random=random)
+                        else:
+                            (v, buf) = self._draw_from_cache(
+                                ir_type, kwargs, key=id(current_node), random=random
+                            )
+
+                        if v != value:
+                            append_buf(buf)
                             break
+                        attempts += 1
+                        self._reject_child(
+                            ir_type, kwargs, child=v, key=id(current_node)
+                        )
                     # We've now found a value that is allowed to
                     # vary, so what follows is not fixed.
                     return bytes(novel_prefix)
@@ -230,27 +664,37 @@ def append_int(n_bits, value):
                     return bytes(novel_prefix)
                 branch = current_node.transition
                 assert isinstance(branch, Branch)
-                n_bits = branch.bit_length
 
-                check_counter = 0
+                attempts = 0
                 while True:
-                    k = random.getrandbits(n_bits)
+                    if attempts <= 10:
+                        (v, buf) = self._draw(
+                            branch.ir_type, branch.kwargs, random=random
+                        )
+                    else:
+                        (v, buf) = self._draw_from_cache(
+                            branch.ir_type, branch.kwargs, key=id(branch), random=random
+                        )
                     try:
-                        child = branch.children[k]
+                        child = branch.children[v]
                     except KeyError:
-                        append_int(n_bits, k)
+                        append_buf(buf)
                         return bytes(novel_prefix)
                     if not child.is_exhausted:
-                        append_int(n_bits, k)
+                        append_buf(buf)
                         current_node = child
                         break
-                    check_counter += 1
+                    attempts += 1
+                    self._reject_child(
+                        branch.ir_type, branch.kwargs, child=v, key=id(branch)
+                    )
+
                     # We don't expect this assertion to ever fire, but coverage
                     # wants the loop inside to run if you have branch checking
                     # on, hence the pragma.
                     assert (  # pragma: no cover
-                        check_counter != 1000
-                        or len(branch.children) < (2**n_bits)
+                        attempts != 1000
+                        or len(branch.children) < branch.max_children
                         or any(not v.is_exhausted for v in branch.children.values())
                     )
 
@@ -274,13 +718,22 @@ def simulate_test_function(self, data):
         or ``start_example`` as these are not currently recorded in the
         tree. This will likely change in future."""
         node = self.root
+
+        def draw(ir_type, kwargs, *, forced=None):
+            draw_func = getattr(data, f"draw_{ir_type}")
+            value = draw_func(**kwargs, forced=forced)
+
+            if ir_type == "float":
+                value = float_to_int(value)
+            return value
+
         try:
             while True:
-                for i, (n_bits, previous) in enumerate(
-                    zip(node.bit_lengths, node.values)
+                for i, (ir_type, kwargs, previous) in enumerate(
+                    zip(node.ir_types, node.kwargs, node.values)
                 ):
-                    v = data.draw_bits(
-                        n_bits, forced=node.values[i] if i in node.forced else None
+                    v = draw(
+                        ir_type, kwargs, forced=previous if i in node.forced else None
                     )
                     if v != previous:
                         raise PreviouslyUnseenBehaviour
@@ -290,7 +743,7 @@ def simulate_test_function(self, data):
                 elif node.transition is None:
                     raise PreviouslyUnseenBehaviour
                 elif isinstance(node.transition, Branch):
-                    v = data.draw_bits(node.transition.bit_length)
+                    v = draw(node.transition.ir_type, node.transition.kwargs)
                     try:
                         node = node.transition.children[v]
                     except KeyError as err:
@@ -305,6 +758,97 @@ def simulate_test_function(self, data):
     def new_observer(self):
         return TreeRecordingObserver(self)
 
+    def _draw(self, ir_type, kwargs, *, random, forced=None):
+        # we should possibly pull out BUFFER_SIZE to a common file to avoid this
+        # circular import.
+        from hypothesis.internal.conjecture.engine import BUFFER_SIZE
+
+        cd = ConjectureData(max_length=BUFFER_SIZE, prefix=b"", random=random)
+        draw_func = getattr(cd, f"draw_{ir_type}")
+
+        value = draw_func(**kwargs, forced=forced)
+        buf = cd.buffer
+
+        # using floats as keys into branch.children breaks things, because
+        # e.g. hash(0.0) == hash(-0.0) would collide as keys when they are
+        # in fact distinct child branches.
+        # To distinguish floats here we'll use their bits representation. This
+        # entails some bookkeeping such that we're careful about when the
+        # float key is in its bits form (as a key into branch.children) and
+        # when it is in its float form (as a value we want to write to the
+        # buffer), and converting between the two forms as appropriate.
+        if ir_type == "float":
+            value = float_to_int(value)
+        return (value, buf)
+
+    def _get_children_cache(self, ir_type, kwargs, *, key):
+        # cache the state of the children generator per node/branch (passed as
+        # `key` here), such that we track which children we've already tried
+        # for this branch across draws.
+        # We take advantage of python generators here as one-way iterables,
+        # so each time we iterate we implicitly store our position in the
+        # children generator and don't re-draw children. `children` is the
+        # concrete list of children drawn from the generator that we will work
+        # with. Whenever we need to top up this list, we will draw a new value
+        # from the generator.
+        if key not in self._children_cache:
+            generator = all_children(ir_type, kwargs)
+            children = []
+            rejected = set()
+            self._children_cache[key] = (generator, children, rejected)
+
+        return self._children_cache[key]
+
+    def _draw_from_cache(self, ir_type, kwargs, *, key, random):
+        (generator, children, rejected) = self._get_children_cache(
+            ir_type, kwargs, key=key
+        )
+        # Keep a stock of 100 potentially-valid children at all times.
+        # This number is chosen to balance memory/speed vs randomness. Ideally
+        # we would sample uniformly from all not-yet-rejected children, but
+        # computing and storing said children is not free.
+        # no-branch because coverage of the fall-through case here is a bit
+        # annoying.
+        if len(children) < 100:  # pragma: no branch
+            for v in generator:
+                if ir_type == "float":
+                    v = float_to_int(v)
+                if v in rejected:
+                    continue
+                children.append(v)
+                if len(children) >= 100:
+                    break
+
+        forced = random.choice(children)
+        if ir_type == "float":
+            forced = int_to_float(forced)
+        (value, buf) = self._draw(ir_type, kwargs, forced=forced, random=random)
+        return (value, buf)
+
+    def _reject_child(self, ir_type, kwargs, *, child, key):
+        (_generator, children, rejected) = self._get_children_cache(
+            ir_type, kwargs, key=key
+        )
+        rejected.add(child)
+        # we remove a child from the list of possible children *only* when it is
+        # rejected, and not when it is initially drawn in _draw_from_cache. The
+        # reason is that a child being drawn does not guarantee that child will
+        # be used in a way such that it is written back to the tree, so it needs
+        # to be available for future draws until we are certain it has been
+        # used.
+        #
+        # For instance, if we generated novel prefixes in a loop (but never used
+        # those prefixes to generate new values!) then we don't want to remove
+        # the drawn children from the available pool until they are actually
+        # used.
+        #
+        # This does result in a small inefficiency: we may draw a child,
+        # immediately use it (so we know it cannot be drawn again), but still
+        # wait to draw and reject it here, because DataTree cannot guarantee
+        # the drawn child has been used.
+        if child in children:
+            children.remove(child)
+
 
 class TreeRecordingObserver(DataObserver):
     def __init__(self, tree):
@@ -313,13 +857,49 @@ def __init__(self, tree):
         self.__trail = [self.__current_node]
         self.killed = False
 
-    def draw_bits(self, n_bits, forced, value):
+    def draw_integer(
+        self, value: int, *, was_forced: bool, kwargs: IntegerKWargs
+    ) -> None:
+        self.draw_value("integer", value, was_forced=was_forced, kwargs=kwargs)
+
+    def draw_float(
+        self, value: float, *, was_forced: bool, kwargs: FloatKWargs
+    ) -> None:
+        self.draw_value("float", value, was_forced=was_forced, kwargs=kwargs)
+
+    def draw_string(
+        self, value: str, *, was_forced: bool, kwargs: StringKWargs
+    ) -> None:
+        self.draw_value("string", value, was_forced=was_forced, kwargs=kwargs)
+
+    def draw_bytes(
+        self, value: bytes, *, was_forced: bool, kwargs: BytesKWargs
+    ) -> None:
+        self.draw_value("bytes", value, was_forced=was_forced, kwargs=kwargs)
+
+    def draw_boolean(
+        self, value: bool, *, was_forced: bool, kwargs: BooleanKWargs
+    ) -> None:
+        self.draw_value("boolean", value, was_forced=was_forced, kwargs=kwargs)
+
+    def draw_value(
+        self,
+        ir_type: IRLiteralType,
+        value: IRType,
+        *,
+        was_forced: bool,
+        kwargs: IRKWargsType,
+    ) -> None:
         i = self.__index_in_current_node
         self.__index_in_current_node += 1
         node = self.__current_node
-        assert len(node.bit_lengths) == len(node.values)
-        if i < len(node.bit_lengths):
-            if n_bits != node.bit_lengths[i]:
+
+        if isinstance(value, float):
+            value = float_to_int(value)
+
+        assert len(node.kwargs) == len(node.values) == len(node.ir_types)
+        if i < len(node.values):
+            if ir_type != node.ir_types[i] or kwargs != node.kwargs[i]:
                 inconsistent_generation()
             # Note that we don't check whether a previously
             # forced value is now free. That will be caught
@@ -327,23 +907,43 @@ def draw_bits(self, n_bits, forced, value):
             # may pass silently. This is acceptable because it
             # means we skip a hash set lookup on every
             # draw and that's a pretty niche failure mode.
-            if forced and i not in node.forced:
+            if was_forced and i not in node.forced:
                 inconsistent_generation()
             if value != node.values[i]:
                 node.split_at(i)
                 assert i == len(node.values)
                 new_node = TreeNode()
-                branch = node.transition
-                branch.children[value] = new_node
+                node.transition.children[value] = new_node
                 self.__current_node = new_node
                 self.__index_in_current_node = 0
         else:
             trans = node.transition
             if trans is None:
-                node.bit_lengths.append(n_bits)
+                node.ir_types.append(ir_type)
+                node.kwargs.append(kwargs)
                 node.values.append(value)
-                if forced:
+                if was_forced:
                     node.mark_forced(i)
+                # generate_novel_prefix assumes the following invariant: any one
+                # of the series of draws in a particular node can vary, i.e. the
+                # max number of children is at least 2. However, some draws are
+                # pseudo-choices and only have a single value, such as
+                # integers(0, 0).
+                #
+                # Currently, we address this by forcefully splitting such
+                # single-valued nodes into a transition when we see them. An
+                # exception to this is if it was forced: forced pseudo-choices
+                # do not cause the above issue because they inherently cannot
+                # vary, and moreover they trip other invariants about never
+                # splitting forced nodes.
+                #
+                # An alternative is not writing such choices to the tree at
+                # all, and thus guaranteeing that each node has at least 2 max
+                # children.
+                if compute_max_children(ir_type, kwargs) == 1 and not was_forced:
+                    node.split_at(i)
+                    self.__current_node = node.transition.children[value]
+                    self.__index_in_current_node = 0
             elif isinstance(trans, Conclusion):
                 assert trans.status != Status.OVERRUN
                 # We tried to draw where history says we should have
@@ -351,7 +951,7 @@ def draw_bits(self, n_bits, forced, value):
                 inconsistent_generation()
             else:
                 assert isinstance(trans, Branch), trans
-                if n_bits != trans.bit_length:
+                if ir_type != trans.ir_type or kwargs != trans.kwargs:
                     inconsistent_generation()
                 try:
                     self.__current_node = trans.children[value]
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/utils.py b/hypothesis-python/src/hypothesis/internal/conjecture/utils.py
index 61f9d742bb..5e77437a78 100644
--- a/hypothesis-python/src/hypothesis/internal/conjecture/utils.py
+++ b/hypothesis-python/src/hypothesis/internal/conjecture/utils.py
@@ -101,13 +101,12 @@ class Sampler:
 
     table: List[Tuple[int, int, float]]  # (base_idx, alt_idx, alt_chance)
 
-    def __init__(self, weights: Sequence[float]):
-        n = len(weights)
+    def __init__(self, weights: Sequence[float], *, observe: bool = True):
+        self.observe = observe
 
+        n = len(weights)
         table: "list[list[int | float | None]]" = [[i, None, None] for i in range(n)]
-
         total = sum(weights)
-
         num_type = type(total)
 
         zero = num_type(0)  # type: ignore
@@ -179,7 +178,7 @@ def sample(self, data: "ConjectureData", forced: Optional[int] = None) -> int:
             )
         )
         base, alternate, alternate_chance = data.choice(
-            self.table, forced=forced_choice
+            self.table, forced=forced_choice, observe=self.observe
         )
         forced_use_alternate = None
         if forced is not None:
@@ -189,7 +188,9 @@ def sample(self, data: "ConjectureData", forced: Optional[int] = None) -> int:
             forced_use_alternate = forced == alternate and alternate_chance > 0
             assert forced == base or forced_use_alternate
 
-        use_alternate = data.draw_boolean(alternate_chance, forced=forced_use_alternate)
+        use_alternate = data.draw_boolean(
+            alternate_chance, forced=forced_use_alternate, observe=self.observe
+        )
         data.stop_example()
         if use_alternate:
             assert forced is None or alternate == forced, (forced, alternate)
@@ -200,7 +201,7 @@ def sample(self, data: "ConjectureData", forced: Optional[int] = None) -> int:
 
 
 INT_SIZES = (8, 16, 32, 64, 128)
-INT_SIZES_SAMPLER = Sampler((4.0, 8.0, 1.0, 1.0, 0.5))
+INT_SIZES_SAMPLER = Sampler((4.0, 8.0, 1.0, 1.0, 0.5), observe=False)
 
 
 class many:
@@ -223,6 +224,7 @@ def __init__(
         average_size: Union[int, float],
         *,
         forced: Optional[int] = None,
+        observe: bool = True,
     ) -> None:
         assert 0 <= min_size <= average_size <= max_size
         assert forced is None or min_size <= forced <= max_size
@@ -236,17 +238,17 @@ def __init__(
         self.drawn = False
         self.force_stop = False
         self.rejected = False
+        self.observe = observe
 
     def more(self) -> bool:
         """Should I draw another element to add to the collection?"""
         if self.drawn:
-            self.data.stop_example(discard=self.rejected)
+            self.data.stop_example()
 
         self.drawn = True
         self.rejected = False
 
         self.data.start_example(ONE_FROM_MANY_LABEL)
-
         if self.min_size == self.max_size:
             # if we have to hit an exact size, draw unconditionally until that
             # point, and no further.
@@ -265,7 +267,7 @@ def more(self) -> bool:
             elif self.forced_size is not None:
                 forced_result = self.count < self.forced_size
             should_continue = self.data.draw_boolean(
-                self.p_continue, forced=forced_result
+                self.p_continue, forced=forced_result, observe=self.observe
             )
 
         if should_continue:
diff --git a/hypothesis-python/tests/conjecture/common.py b/hypothesis-python/tests/conjecture/common.py
index d3a8b04281..aab8065e3c 100644
--- a/hypothesis-python/tests/conjecture/common.py
+++ b/hypothesis-python/tests/conjecture/common.py
@@ -74,8 +74,8 @@ def accept(f):
     return accept
 
 
-def fresh_data():
-    return ConjectureData(BUFFER_SIZE, prefix=b"", random=Random())
+def fresh_data(*, observer=None) -> ConjectureData:
+    return ConjectureData(BUFFER_SIZE, prefix=b"", random=Random(), observer=observer)
 
 
 @st.composite
@@ -193,7 +193,7 @@ def draw_float_kwargs(
     draw, *, use_min_value=True, use_max_value=True, use_forced=False
 ):
     forced = draw(st.floats()) if use_forced else None
-    pivot = forced if not math.isnan(forced) else None
+    pivot = forced if (use_forced and not math.isnan(forced)) else None
     min_value = -math.inf
     max_value = math.inf
 
diff --git a/hypothesis-python/tests/conjecture/test_data_tree.py b/hypothesis-python/tests/conjecture/test_data_tree.py
index 9acd088d5f..363bf9f1fa 100644
--- a/hypothesis-python/tests/conjecture/test_data_tree.py
+++ b/hypothesis-python/tests/conjecture/test_data_tree.py
@@ -12,11 +12,27 @@
 
 import pytest
 
-from hypothesis import HealthCheck, settings
+from hypothesis import HealthCheck, assume, given, settings
 from hypothesis.errors import Flaky
 from hypothesis.internal.conjecture.data import ConjectureData, Status, StopTest
-from hypothesis.internal.conjecture.datatree import DataTree
+from hypothesis.internal.conjecture.datatree import (
+    Branch,
+    DataTree,
+    compute_max_children,
+)
 from hypothesis.internal.conjecture.engine import ConjectureRunner
+from hypothesis.internal.conjecture.floats import float_to_int
+from hypothesis.internal.floats import next_up
+
+from tests.conjecture.common import (
+    draw_boolean_kwargs,
+    draw_bytes_kwargs,
+    draw_float_kwargs,
+    draw_integer_kwargs,
+    draw_string_kwargs,
+    fresh_data,
+    run_to_buffer,
+)
 
 TEST_SETTINGS = settings(
     max_examples=5000, database=None, suppress_health_check=list(HealthCheck)
@@ -141,7 +157,7 @@ def runner(data):
         data.mark_interesting()
 
     root = runner.tree.root
-    assert len(root.bit_lengths) == 10
+    assert len(root.kwargs) == 10
     assert len(root.values) == 10
     assert root.transition.status == Status.INTERESTING
 
@@ -155,7 +171,7 @@ def runner(data):
         data.mark_interesting()
 
     root = runner.tree.root
-    assert len(root.bit_lengths) == len(root.values) == 1
+    assert len(root.kwargs) == len(root.values) == 1
     assert list(root.transition.children[0].values) == [2]
     assert list(root.transition.children[1].values) == [3]
 
@@ -328,7 +344,7 @@ def test_child_becomes_exhausted_after_split():
     data.freeze()
 
     assert not tree.is_exhausted
-    assert tree.root.transition.children[0].is_exhausted
+    assert tree.root.transition.children[b"\0"].is_exhausted
 
 
 def test_will_generate_novel_prefix_to_avoid_exhausted_branches():
@@ -363,3 +379,194 @@ def test_will_mark_changes_in_discard_as_flaky():
 
     with pytest.raises(Flaky):
         data.stop_example(discard=True)
+
+
+def test_is_not_flaky_on_positive_zero_and_negative_zero():
+    # if we store floats in a naive way, the 0.0 and -0.0 draws will be treated
+    # equivalently and will lead to flaky errors when they diverge on the boolean
+    # draw.
+    tree = DataTree()
+
+    @run_to_buffer
+    def buf1(data):
+        data.draw_float(forced=0.0)
+        # the value drawn here doesn't actually matter, since we'll force it
+        # later. we just want to avoid buffer overruns.
+        data.draw_boolean()
+        data.mark_interesting()
+
+    @run_to_buffer
+    def buf2(data):
+        data.draw_float(forced=-0.0)
+        data.draw_boolean()
+        data.mark_interesting()
+
+    data = ConjectureData.for_buffer(buf1, observer=tree.new_observer())
+    f = data.draw_float()
+    assert float_to_int(f) == float_to_int(0.0)
+    data.draw_boolean(forced=False)
+    data.freeze()
+
+    data = ConjectureData.for_buffer(buf2, observer=tree.new_observer())
+    f = data.draw_float()
+    assert float_to_int(f) == float_to_int(-0.0)
+    data.draw_boolean(forced=True)
+    data.freeze()
+
+    assert isinstance(tree.root.transition, Branch)
+    children = tree.root.transition.children
+    assert children[float_to_int(0.0)].values == [False]
+    assert children[float_to_int(-0.0)].values == [True]
+
+
+def test_low_probabilities_are_still_explored():
+    @run_to_buffer
+    def true_buf(data):
+        data.draw_boolean(p=1e-10, forced=True)
+        data.mark_interesting()
+
+    @run_to_buffer
+    def false_buf(data):
+        data.draw_boolean(p=1e-10, forced=False)
+        data.mark_interesting()
+
+    tree = DataTree()
+
+    data = ConjectureData.for_buffer(false_buf, observer=tree.new_observer())
+    data.draw_boolean(p=1e-10)  # False
+
+    v = tree.generate_novel_prefix(Random())
+    assert v == true_buf
+
+
+def _test_observed_draws_are_recorded_in_tree(ir_type):
+    kwargs_strategy = {
+        "integer": draw_integer_kwargs(),
+        "bytes": draw_bytes_kwargs(),
+        "float": draw_float_kwargs(),
+        "string": draw_string_kwargs(),
+        "boolean": draw_boolean_kwargs(),
+    }[ir_type]
+
+    @given(kwargs_strategy)
+    def test(kwargs):
+        # we currently split pseudo-choices with a single child into their
+        # own transition, which clashes with our asserts below. If we ever
+        # change this (say, by not writing pseudo choices to the ir at all),
+        # this restriction can be relaxed.
+        assume(compute_max_children(ir_type, kwargs) > 1)
+
+        tree = DataTree()
+        data = fresh_data(observer=tree.new_observer())
+        draw_func = getattr(data, f"draw_{ir_type}")
+        draw_func(**kwargs)
+
+        assert tree.root.transition is None
+        assert tree.root.ir_types == [ir_type]
+
+    test()
+
+
+def _test_non_observed_draws_are_not_recorded_in_tree(ir_type):
+    kwargs_strategy = {
+        "integer": draw_integer_kwargs(),
+        "bytes": draw_bytes_kwargs(),
+        "float": draw_float_kwargs(),
+        "string": draw_string_kwargs(),
+        "boolean": draw_boolean_kwargs(),
+    }[ir_type]
+
+    @given(kwargs_strategy)
+    def test(kwargs):
+        assume(compute_max_children(ir_type, kwargs) > 1)
+
+        tree = DataTree()
+        data = fresh_data(observer=tree.new_observer())
+        draw_func = getattr(data, f"draw_{ir_type}")
+        draw_func(**kwargs, observe=False)
+
+        root = tree.root
+        assert root.transition is None
+        assert root.kwargs == root.values == root.ir_types == []
+
+    test()
+
+
+@pytest.mark.parametrize("ir_type", ["integer", "float", "boolean", "string", "bytes"])
+def test_observed_ir_type_draw(ir_type):
+    _test_observed_draws_are_recorded_in_tree(ir_type)
+
+
+@pytest.mark.parametrize("ir_type", ["integer", "float", "boolean", "string", "bytes"])
+def test_non_observed_ir_type_draw(ir_type):
+    _test_non_observed_draws_are_not_recorded_in_tree(ir_type)
+
+
+def test_can_generate_hard_values():
+    tree = DataTree()
+
+    min_value = 0
+    max_value = 1000
+    # set up `tree` such that [0, 999] have been drawn and only n=1000 remains.
+    for i in range(max_value):
+
+        @run_to_buffer
+        def buf(data):
+            data.draw_integer(min_value, max_value, forced=i)
+            data.mark_interesting()
+
+        data = ConjectureData.for_buffer(buf, observer=tree.new_observer())
+        data.draw_integer(min_value, max_value)
+        data.freeze()
+
+    @run_to_buffer
+    def expected_buf(data):
+        data.draw_integer(min_value, max_value, forced=max_value)
+        data.mark_interesting()
+
+    # this test doubles as conjecture coverage for using our child cache, so
+    # ensure we don't miss that logic by getting lucky and drawing the correct
+    # value once or twice.
+    for _ in range(5):
+        assert tree.generate_novel_prefix(Random()) == expected_buf
+
+
+def test_can_generate_hard_floats():
+    # similar to test_can_generate_hard_values, but exercises float-specific
+    # logic for handling e.g. 0.0 vs -0.0 as different keys.
+    tree = DataTree()
+
+    def next_up_n(f, n):
+        for _ in range(n):
+            f = next_up(f)
+        return f
+
+    min_value = -0.0
+    max_value = next_up_n(min_value, 100)
+    for n in range(100):
+
+        @run_to_buffer
+        def buf(data):
+            f = next_up_n(min_value, n)
+            data.draw_float(min_value, max_value, forced=f, allow_nan=False)
+            data.mark_interesting()
+
+        data = ConjectureData.for_buffer(buf, observer=tree.new_observer())
+        data.draw_float(min_value, max_value, allow_nan=False)
+        data.freeze()
+
+    # we want to leave out a single value, such that we can assert
+    # generate_novel_prefix is equal to the buffer that would produce that value.
+    # The problem is that floats have multiple valid buffer representations due
+    # to clamping. Making the test buffer deterministic is annoying/impossible,
+    # and the buffer representation is going away soon anyway, so just make
+    # sure we generate the expected value (not necessarily buffer).
+    expected_value = next_up_n(min_value, 100)
+
+    # this test doubles as conjecture coverage for drawing floats from the
+    # children cache. Draw a few times to ensure we hit that logic (as opposed
+    # to getting lucky and drawing the correct value the first time).
+    for _ in range(5):
+        prefix = tree.generate_novel_prefix(Random())
+        data = ConjectureData.for_buffer(prefix)
+        assert data.draw_float(min_value, max_value, allow_nan=False) == expected_value
diff --git a/hypothesis-python/tests/conjecture/test_inquisitor.py b/hypothesis-python/tests/conjecture/test_inquisitor.py
index 251ca91efc..8b548d6a23 100644
--- a/hypothesis-python/tests/conjecture/test_inquisitor.py
+++ b/hypothesis-python/tests/conjecture/test_inquisitor.py
@@ -28,11 +28,14 @@ def _new():
     return _inner
 
 
+# this should have `a` marked as freely varying, but false negatives in our
+# inquisitor code skip over it sometimes, depending on the seen_passed_buffers.
+# yet another thing that should be improved by moving to the ir.
 @fails_with_output(
     """
 Falsifying example: test_inquisitor_comments_basic_fail_if_either(
     # The test always failed when commented parts were varied together.
-    a=False,  # or any other generated value
+    a=False,
     b=True,
     c=[],  # or any other generated value
     d=True,
diff --git a/hypothesis-python/tests/conjecture/test_ir.py b/hypothesis-python/tests/conjecture/test_ir.py
new file mode 100644
index 0000000000..4f2651465a
--- /dev/null
+++ b/hypothesis-python/tests/conjecture/test_ir.py
@@ -0,0 +1,156 @@
+# This file is part of Hypothesis, which may be found at
+# https://github.com/HypothesisWorks/hypothesis/
+#
+# Copyright the Hypothesis Authors.
+# Individual contributors are listed in AUTHORS.rst and the git log.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public License,
+# v. 2.0. If a copy of the MPL was not distributed with this file, You can
+# obtain one at https://mozilla.org/MPL/2.0/.
+
+from hypothesis import assume, example, given, strategies as st
+from hypothesis.internal.conjecture.datatree import (
+    MAX_CHILDREN_EFFECTIVELY_INFINITE,
+    all_children,
+    compute_max_children,
+)
+from hypothesis.internal.floats import next_down, next_up
+from hypothesis.internal.intervalsets import IntervalSet
+
+from tests.conjecture.common import (
+    draw_boolean_kwargs,
+    draw_bytes_kwargs,
+    draw_float_kwargs,
+    draw_integer_kwargs,
+    draw_string_kwargs,
+    fresh_data,
+)
+
+
+@st.composite
+def ir_types_and_kwargs(draw):
+    ir_type = draw(st.sampled_from(["integer", "bytes", "float", "string", "boolean"]))
+    kwargs_strategy = {
+        "integer": draw_integer_kwargs(),
+        "bytes": draw_bytes_kwargs(),
+        "float": draw_float_kwargs(),
+        "string": draw_string_kwargs(),
+        "boolean": draw_boolean_kwargs(),
+    }[ir_type]
+    kwargs = draw(kwargs_strategy)
+
+    return (ir_type, kwargs)
+
+
+# we max out at 128 bit integers in the *unbounded* case, but someone may
+# specify a bound with a larger magnitude. Ensure we calculate max children for
+# those cases correctly.
+@example(("integer", {"min_value": None, "max_value": -(2**200), "weights": None}))
+@example(("integer", {"min_value": 2**200, "max_value": None, "weights": None}))
+@example(("integer", {"min_value": -(2**200), "max_value": 2**200, "weights": None}))
+@given(ir_types_and_kwargs())
+def test_compute_max_children_is_positive(ir_type_and_kwargs):
+    (ir_type, kwargs) = ir_type_and_kwargs
+    assert compute_max_children(ir_type, kwargs) >= 0
+
+
+def test_compute_max_children_integer_zero_weight():
+    kwargs = {"min_value": 1, "max_value": 2, "weights": [0, 1]}
+    assert compute_max_children("integer", kwargs) == 1
+
+    kwargs = {"min_value": 1, "max_value": 4, "weights": [0, 0.5, 0, 0.5]}
+    assert compute_max_children("integer", kwargs) == 2
+
+
+def test_compute_max_children_string_unbounded_max_size():
+    kwargs = {
+        "min_size": 0,
+        "max_size": None,
+        "intervals": IntervalSet.from_string("a"),
+    }
+    assert compute_max_children("string", kwargs) == MAX_CHILDREN_EFFECTIVELY_INFINITE
+
+
+def test_compute_max_children_string_empty_intervals():
+    kwargs = {"min_size": 0, "max_size": 100, "intervals": IntervalSet.from_string("")}
+    # only possibility is the empty string
+    assert compute_max_children("string", kwargs) == 1
+
+
+def test_compute_max_children_string_reasonable_size():
+    kwargs = {"min_size": 8, "max_size": 8, "intervals": IntervalSet.from_string("abc")}
+    # 3 possibilities for each character, 8 characters, 3 ** 8 possibilities.
+    assert compute_max_children("string", kwargs) == 3**8
+
+    kwargs = {
+        "min_size": 2,
+        "max_size": 8,
+        "intervals": IntervalSet.from_string("abcd"),
+    }
+    assert compute_max_children("string", kwargs) == sum(4**k for k in range(2, 8 + 1))
+
+
+def test_compute_max_children_empty_string():
+    kwargs = {"min_size": 0, "max_size": 0, "intervals": IntervalSet.from_string("abc")}
+    assert compute_max_children("string", kwargs) == 1
+
+
+def test_compute_max_children_string_very_large():
+    kwargs = {
+        "min_size": 0,
+        "max_size": 10_000,
+        "intervals": IntervalSet.from_string("abcdefg"),
+    }
+    assert compute_max_children("string", kwargs) == MAX_CHILDREN_EFFECTIVELY_INFINITE
+
+
+def test_compute_max_children_boolean():
+    assert compute_max_children("boolean", {"p": 0.0}) == 1
+    assert compute_max_children("boolean", {"p": 1.0}) == 1
+
+    assert compute_max_children("boolean", {"p": 0.5}) == 2
+    assert compute_max_children("boolean", {"p": 0.001}) == 2
+    assert compute_max_children("boolean", {"p": 0.999}) == 2
+
+
+@given(st.text(min_size=1, max_size=1), st.integers(0, 100))
+def test_draw_string_single_interval_with_equal_bounds(s, n):
+    data = fresh_data()
+    intervals = IntervalSet.from_string(s)
+    assert data.draw_string(intervals, min_size=n, max_size=n) == s * n
+
+
+@example(("boolean", {"p": 2**-65}))
+@example(("boolean", {"p": 1 - 2**-65}))
+@example(
+    (
+        "string",
+        {"min_size": 0, "max_size": 0, "intervals": IntervalSet.from_string("abc")},
+    )
+)
+@example(
+    ("string", {"min_size": 0, "max_size": 3, "intervals": IntervalSet.from_string("")})
+)
+@example(
+    (
+        "string",
+        {"min_size": 0, "max_size": 3, "intervals": IntervalSet.from_string("a")},
+    )
+)
+# all combinations of float signs
+@example(("float", {"min_value": next_down(-0.0), "max_value": -0.0}))
+@example(("float", {"min_value": next_down(-0.0), "max_value": next_up(0.0)}))
+@example(("float", {"min_value": 0.0, "max_value": next_up(0.0)}))
+@example(("integer", {"min_value": 1, "max_value": 2, "weights": [0, 1]}))
+@given(ir_types_and_kwargs())
+def test_compute_max_children_and_all_children_agree(ir_type_and_kwargs):
+    (ir_type, kwargs) = ir_type_and_kwargs
+    max_children = compute_max_children(ir_type, kwargs)
+
+    # avoid slowdowns / OOM when reifying extremely large all_children generators.
+    # We also hard cap at MAX_CHILDREN_EFFECTIVELY_INFINITE, because max_children
+    # returns approximations after this value and so will disagree with
+    # all_children.
+    cap = min(100_000, MAX_CHILDREN_EFFECTIVELY_INFINITE)
+    assume(max_children < cap)
+    assert len(list(all_children(ir_type, kwargs))) == max_children
diff --git a/hypothesis-python/tests/conjecture/test_junkdrawer.py b/hypothesis-python/tests/conjecture/test_junkdrawer.py
index 13bfc2d031..1a53f32698 100644
--- a/hypothesis-python/tests/conjecture/test_junkdrawer.py
+++ b/hypothesis-python/tests/conjecture/test_junkdrawer.py
@@ -144,6 +144,19 @@ def test_int_list_extend():
     assert list(x) == [0, 0, 0, n]
 
 
+def test_int_list_slice():
+    x = IntList([1, 2])
+    assert x[:1] == IntList([1])
+    assert x[0:2] == IntList([1, 2])
+    assert x[1:] == IntList([2])
+
+
+def test_int_list_del():
+    x = IntList([1, 2])
+    del x[0]
+    assert x == IntList([2])
+
+
 @pytest.mark.parametrize("n", [0, 1, 30, 70])
 def test_binary_search(n):
     i = binary_search(0, 100, lambda i: i <= n)
diff --git a/hypothesis-python/tests/conjecture/test_optimiser.py b/hypothesis-python/tests/conjecture/test_optimiser.py
index 7c03c7ea66..4eb4f2461d 100644
--- a/hypothesis-python/tests/conjecture/test_optimiser.py
+++ b/hypothesis-python/tests/conjecture/test_optimiser.py
@@ -134,15 +134,15 @@ def test_targeting_can_drive_length_very_high():
 
         def test(data):
             count = 0
-            # TODO this test fails with data.draw_boolean(0.25). Does the hill
-            # climbing optimizer just not like the bit representation of boolean
-            # draws, or do we have a deeper bug here?
-            while data.draw_integer(0, 3) == 3:
+            while data.draw_boolean(0.25):
                 count += 1
             data.target_observations[""] = min(count, 100)
 
         runner = ConjectureRunner(test, settings=TEST_SETTINGS)
-        runner.cached_test_function(bytes(10))
+        # extend here to ensure we get a valid (non-overrun) test case. The
+        # outcome of the test case doesn't really matter as long as we have
+        # something for the runner to optimize.
+        runner.cached_test_function(b"", extend=50)
 
         try:
             runner.optimise_targets()
diff --git a/hypothesis-python/tests/conjecture/test_test_data.py b/hypothesis-python/tests/conjecture/test_test_data.py
index 9e30e5df5d..5186fc632f 100644
--- a/hypothesis-python/tests/conjecture/test_test_data.py
+++ b/hypothesis-python/tests/conjecture/test_test_data.py
@@ -163,7 +163,6 @@ def eg(u, v):
 
 def test_example_depth_marking():
     d = ConjectureData.for_buffer(bytes(24))
-
     # These draw sizes are chosen so that each example has a unique length.
     d.draw_bytes(2)
     d.start_example("inner")
@@ -200,16 +199,6 @@ def test_has_cached_examples_even_when_overrun():
     assert d.examples is d.examples
 
 
-def test_can_write_empty_bytes():
-    d = ConjectureData.for_buffer([1, 1, 1])
-    d.draw_boolean()
-    d.draw_bytes(0)
-    d.draw_boolean()
-    d.draw_bytes(0, forced=b"")
-    d.draw_boolean()
-    assert d.buffer == bytes([1, 1, 1])
-
-
 def test_blocks_preserve_identity():
     n = 10
     d = ConjectureData.for_buffer([1] * 10)
@@ -252,8 +241,11 @@ class LoggingObserver(DataObserver):
         def __init__(self):
             self.log = []
 
-        def draw_bits(self, n_bits: int, *, forced: bool, value: int) -> None:
-            self.log.append(("draw", n_bits, forced, value))
+        def draw_boolean(self, value: bool, *, was_forced: bool, kwargs: dict):
+            self.log.append(("draw_boolean", value, was_forced))
+
+        def draw_integer(self, value: int, *, was_forced: bool, kwargs: dict):
+            self.log.append(("draw_integer", value, was_forced))
 
         def conclude_test(self, *args):
             assert x.frozen
@@ -269,9 +261,9 @@ def conclude_test(self, *args):
         x.conclude_test(Status.INTERESTING, interesting_origin="neat")
 
     assert observer.log == [
-        ("draw", 1, False, 1),
-        ("draw", 7, True, 10),
-        ("draw", 8, False, 3),
+        ("draw_boolean", True, False),
+        ("draw_integer", 10, True),
+        ("draw_integer", 3, False),
         ("concluded", Status.INTERESTING, "neat"),
     ]
 
diff --git a/hypothesis-python/tests/conjecture/test_utils.py b/hypothesis-python/tests/conjecture/test_utils.py
index ccf6f1b204..8fc779e56f 100644
--- a/hypothesis-python/tests/conjecture/test_utils.py
+++ b/hypothesis-python/tests/conjecture/test_utils.py
@@ -31,13 +31,6 @@
 from hypothesis.internal.intervalsets import IntervalSet
 
 
-def test_does_draw_data_for_empty_range():
-    data = ConjectureData.for_buffer(b"\1")
-    assert data.draw_integer(1, 1) == 1
-    data.freeze()
-    assert data.buffer == b"\0"
-
-
 def test_coin_biased_towards_truth():
     p = 1 - 1.0 / 500
 
diff --git a/hypothesis-python/tests/cover/test_intervalset.py b/hypothesis-python/tests/cover/test_intervalset.py
index 594412768f..f6c934cb37 100644
--- a/hypothesis-python/tests/cover/test_intervalset.py
+++ b/hypothesis-python/tests/cover/test_intervalset.py
@@ -70,7 +70,7 @@ def intervals_to_set(ints):
     return set(IntervalSet(ints))
 
 
-@settings(suppress_health_check=[HealthCheck.filter_too_much])
+@settings(suppress_health_check=[HealthCheck.filter_too_much, HealthCheck.too_slow])
 @example(x=[(0, 1), (3, 3)], y=[(1, 3)])
 @example(x=[(0, 1)], y=[(0, 0), (1, 1)])
 @example(x=[(0, 1)], y=[(1, 1)])
diff --git a/hypothesis-python/tests/cover/test_searchstrategy.py b/hypothesis-python/tests/cover/test_searchstrategy.py
index 28b617e797..b8665550ae 100644
--- a/hypothesis-python/tests/cover/test_searchstrategy.py
+++ b/hypothesis-python/tests/cover/test_searchstrategy.py
@@ -141,3 +141,15 @@ def test_jsonable_namedtuple():
     Obj = namedtuple("Obj", ("x"))
     obj = Obj(10)
     assert to_jsonable(obj) == {"x": 10}
+
+
+def test_jsonable_small_ints_are_ints():
+    n = 2**62
+    assert isinstance(to_jsonable(n), int)
+    assert to_jsonable(n) == n
+
+
+def test_jsonable_large_ints_are_floats():
+    n = 2**63
+    assert isinstance(to_jsonable(n), float)
+    assert to_jsonable(n) == float(n)
diff --git a/hypothesis-python/tests/cover/test_slippage.py b/hypothesis-python/tests/cover/test_slippage.py
index 66762bc15c..0872652901 100644
--- a/hypothesis-python/tests/cover/test_slippage.py
+++ b/hypothesis-python/tests/cover/test_slippage.py
@@ -176,30 +176,36 @@ def count():
 
 
 def test_shrinks_both_failures():
-    first_has_failed = [False]
+    first_has_failed = False
     duds = set()
-    second_target = [None]
+    second_target = None
 
     @settings(database=None, max_examples=1000)
-    @given(st.integers(min_value=0).map(int))
+    @given(st.integers(min_value=0))
     def test(i):
+        nonlocal first_has_failed, duds, second_target
+
         if i >= 10000:
-            first_has_failed[0] = True
+            first_has_failed = True
             raise AssertionError
+
         assert i < 10000
-        if first_has_failed[0]:
-            if second_target[0] is None:
+        if first_has_failed:
+            if second_target is None:
                 for j in range(10000):
                     if j not in duds:
-                        second_target[0] = j
+                        second_target = j
                         break
-            assert i < second_target[0]
+            # to avoid flaky errors, don't error on an input that we previously
+            # passed.
+            if i not in duds:
+                assert i < second_target
         else:
             duds.add(i)
 
     output = capture_reports(test)
     assert_output_contains_failure(output, test, i=10000)
-    assert_output_contains_failure(output, test, i=second_target[0])
+    assert_output_contains_failure(output, test, i=second_target)
 
 
 def test_handles_flaky_tests_where_only_one_is_flaky():
@@ -258,7 +264,7 @@ def test(i):
 
 
 def test_finds_multiple_failures_in_generation():
-    special = []
+    special = None
     seen = set()
 
     @settings(phases=[Phase.generate, Phase.shrink], max_examples=100)
@@ -269,14 +275,19 @@ def test(x):
         is larger than it is a different failure. This demonstrates that we
         can keep generating larger examples and still find new bugs after that
         point."""
-        if not special:
-            if len(seen) >= 10 and x <= 1000:
-                special.append(x)
+        nonlocal special
+        if special is None:
+            # don't mark duplicate inputs as special and thus erroring, to avoid
+            # flakiness where we passed the input the first time but failed it the
+            # second.
+            if len(seen) >= 10 and x <= 1000 and x not in seen:
+                special = x
             else:
                 seen.add(x)
-        if special:
-            assert x in seen or (x <= special[0])
-        assert x not in special
+        # compare against None, not truthiness: a special value of 0 is falsy.
+        if special is not None:
+            assert x in seen or x <= special
+        assert x != special
 
     with pytest.raises(ExceptionGroup):
         test()
diff --git a/pyproject.toml b/pyproject.toml
index c847aa82e9..3b38545b9b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -74,6 +74,10 @@ exclude = [
 [tool.ruff.lint.per-file-ignores]
 "hypothesis-python/src/hypothesis/core.py" = ["B030", "B904", "FBT001"]
 "hypothesis-python/src/hypothesis/internal/compat.py" = ["F401"]
+"hypothesis-python/src/hypothesis/internal/conjecture/data.py" = ["FBT001"]
+"hypothesis-python/src/hypothesis/internal/conjecture/datatree.py" = ["FBT001"]
 "hypothesis-python/tests/nocover/test_imports.py" = ["F403", "F405"]
 "hypothesis-python/tests/numpy/test_randomness.py" = ["NPY002"]
 "hypothesis-python/src/hypothesis/internal/conjecture/*" = ["B023"]
+"hypothesis-python/tests/conjecture/test_data_tree.py" = ["B023"]
+"hypothesis-python/tests/conjecture/test_test_data.py" = ["FBT001"]