HypothesisWorks · Zac-HD · Jun 8, 2025 · Jun 1, 2025 · Jun 4, 2025 · Jun 4, 2025
diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,7 @@
+RELEASE_TYPE: patch
+
+This release adds the experimental and unstable |OBSERVABILITY_CHOICES| option for :ref:`observability <observability>`. If set, the choice sequence is included in ``metadata.choice_nodes``, and choice sequence spans are included in ``metadata.choice_spans``.
+
+These are relatively low-level implementation details of Hypothesis, and are exposed in observability for users building tools or research on top of Hypothesis. See |PrimitiveProvider| for more details about the choice sequence and choice spans.
+
+We are actively working towards a better interface for this. Feel free to use |OBSERVABILITY_CHOICES| to experiment, but don't rely on it yet!
diff --git a/hypothesis-python/docs/prolog.rst b/hypothesis-python/docs/prolog.rst
@@ -116,17 +116,23 @@
 .. |PrimitiveProvider.draw_string| replace:: :func:`~hypothesis.internal.conjecture.providers.PrimitiveProvider.draw_string`
 .. |PrimitiveProvider.draw_bytes| replace:: :func:`~hypothesis.internal.conjecture.providers.PrimitiveProvider.draw_bytes`
 .. |PrimitiveProvider.on_observation| replace:: :func:`~hypothesis.internal.conjecture.providers.PrimitiveProvider.on_observation`
+.. |PrimitiveProvider.observe_test_case| replace:: :func:`~hypothesis.internal.conjecture.providers.PrimitiveProvider.observe_test_case`
+.. |PrimitiveProvider.observe_information_messages| replace:: :func:`~hypothesis.internal.conjecture.providers.PrimitiveProvider.observe_information_messages`
 .. |PrimitiveProvider.per_test_case_context_manager| replace:: :func:`~hypothesis.internal.conjecture.providers.PrimitiveProvider.per_test_case_context_manager`
 .. |PrimitiveProvider.add_observability_callback| replace:: :data:`~hypothesis.internal.conjecture.providers.PrimitiveProvider.add_observability_callback`
+.. |PrimitiveProvider.span_start| replace:: :func:`~hypothesis.internal.conjecture.providers.PrimitiveProvider.span_start`
+.. |PrimitiveProvider.span_end| replace:: :func:`~hypothesis.internal.conjecture.providers.PrimitiveProvider.span_end`
 
 .. |AVAILABLE_PROVIDERS| replace:: :data:`~hypothesis.internal.conjecture.providers.AVAILABLE_PROVIDERS`
 .. |TESTCASE_CALLBACKS| replace:: :data:`~hypothesis.internal.observability.TESTCASE_CALLBACKS`
+.. |OBSERVABILITY_CHOICES| replace:: :data:`~hypothesis.internal.observability.OBSERVABILITY_CHOICES`
 .. |BUFFER_SIZE| replace:: :data:`~hypothesis.internal.conjecture.engine.BUFFER_SIZE`
 .. |MAX_SHRINKS| replace:: :data:`~hypothesis.internal.conjecture.engine.MAX_SHRINKS`
 .. |MAX_SHRINKING_SECONDS| replace:: :data:`~hypothesis.internal.conjecture.engine.MAX_SHRINKING_SECONDS`
 .. |BackendCannotProceed| replace:: :exc:`~hypothesis.errors.BackendCannotProceed`
 
 .. |@rule| replace:: :func:`@rule <hypothesis.stateful.rule>`
+.. |@precondition| replace:: :func:`@precondition <hypothesis.stateful.precondition>`
 .. |RuleBasedStateMachine| replace:: :class:`~hypothesis.stateful.RuleBasedStateMachine`
 .. |run_state_machine_as_test| replace:: :func:`~hypothesis.stateful.run_state_machine_as_test`
 

diff --git a/hypothesis-python/docs/reference/integrations.rst b/hypothesis-python/docs/reference/integrations.rst
@@ -156,11 +156,33 @@ which includes infinities and NaN.  This is valid in `JSON5 <https://json5.org/>
 and supported by `some JSON parsers <https://evanhahn.com/pythons-nonstandard-json-encoding/>`__
 including Gson in Java, ``JSON.parse()`` in Ruby, and of course in Python.
 
+Information message
+^^^^^^^^^^^^^^^^^^^
+
+.. jsonschema:: ./schema_observations.json#/oneOf/1
+   :hide_key: /additionalProperties, /type
+
+Test case
+^^^^^^^^^
+
 .. jsonschema:: ./schema_observations.json#/oneOf/0
    :hide_key: /additionalProperties, /type
-.. jsonschema:: ./schema_observations.json#/oneOf/1
+
+Hypothesis metadata
++++++++++++++++++++
+
+While the observability format is agnostic to the property-based testing library which generated it, Hypothesis includes specific values in the ``metadata`` key for test cases. You may rely on these being present if and only if the observation was generated by Hypothesis.
+
+.. jsonschema:: ./schema_metadata.json
    :hide_key: /additionalProperties, /type
 
+Choices metadata
+++++++++++++++++
+
+These additional metadata elements are included in ``metadata`` (as e.g. ``metadata["choice_nodes"]`` or ``metadata["choice_spans"]``), if and only if |OBSERVABILITY_CHOICES| is set.
+
+.. jsonschema:: ./schema_metadata_choices.json
+   :hide_key: /additionalProperties, /type
 
 .. _pytest-plugin:
 

diff --git a/hypothesis-python/docs/reference/internals.rst b/hypothesis-python/docs/reference/internals.rst
@@ -32,7 +32,7 @@ Observability
 
 .. autodata:: hypothesis.internal.observability.TESTCASE_CALLBACKS
 .. autodata:: hypothesis.internal.observability.OBSERVABILITY_COLLECT_COVERAGE
-
+.. autodata:: hypothesis.internal.observability.OBSERVABILITY_CHOICES
 
 Engine constants
 ----------------

diff --git a/hypothesis-python/docs/reference/schema_metadata.json b/hypothesis-python/docs/reference/schema_metadata.json
@@ -0,0 +1,62 @@
+{
+    "type": "object",
+    "properties": {
+        "traceback": {
+            "type": ["string", "null"],
+            "description": "The traceback for failing tests, if and only if ``status == \"failed\"``."
+        },
+        "reproduction_decorator": {
+            "type": ["string", "null"],
+            "description": "The ``@reproduce_failure`` decorator string for failing tests, if and only if ``status == \"failed\"``."
+        },
+        "predicates": {
+            "type": "object",
+            "description": "The number of times each |assume| and |@precondition| predicate was satisfied (``True``) and not satisfied (``False``).",
+            "additionalProperties": {
+                "type": "object",
+                "properties": {
+                    "satisfied": {
+                        "type": "integer",
+                        "minimum": 0,
+                        "description": "The number of times this predicate was satisfied (``True``)."
+                    },
+                    "unsatisfied": {
+                        "type": "integer",
+                        "minimum": 0,
+                        "description": "The number of times this predicate was not satisfied (``False``)."
+                    }
+                },
+                "required": ["satisfied", "unsatisfied"],
+                "additionalProperties": false
+            }
+        },
+        "backend": {
+            "type": "object",
+            "description": "Backend-specific observations from |PrimitiveProvider.observe_test_case| and |PrimitiveProvider.observe_information_messages|."
+        },
+        "sys.argv": {
+            "type": "array",
+            "items": {"type": "string"},
+            "description": "The result of ``sys.argv``."
+        },
+        "os.getpid()": {
+            "type": "integer",
+            "description": "The result of ``os.getpid()``."
+        },
+        "imported_at": {
+            "type": "number",
+            "description": "The unix timestamp when Hypothesis was imported."
+        },
+        "data_status": {
+            "type": "number",
+            "enum": [0, 1, 2, 3],
+            "description": "The internal status of the ConjectureData for this test case. The values are as follows: ``Status.OVERRUN = 0``, ``Status.INVALID = 1``, ``Status.VALID = 2``, and ``Status.INTERESTING = 3``."
+        },
+        "interesting_origin": {
+            "type": ["string", "null"],
+            "description": "The internal ``InterestingOrigin`` object for failing tests, if and only if ``status == \"failed\"``. The ``traceback`` string value is derived from this object."
+        }
+    },
+    "required": ["traceback", "reproduction_decorator", "predicates", "backend", "sys.argv", "os.getpid()", "imported_at", "data_status", "interesting_origin"],
+    "additionalProperties": false
+}
diff --git a/hypothesis-python/docs/reference/schema_metadata_choices.json b/hypothesis-python/docs/reference/schema_metadata_choices.json
@@ -0,0 +1,39 @@
+{
+    "type": "object",
+    "properties": {
+        "choice_nodes": {
+            "type": ["array", "null"],
+            "description": ".. warning::\n\n  EXPERIMENTAL AND UNSTABLE. This attribute may change format or disappear without warning.\n\nThe sequence of choices made during this test case. This includes the choice value, as well as its constraints and whether it was forced or not.\n\nOnly present if |OBSERVABILITY_CHOICES| is ``True``.\n\n.. note::\n\n  The choice sequence is a relatively low-level implementation detail of Hypothesis, and is exposed in observability for users building tools or research on top of Hypothesis. See |PrimitiveProvider| for more details about the choice sequence.",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "enum": ["integer", "float", "string", "bytes", "boolean"],
+                        "description": "The type of choice made. Corresponds to a call to |PrimitiveProvider.draw_integer|, |PrimitiveProvider.draw_float|, |PrimitiveProvider.draw_string|, |PrimitiveProvider.draw_bytes|, or |PrimitiveProvider.draw_boolean|."
+                    },
+                    "value": {
+                        "description": "The value of the choice. Corresponds to the value returned by a ``PrimitiveProvider.draw_*`` method.\n\n``NaN`` float values are returned as ``[\"float\", <float64_int_value>]``, to distinguish ``NaN`` floats with nonstandard bit patterns. Integers with ``abs(value) >= 2**63`` are returned as ``[\"integer\", str(value)]``, for compatibility with tools with integer size limitations. Bytes are returned as ``[\"bytes\", base64.b64encode(value)]``."
+                    },
+                    "constraints": {
+                        "type": "object",
+                        "description": "The constraints for this choice. Corresponds to the constraints passed to a ``PrimitiveProvider.draw_*`` method. ``NaN`` float values, integers with ``abs(value) >= 2**63``, and byte values for constraints are transformed as for the ``value`` attribute."
+                    },
+                    "was_forced": {
+                        "type": "boolean",
+                        "description": "Whether this choice was forced. As an implementation detail, Hypothesis occasionally requires that some choices take on a specific value, for instance to end generation of collection elements early for performance. These values are called \"forced\", and have ``was_forced = True``."
+                    }
+                },
+                "required": ["type", "value", "constraints", "was_forced"],
+                "additionalProperties": false
+            }
+        },
+        "choice_spans": {
+            "type": "array",
+            "items": {"type": "array"},
+            "description": ".. warning::\n\n  EXPERIMENTAL AND UNSTABLE. This attribute may change format or disappear without warning.\n\nThe semantically-meaningful spans of the choice sequence of this test case.\n\nEach span has the format ``[label, start, end, discarded]``, where:\n\n* ``label`` is an opaque integer-value string shared by all spans drawn from a particular strategy.\n* ``start`` and ``end`` are indices into the choice sequence for this span, such that ``choices[start:end]`` are the corresponding choices.\n* ``discarded`` is a boolean indicating whether this span was discarded (see |PrimitiveProvider.span_end|).\n\nOnly present if |OBSERVABILITY_CHOICES| is ``True``.\n\n.. note::\n\n  Spans are a relatively low-level implementation detail of Hypothesis, and are exposed in observability for users building tools or research on top of Hypothesis. See |PrimitiveProvider| (and particularly |PrimitiveProvider.span_start| and |PrimitiveProvider.span_end|) for more details about spans."
+        }
+    },
+    "required": ["traceback", "reproduction_decorator", "predicates", "backend", "sys.argv", "os.getpid()", "imported_at", "data_status", "interesting_origin", "choice_nodes", "choice_spans"],
+    "additionalProperties": false
+}
diff --git a/hypothesis-python/docs/reference/schema_observations.json b/hypothesis-python/docs/reference/schema_observations.json
@@ -3,7 +3,6 @@
     "description": "PBT Observations define a standard way to communicate what happened when property-based tests were run.  They describe test cases, or general notifications classified as info, alert, or error messages.",
     "oneOf": [
         {
-            "title": "Test case",
             "description": "Describes the inputs to and result of running some test function on a particular input.  The test might have passed, failed, or been abandoned part way through (e.g. because we failed a |.filter| condition).",
             "type": "object",
             "properties": {
@@ -69,7 +68,6 @@
             "additionalProperties": false
         },
         {
-            "title": "Information message",
             "description": "Info, alert, and error messages correspond to a group of test cases or the overall run, and are intended for humans rather than machine analysis.",
             "type": "object",
             "properties": {

diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py
@@ -675,6 +675,7 @@ def execute_explicit_examples(state, wrapped_test, arguments, kwargs, original_s
                         "Falsifying example", "Falsifying explicit example", 1
                     )
 
+                empty_data.freeze()
                 tc = make_testcase(
                     run_start=state._start_timestamp,
                     property=state.test_identifier,
@@ -1302,6 +1303,7 @@ def _execute_once_for_engine(self, data: ConjectureData) -> None:
                     data._observability_args = {}
                     self._string_repr = "<backend failed to realize symbolic arguments>"
 
+                data.freeze()
                 tc = make_testcase(
                     run_start=self._start_timestamp,
                     property=self.test_identifier,
@@ -1498,6 +1500,7 @@ def run_engine(self):
                 # execute_once() will always raise either the expected error, or Flaky.
                 raise NotImplementedError("This should be unreachable")
             finally:
+                ran_example.freeze()
                 # log our observability line for the final failing example
                 tc = make_testcase(
                     run_start=self._start_timestamp,
@@ -1521,11 +1524,7 @@ def run_engine(self):
                         f"{reproduction_decorator(falsifying_example.choices)} "
                         "as a decorator on your test case"
                     )
-                # Mostly useful for ``find`` and ensuring that objects that
-                # hold on to a reference to ``data`` know that it's now been
-                # finished and they can't draw more data from it.
-                ran_example.freeze()  # pragma: no branch
-                # No branch is possible here because we never have an active exception.
+
         _raise_to_user(
             errors_to_report,
             self.settings,
@@ -2096,6 +2095,7 @@ def fuzz_one_input(
                     raise
                 finally:
                     if TESTCASE_CALLBACKS:
+                        data.freeze()
                         tc = make_testcase(
                             run_start=state._start_timestamp,
                             property=state.test_identifier,

@@ -596,12 +596,59 @@ def on_observation(self, observation: TestCaseObservation) -> None:  # noqa: B02
     def span_start(self, label: int, /) -> None:  # noqa: B027  # non-abstract noop
         """Marks the beginning of a semantically meaningful span of choices.
 
-        Providers can optionally track this data to learn which sub-sequences
-        of draws correspond to a higher-level object, recovering the parse tree.
-        ``label`` is an opaque integer, which will be shared by all spans drawn
-        from a particular strategy.
+        Spans are a depth-first tree structure. A span is opened by a call to
+        |PrimitiveProvider.span_start|, and a call to |PrimitiveProvider.span_end|
+        closes the most recently opened span. So the following sequence of calls:
 
-        This method is called from ``ConjectureData.start_span()``.
+        .. code-block:: python
+
+            span_start(label=1)
+            n1 = draw_integer()
+            span_start(label=2)
+            b1 = draw_boolean()
+            n2 = draw_integer()
+            span_end()
+            f1 = draw_float()
+            span_end()
+
+        produces the following two spans of choices:
+
+        .. code-block::
+
+            1: [n1, b1, n2, f1]
+            2: [b1, n2]
+
+        Hypothesis uses spans to denote "semantically meaningful" sequences of
+        choices. For instance, Hypothesis opens a span for the sequence of choices
+        made while drawing from each strategy. Not every span corresponds to a
+        strategy; the generation of e.g. each element in |st.lists| is also marked
+        with a span, among others.
+
+        ``label`` is an opaque integer, which has no defined semantics.
+        The only guarantee made by Hypothesis is that all spans with the same
+        "meaning" will share the same ``label``. So all spans from the same
+        strategy will share the same label, as will e.g. the spans for |st.lists|
+        elements.
+
+        Providers can track calls to |PrimitiveProvider.span_start| and
+        |PrimitiveProvider.span_end| to learn something about the semantics of
+        the test's choice sequence. For instance, a provider could track the depth
+        of the span tree, or the number of unique labels, which says something about
+        the complexity of the choices being generated. Or a provider could track
+        the span tree across test cases in order to determine what strategies are
+        being used in what contexts.
+
+        It is possible for Hypothesis to start and immediately stop a span,
+        without calling a ``draw_*`` method in between. These spans contain zero
+        choices.
+
+        Hypothesis will always balance the number of calls to
+        |PrimitiveProvider.span_start| and |PrimitiveProvider.span_end|. A call
+        to |PrimitiveProvider.span_start| will always be followed by a call to
+        |PrimitiveProvider.span_end| before the end of the test case.
+
+        |PrimitiveProvider.span_start| is called from ``ConjectureData.start_span()``
+        internally.
         """
 
     def span_end(self, discard: bool, /) -> None:  # noqa: B027, FBT001
@@ -611,7 +658,8 @@ def span_end(self, discard: bool, /) -> None:  # noqa: B027, FBT001
         as unlikely to contribute to the input data as seen by the user's test.
         Note however that side effects can make this determination unsound.
 
-        This method is called from ``ConjectureData.stop_span()``.
+        |PrimitiveProvider.span_end| is called from ``ConjectureData.stop_span()``
+        internally.
         """