HypothesisWorks · Zac-HD · Jun 8, 2025 · Jun 1, 2025 · Jun 4, 2025 · Jun 4, 2025
diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,5 @@
+RELEASE_TYPE: patch
+
+This release adds the experimental and unstable |OBSERVABILITY_CHOICES| option for :ref:`observability <observability>`. When this option is set, test case observations include the choice sequence in ``metadata.choice_nodes``.
+
+We are actively working towards a better interface for this. Feel free to use |OBSERVABILITY_CHOICES| to experiment, but don't rely on it yet!
diff --git a/hypothesis-python/docs/prolog.rst b/hypothesis-python/docs/prolog.rst
@@ -116,17 +116,23 @@
 .. |PrimitiveProvider.draw_string| replace:: :func:`~hypothesis.internal.conjecture.providers.PrimitiveProvider.draw_string`
 .. |PrimitiveProvider.draw_bytes| replace:: :func:`~hypothesis.internal.conjecture.providers.PrimitiveProvider.draw_bytes`
 .. |PrimitiveProvider.on_observation| replace:: :func:`~hypothesis.internal.conjecture.providers.PrimitiveProvider.on_observation`
+.. |PrimitiveProvider.observe_test_case| replace:: :func:`~hypothesis.internal.conjecture.providers.PrimitiveProvider.observe_test_case`
+.. |PrimitiveProvider.observe_information_messages| replace:: :func:`~hypothesis.internal.conjecture.providers.PrimitiveProvider.observe_information_messages`
 .. |PrimitiveProvider.per_test_case_context_manager| replace:: :func:`~hypothesis.internal.conjecture.providers.PrimitiveProvider.per_test_case_context_manager`
 .. |PrimitiveProvider.add_observability_callback| replace:: :data:`~hypothesis.internal.conjecture.providers.PrimitiveProvider.add_observability_callback`
+.. |PrimitiveProvider.span_start| replace:: :func:`~hypothesis.internal.conjecture.providers.PrimitiveProvider.span_start`
+.. |PrimitiveProvider.span_end| replace:: :func:`~hypothesis.internal.conjecture.providers.PrimitiveProvider.span_end`
 
 .. |AVAILABLE_PROVIDERS| replace:: :data:`~hypothesis.internal.conjecture.providers.AVAILABLE_PROVIDERS`
 .. |TESTCASE_CALLBACKS| replace:: :data:`~hypothesis.internal.observability.TESTCASE_CALLBACKS`
+.. |OBSERVABILITY_CHOICES| replace:: :data:`~hypothesis.internal.observability.OBSERVABILITY_CHOICES`
 .. |BUFFER_SIZE| replace:: :data:`~hypothesis.internal.conjecture.engine.BUFFER_SIZE`
 .. |MAX_SHRINKS| replace:: :data:`~hypothesis.internal.conjecture.engine.MAX_SHRINKS`
 .. |MAX_SHRINKING_SECONDS| replace:: :data:`~hypothesis.internal.conjecture.engine.MAX_SHRINKING_SECONDS`
 .. |BackendCannotProceed| replace:: :exc:`~hypothesis.errors.BackendCannotProceed`
 
 .. |@rule| replace:: :func:`@rule <hypothesis.stateful.rule>`
+.. |@precondition| replace:: :func:`@precondition <hypothesis.stateful.precondition>`
 .. |RuleBasedStateMachine| replace:: :class:`~hypothesis.stateful.RuleBasedStateMachine`
 .. |run_state_machine_as_test| replace:: :func:`~hypothesis.stateful.run_state_machine_as_test`
 

diff --git a/hypothesis-python/docs/reference/integrations.rst b/hypothesis-python/docs/reference/integrations.rst
@@ -162,6 +162,15 @@ including Gson in Java, ``JSON.parse()`` in Ruby, and of course in Python.
    :hide_key: /additionalProperties, /type
 
 
+Hypothesis Metadata
+^^^^^^^^^^^^^^^^^^^
+
+While the observability format is agnostic to the property-based testing library which generated it, Hypothesis includes specific values in the ``metadata`` key for test cases. You may rely on these being present if and only if the observation was generated by Hypothesis.
+
+.. jsonschema:: ./schema_metadata.json
+   :hide_key: /additionalProperties, /type
+
+
 .. _pytest-plugin:
 
 The Hypothesis pytest plugin

diff --git a/hypothesis-python/docs/reference/internals.rst b/hypothesis-python/docs/reference/internals.rst
@@ -32,7 +32,7 @@ Observability
 
 .. autodata:: hypothesis.internal.observability.TESTCASE_CALLBACKS
 .. autodata:: hypothesis.internal.observability.OBSERVABILITY_COLLECT_COVERAGE
-
+.. autodata:: hypothesis.internal.observability.OBSERVABILITY_CHOICES
 
 Engine constants
 ----------------

diff --git a/hypothesis-python/docs/reference/schema_metadata.json b/hypothesis-python/docs/reference/schema_metadata.json
@@ -0,0 +1,95 @@
+{
+    "description": "Hypothesis-specific values included in the ``metadata`` key of observations for test cases.",
+    "type": "object",
+    "properties": {
+        "traceback": {
+            "type": ["string", "null"],
+            "description": "The traceback for failing tests, if and only if ``status == \"failed\"``."
+        },
+        "reproduction_decorator": {
+            "type": ["string", "null"],
+            "description": "The ``@reproduce_failure`` decorator string for failing tests, if and only if ``status == \"failed\"``."
+        },
+        "predicates": {
+            "type": "object",
+            "description": "The number of times each |assume| and |@precondition| predicate was satisfied (``True``) and not satisfied (``False``).",
+            "additionalProperties": {
+                "type": "object",
+                "properties": {
+                    "satisfied": {
+                        "type": "integer",
+                        "minimum": 0,
+                        "description": "The number of times this predicate was satisfied (``True``)."
+                    },
+                    "unsatisfied": {
+                        "type": "integer",
+                        "minimum": 0,
+                        "description": "The number of times this predicate was not satisfied (``False``)."
+                    }
+                },
+                "required": ["satisfied", "unsatisfied"],
+                "additionalProperties": false
+            }
+        },
+        "backend": {
+            "type": "object",
+            "description": "Backend-specific observations from |PrimitiveProvider.observe_test_case| and |PrimitiveProvider.observe_information_messages|."
+        },
+        "sys.argv": {
+            "type": "array",
+            "items": {"type": "string"},
+            "description": "The result of ``sys.argv``."
+        },
+        "os.getpid()": {
+            "type": "integer",
+            "description": "The result of ``os.getpid()``."
+        },
+        "imported_at": {
+            "type": "number",
+            "description": "The unix timestamp when Hypothesis was imported."
+        },
+        "data_status": {
+            "type": "number",
+            "enum": [0, 1, 2, 3],
+            "description": "The internal status of the ConjectureData for this test case. The values are as follows: ``Status.OVERRUN = 0``, ``Status.INVALID = 1``, ``Status.VALID = 2``, and ``Status.INTERESTING = 3``."
+        },
+        "interesting_origin": {
+            "type": ["string", "null"],
+            "description": "The internal ``InterestingOrigin`` object for failing tests, if and only if ``status == \"failed\"``. The ``traceback`` string value is derived from this object."
+        },
+        "choice_nodes": {
+            "type": ["array", "null"],
+            "description": ".. warning::\n\n  EXPERIMENTAL AND UNSTABLE. This attribute may change format or disappear without warning.\n\nThe sequence of choices made during this test case. This includes the choice value, as well as its constraints and whether it was forced or not.\n\nOnly present if |OBSERVABILITY_CHOICES| is ``True``.\n\n.. note::\n\n  The choice sequence is a relatively low-level implementation detail of Hypothesis, and is exposed in observability for users building tools or research on top of Hypothesis. See |PrimitiveProvider| for more details about the choice sequence.",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "enum": ["integer", "float", "string", "bytes", "boolean"],
+                        "description": "The type of choice made. Corresponds to a call to |PrimitiveProvider.draw_integer|, |PrimitiveProvider.draw_float|, |PrimitiveProvider.draw_string|, |PrimitiveProvider.draw_bytes|, or |PrimitiveProvider.draw_boolean|."
+                    },
+                    "value": {
+                        "description": "The value of the choice. Corresponds to the value returned by a ``PrimitiveProvider.draw_*`` method.\n\n``NaN`` float values are returned as ``[\"float\", <float64_int_value>]``, to distinguish ``NaN`` floats with nonstandard bit patterns. Integers with ``abs(value) >= 2**63`` are returned as ``[\"integer\", str(value)]``, for compatibility with tools with integer size limitations. Bytes are returned as ``[\"bytes\", base64.b64encode(value)]``."
+                    },
+                    "constraints": {
+                        "type": "object",
+                        "description": "The constraints for this choice. Corresponds to the constraints passed to a ``PrimitiveProvider.draw_*`` method. ``NaN`` float values, integers with ``abs(value) >= 2**63``, and byte values for constraints are transformed as for the ``value`` attribute."
+                    },
+                    "was_forced": {
+                        "type": "boolean",
+                        "description": "Whether this choice was forced. As an implementation detail, Hypothesis occasionally requires that some choices take on a specific value, for instance to end generation of collection elements early for performance. These values are called \"forced\", and have ``was_forced = True``."
+                    }
+                },
+                "required": ["type", "value", "constraints", "was_forced"],
+                "additionalProperties": false
+            }
+        },
+        "choice_spans": {
+            "type": "array",
+            "items": {"type": "array"},
+            "description": ".. warning::\n\n  EXPERIMENTAL AND UNSTABLE. This attribute may change format or disappear without warning.\n\nThe semantically-meaningful spans of the choice sequence of this test case.\n\nEach span has the format ``[label, start, end, discarded]``, where:\n\n* ``label`` is an opaque integer-value string shared by all spans drawn from a particular strategy.\n* ``start`` and ``end`` are indices into the choice sequence for this span, such that ``choices[start:end]`` are the corresponding choices.\n* ``discarded`` is a boolean indicating whether this span was discarded (see |PrimitiveProvider.span_end|).\n\nOnly present if |OBSERVABILITY_CHOICES| is ``True``.\n\n.. note::\n\n  Spans are a relatively low-level implementation detail of Hypothesis, and are exposed in observability for users building tools or research on top of Hypothesis. See |PrimitiveProvider| (and particularly |PrimitiveProvider.span_start| and |PrimitiveProvider.span_end|) for more details about spans."
+        }
+    },
+    "required": ["traceback", "reproduction_decorator", "predicates", "backend", "sys.argv", "os.getpid()", "imported_at", "data_status", "interesting_origin", "choice_nodes"],
+    "additionalProperties": false
+}
diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py
@@ -675,6 +675,7 @@ def execute_explicit_examples(state, wrapped_test, arguments, kwargs, original_s
                         "Falsifying example", "Falsifying explicit example", 1
                     )
 
+                empty_data.freeze()
                 tc = make_testcase(
                     run_start=state._start_timestamp,
                     property=state.test_identifier,
@@ -1302,6 +1303,7 @@ def _execute_once_for_engine(self, data: ConjectureData) -> None:
                     data._observability_args = {}
                     self._string_repr = "<backend failed to realize symbolic arguments>"
 
+                data.freeze()
                 tc = make_testcase(
                     run_start=self._start_timestamp,
                     property=self.test_identifier,
@@ -1498,6 +1500,7 @@ def run_engine(self):
                 # execute_once() will always raise either the expected error, or Flaky.
                 raise NotImplementedError("This should be unreachable")
             finally:
+                ran_example.freeze()
                 # log our observability line for the final failing example
                 tc = make_testcase(
                     run_start=self._start_timestamp,
@@ -1521,11 +1524,7 @@ def run_engine(self):
                         f"{reproduction_decorator(falsifying_example.choices)} "
                         "as a decorator on your test case"
                     )
-                # Mostly useful for ``find`` and ensuring that objects that
-                # hold on to a reference to ``data`` know that it's now been
-                # finished and they can't draw more data from it.
-                ran_example.freeze()  # pragma: no branch
-                # No branch is possible here because we never have an active exception.
+
         _raise_to_user(
             errors_to_report,
             self.settings,
@@ -2096,6 +2095,7 @@ def fuzz_one_input(
                     raise
                 finally:
                     if TESTCASE_CALLBACKS:
+                        data.freeze()
                         tc = make_testcase(
                             run_start=state._start_timestamp,
                             property=state.test_identifier,