Skip to content

Commit 37e0960

Browse files
committed
add choice_nodes, data_status, and interesting_origin to metadata
1 parent 8c1180f commit 37e0960

File tree

4 files changed

+258
-8
lines changed

4 files changed

+258
-8
lines changed

hypothesis-python/src/hypothesis/internal/observability.py

Lines changed: 135 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@
1010

1111
"""Observability tools to spit out analysis-ready tables, one row per test case."""
1212

13+
import base64
14+
import dataclasses
1315
import json
16+
import math
1417
import os
1518
import sys
1619
import time
@@ -20,10 +23,24 @@
2023
from dataclasses import dataclass
2124
from datetime import date, timedelta
2225
from functools import lru_cache
23-
from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, Union
26+
from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, Union, cast
2427

2528
from hypothesis.configuration import storage_directory
2629
from hypothesis.errors import HypothesisWarning
30+
from hypothesis.internal.conjecture.choice import (
31+
BooleanConstraints,
32+
BytesConstraints,
33+
ChoiceConstraintsT,
34+
ChoiceNode,
35+
ChoiceT,
36+
ChoiceTypeT,
37+
FloatConstraints,
38+
IntegerConstraints,
39+
StringConstraints,
40+
)
41+
from hypothesis.internal.escalation import InterestingOrigin
42+
from hypothesis.internal.floats import float_to_int
43+
from hypothesis.internal.intervalsets import IntervalSet
2744

2845
if TYPE_CHECKING:
2946
from typing import TypeAlias
@@ -43,6 +60,89 @@ def update_count(self, *, condition: bool) -> None:
4360
self.unsatisfied += 1
4461

4562

63+
def _choice_to_json(choice: Union[ChoiceT, None]) -> Any:
64+
if choice is None:
65+
return None
66+
# see the note on the same check in to_jsonable for why we cast large
67+
# integers to floats.
68+
if isinstance(choice, int) and not isinstance(choice, bool) and choice > 2**63:
69+
return ["integer", float(choice)]
70+
elif isinstance(choice, bytes):
71+
return ["bytes", base64.b64encode(choice).decode()]
72+
elif isinstance(choice, float) and math.isnan(choice):
73+
# handle nonstandard nan bit patterns. We don't need to do this for -0.0
74+
# vs 0.0 since json doesn't normalize -0.0 to 0.0.
75+
return ["float", float_to_int(choice)]
76+
return choice
77+
78+
79+
def choices_to_json(choices: "tuple[ChoiceT, ...]") -> list[Any]:
    """Encode a whole choice sequence, one entry per choice."""
    return list(map(_choice_to_json, choices))
81+
82+
83+
def _constraints_to_json(
    choice_type: "ChoiceTypeT", constraints: "ChoiceConstraintsT"
) -> dict[str, Any]:
    """Encode the constraints for one choice into a json-serializable dict.

    The encoding depends on the choice type; unknown types raise
    NotImplementedError.
    """
    constraints = constraints.copy()
    if choice_type == "integer":
        constraints = cast(IntegerConstraints, constraints)
        weights = constraints["weights"]
        return {
            "min_value": _choice_to_json(constraints["min_value"]),
            "max_value": _choice_to_json(constraints["max_value"]),
            # json object keys must be strings, so weights are stored as a
            # list of (value, weight) pairs rather than as a mapping.
            "weights": (
                None
                if weights is None
                else [(_choice_to_json(k), v) for k, v in weights.items()]
            ),
            "shrink_towards": _choice_to_json(constraints["shrink_towards"]),
        }
    if choice_type == "float":
        constraints = cast(FloatConstraints, constraints)
        return {
            "min_value": _choice_to_json(constraints["min_value"]),
            "max_value": _choice_to_json(constraints["max_value"]),
            "allow_nan": constraints["allow_nan"],
            "smallest_nonzero_magnitude": constraints["smallest_nonzero_magnitude"],
        }
    if choice_type == "string":
        constraints = cast(StringConstraints, constraints)
        assert isinstance(constraints["intervals"], IntervalSet)
        return {
            "intervals": constraints["intervals"].intervals,
            "min_size": _choice_to_json(constraints["min_size"]),
            "max_size": _choice_to_json(constraints["max_size"]),
        }
    if choice_type == "bytes":
        constraints = cast(BytesConstraints, constraints)
        return {
            "min_size": _choice_to_json(constraints["min_size"]),
            "max_size": _choice_to_json(constraints["max_size"]),
        }
    if choice_type == "boolean":
        constraints = cast(BooleanConstraints, constraints)
        return {"p": constraints["p"]}
    raise NotImplementedError(f"unknown choice type {choice_type}")
132+
133+
134+
def nodes_to_json(nodes: "tuple[ChoiceNode, ...]") -> list[dict[str, Any]]:
    """Encode a sequence of ChoiceNodes as json-serializable dicts."""
    encoded = []
    for node in nodes:
        encoded.append(
            {
                "type": node.type,
                "value": _choice_to_json(node.value),
                "constraints": _constraints_to_json(node.type, node.constraints),
                "was_forced": node.was_forced,
            }
        )
    return encoded
144+
145+
46146
@dataclass
47147
class ObservationMetadata:
48148
traceback: Optional[str]
@@ -52,6 +152,28 @@ class ObservationMetadata:
52152
sys_argv: list[str]
53153
os_getpid: int
54154
imported_at: float
155+
data_status: "Status"
156+
interesting_origin: Optional[InterestingOrigin]
157+
choice_nodes: Optional[tuple[ChoiceNode, ...]]
158+
159+
def to_json(self) -> dict[str, Any]:
    """Serialize this metadata record for the observability json output.

    Dotted keys ("sys.argv", "os.getpid()") match the observability format
    spec rather than the attribute names on this dataclass.
    """
    encoded_nodes = (
        None if self.choice_nodes is None else nodes_to_json(self.choice_nodes)
    )
    data = {
        "traceback": self.traceback,
        "reproduction_decorator": self.reproduction_decorator,
        "predicates": self.predicates,
        "backend": self.backend,
        "sys.argv": self.sys_argv,
        "os.getpid()": self.os_getpid,
        "imported_at": self.imported_at,
        "data_status": self.data_status,
        "interesting_origin": self.interesting_origin,
        "choice_nodes": encoded_nodes,
    }
    # every dataclass field must appear above — catch forgotten additions
    assert len(data) == len(dataclasses.fields(self))
    return data
55177

56178

57179
@dataclass
@@ -183,6 +305,9 @@ def make_testcase(
183305
),
184306
"predicates": dict(data._observability_predicates),
185307
"backend": backend_metadata or {},
308+
"data_status": data.status,
309+
"interesting_origin": data.interesting_origin,
310+
"choice_nodes": data.nodes if OBSERVABILITY_CHOICE_NODES else None,
186311
**_system_metadata(),
187312
# unpack last so it takes precedence for duplicate keys
188313
**(metadata or {}),
@@ -204,11 +329,7 @@ def _deliver_to_file(observation: Observation) -> None: # pragma: no cover
204329
fname.parent.mkdir(exist_ok=True, parents=True)
205330
_WROTE_TO.add(fname)
206331
with fname.open(mode="a") as f:
207-
obs_json: dict[str, Any] = to_jsonable(observation, avoid_realization=False) # type: ignore
208-
if obs_json["type"] == "test_case":
209-
obs_json["metadata"]["sys.argv"] = obs_json["metadata"].pop("sys_argv")
210-
obs_json["metadata"]["os.getpid()"] = obs_json["metadata"].pop("os_getpid")
211-
f.write(json.dumps(obs_json) + "\n")
332+
f.write(json.dumps(to_jsonable(observation, avoid_realization=False)) + "\n")
212333

213334

214335
_imported_at = time.time()
@@ -231,6 +352,10 @@ def _system_metadata() -> dict[str, Any]:
231352
OBSERVABILITY_COLLECT_COVERAGE = (
232353
"HYPOTHESIS_EXPERIMENTAL_OBSERVABILITY_NOCOVER" not in os.environ
233354
)
355+
OBSERVABILITY_CHOICE_NODES = (
356+
"HYPOTHESIS_EXPERIMENTAL_OBSERVABILITY_CHOICE_NODES" in os.environ
357+
)
358+
234359
if OBSERVABILITY_COLLECT_COVERAGE is False and (
235360
sys.version_info[:2] >= (3, 12)
236361
): # pragma: no cover
@@ -240,8 +365,10 @@ def _system_metadata() -> dict[str, Any]:
240365
HypothesisWarning,
241366
stacklevel=2,
242367
)
243-
if "HYPOTHESIS_EXPERIMENTAL_OBSERVABILITY" in os.environ or (
244-
OBSERVABILITY_COLLECT_COVERAGE is False
368+
369+
if (
370+
"HYPOTHESIS_EXPERIMENTAL_OBSERVABILITY" in os.environ
371+
or OBSERVABILITY_COLLECT_COVERAGE is False
245372
): # pragma: no cover
246373
TESTCASE_CALLBACKS.append(_deliver_to_file)
247374

hypothesis-python/src/hypothesis/strategies/_internal/utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,10 @@ def to_jsonable(obj: object, *, avoid_realization: bool) -> object:
165165
known types.
166166
"""
167167
if isinstance(obj, (str, int, float, bool, type(None))):
168+
# We convert integers of 2**63 to floats, to avoid crashing external
169+
# utilities with a 64 bit integer cap (notable, sqlite). See
170+
# https://github.com/HypothesisWorks/hypothesis/pull/3797#discussion_r1413425110
171+
# and https://github.com/simonw/sqlite-utils/issues/605.
168172
if isinstance(obj, int) and not isinstance(obj, bool) and abs(obj) >= 2**63:
169173
# Silently clamp very large ints to max_float, to avoid OverflowError when
170174
# casting to float. (but avoid adding more constraints to symbolic values)

hypothesis-python/tests/conjecture/common.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,12 @@ def draw_value(choice_type, constraints):
394394
return getattr(data, f"draw_{choice_type}")(**constraints)
395395

396396

397+
@st.composite
def choices(draw):
    # Draw an arbitrary (choice_type, constraints) pair, then draw a single
    # concrete value satisfying those constraints.
    choice_type, constraints = draw(choice_types_constraints())
    return draw_value(choice_type, constraints)
401+
402+
397403
@st.composite
398404
def nodes(draw, *, was_forced=None, choice_types=None):
399405
if choice_types is None:

hypothesis-python/tests/cover/test_observability.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
99
# obtain one at https://mozilla.org/MPL/2.0/.
1010

11+
import base64
12+
import json
13+
import math
1114
import textwrap
1215
from contextlib import nullcontext
1316

@@ -26,7 +29,11 @@
2629
)
2730
from hypothesis.database import InMemoryExampleDatabase
2831
from hypothesis.internal.compat import PYPY
32+
from hypothesis.internal.conjecture.choice import ChoiceNode, choices_key
2933
from hypothesis.internal.coverage import IN_COVERAGE_TESTS
34+
from hypothesis.internal.floats import SIGNALING_NAN, int_to_float
35+
from hypothesis.internal.intervalsets import IntervalSet
36+
from hypothesis.internal.observability import choices_to_json, nodes_to_json
3037
from hypothesis.stateful import (
3138
RuleBasedStateMachine,
3239
invariant,
@@ -35,6 +42,7 @@
3542
)
3643

3744
from tests.common.utils import Why, capture_observations, xfail_on_crosshair
45+
from tests.conjecture.common import choices, nodes
3846

3947

4048
@seed("deterministic so we don't miss some combination of features")
@@ -332,3 +340,108 @@ def test_fails(should_fail, should_fail_assume):
332340
assert len(ls) == 1
333341
assert ls[0].status == expected_status
334342
assert ls[0].how_generated == "fuzz_one_input"
343+
344+
345+
def _decode_choice(value):
346+
if isinstance(value, list):
347+
if value[0] == "integer":
348+
# large integers get cast to float, stored as ["integer", float(value)]
349+
assert isinstance(value[1], float)
350+
return int(value[1])
351+
elif value[0] == "bytes":
352+
assert isinstance(value[1], str)
353+
return base64.b64decode(value[1])
354+
elif value[0] == "float":
355+
assert isinstance(value[1], int)
356+
choice = int_to_float(value[1])
357+
assert math.isnan(choice)
358+
return choice
359+
else:
360+
return value[1]
361+
362+
return value
363+
364+
365+
def _decode_choices(data):
    # Inverse of choices_to_json: decode every encoded choice in order.
    return list(map(_decode_choice, data))
367+
368+
369+
def _decode_nodes(data):
    # Inverse of nodes_to_json: rebuild ChoiceNode objects from json dicts.
    decoded = []
    for node in data:
        choice_type = node["type"]
        decoded.append(
            ChoiceNode(
                type=choice_type,
                value=_decode_choice(node["value"]),
                constraints=_decode_constraints(choice_type, node["constraints"]),
                was_forced=node["was_forced"],
            )
        )
    return decoded
379+
380+
381+
def _decode_constraints(choice_type, data):
    # Inverse of _constraints_to_json for a single node's constraints dict.
    if choice_type == "integer":
        weights = data["weights"]
        return {
            "min_value": _decode_choice(data["min_value"]),
            "max_value": _decode_choice(data["max_value"]),
            # weights were stored as (value, weight) pairs; rebuild the dict
            "weights": (
                None
                if weights is None
                else {_decode_choice(k): v for k, v in weights}
            ),
            "shrink_towards": _decode_choice(data["shrink_towards"]),
        }
    if choice_type == "float":
        return {
            "min_value": _decode_choice(data["min_value"]),
            "max_value": _decode_choice(data["max_value"]),
            "allow_nan": data["allow_nan"],
            "smallest_nonzero_magnitude": data["smallest_nonzero_magnitude"],
        }
    if choice_type == "string":
        return {
            "intervals": IntervalSet(tuple(data["intervals"])),
            "min_size": _decode_choice(data["min_size"]),
            "max_size": _decode_choice(data["max_size"]),
        }
    if choice_type == "bytes":
        return {
            "min_size": _decode_choice(data["min_size"]),
            "max_size": _decode_choice(data["max_size"]),
        }
    if choice_type == "boolean":
        return {"p": data["p"]}
    raise ValueError(f"unknown choice type {choice_type}")
415+
416+
417+
def _will_be_cast_to_float(value):
418+
return isinstance(value, int) and abs(value) >= 2**63
419+
420+
421+
@example([0.0])
@example([-0.0])
@example([SIGNALING_NAN])
@example([math.nan])
@example([math.inf])
@example([-math.inf])
@given(st.lists(choices()))
def test_choices_json_roundtrips(choices):
    # choices_to_json and nodes_to_json roundtrip, *except for large integers*,
    # which get cast to the nearest integer-valued float on roundtrip. This is
    # an intentional design decision of the format; see related comment in
    # to_jsonable.
    assume(not any(_will_be_cast_to_float(choice) for choice in choices))
    encoded = json.dumps(choices_to_json(choices))
    decoded = _decode_choices(json.loads(encoded))
    assert choices_key(choices) == choices_key(decoded)
437+
438+
439+
@given(st.lists(nodes()))
def test_nodes_json_roundtrips(nodes):
    def has_large_int(node):
        # either the drawn value or any constraint value would be cast to
        # float on serialization, breaking an exact roundtrip
        return _will_be_cast_to_float(node.value) or any(
            _will_be_cast_to_float(v) for v in node.constraints.values()
        )

    # large integers intentionally don't roundtrip; see the comment in
    # test_choices_json_roundtrips.
    assume(not any(has_large_int(node) for node in nodes))
    encoded = json.dumps(nodes_to_json(nodes))
    assert _decode_nodes(json.loads(encoded)) == nodes

0 commit comments

Comments
 (0)