diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst new file mode 100644 index 0000000000..030b16a6ad --- /dev/null +++ b/hypothesis-python/RELEASE.rst @@ -0,0 +1,5 @@ +RELEASE_TYPE: patch + +|st.one_of| now chooses a subset of its strategies to disable each time it generates a value. For example, it was previously unlikely that ``st.lists(st.integers() | st.floats() | st.text())`` would generate a long list containing only string values. This is now more likely, along with other uncommon combinations. + +This technique is called `swarm testing <https://users.cs.utah.edu/~regehr/papers/swarm12.pdf>`__, and can considerably improve bug-finding power, for instance because some features actively prevent other interesting behavior from running. See :issue:`2643` for more details. diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/featureflags.py b/hypothesis-python/src/hypothesis/strategies/_internal/featureflags.py index f37ff421e9..a4946ef06f 100644 --- a/hypothesis-python/src/hypothesis/strategies/_internal/featureflags.py +++ b/hypothesis-python/src/hypothesis/strategies/_internal/featureflags.py @@ -58,10 +58,16 @@ def __init__( # original model. # # We implement this as a single 8-bit integer and enable features which - # score >= that value. In particular when self.__baseline is 0, all + # score >= that value. In particular when self.__p_disabled is 0, all # features will be enabled. This is so that we shrink in the direction # of more features being enabled. if self.__data is not None: + # this really messes up our deduplication tracking, because all 255 + # draws are unique. But we more or less have to choose whether something + # is enabled on-demand with a prior probability, rather than choosing what + # is enabled up front, because the latter results in a very large choice + # sequence when there are lots of possibilities. + # (a tradeoff might be selecting up front when there are <= 3 options?) 
self.__p_disabled = self.__data.draw_integer(0, 254) / 255 else: # If data is None we're in example mode so all that matters is the @@ -86,9 +92,11 @@ def is_enabled(self, name: Any) -> bool: return not self.__is_disabled.get(name, False) data = self.__data - + # TODO I wouldn't expect a span here to do anything, since it only ever + # encapsulates a single draw, but test_minimizes_individual_features_to_open + # fails without this. Can we improve the shrinker so this span isn't + # necessary? data.start_span(label=FEATURE_LABEL) - # If we've already decided on this feature then we don't actually # need to draw anything, but we do write the same decision to the # input stream. This allows us to lazily decide whether a feature diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py b/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py index 31c24cc808..7ebdf0647e 100644 --- a/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py +++ b/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py @@ -28,6 +28,7 @@ overload, ) +from hypothesis import strategies as st from hypothesis._settings import HealthCheck, Phase, Verbosity, settings from hypothesis.control import _current_build_context, current_build_context from hypothesis.errors import ( @@ -689,10 +690,16 @@ class OneOfStrategy(SearchStrategy[Ex]): """ def __init__(self, strategies: Sequence[SearchStrategy[Ex]]): + from hypothesis.strategies._internal.featureflags import FeatureStrategy + super().__init__() self.original_strategies = tuple(strategies) self.__element_strategies: Optional[Sequence[SearchStrategy[Ex]]] = None self.__in_branches = False + self.enabled_branches_strategy = st.shared( + FeatureStrategy(self.original_strategies), + key=("one_of swarm testing", self.original_strategies), + ) def calc_is_empty(self, recur: RecurT) -> bool: return all(recur(e) for e in self.original_strategies) @@ -739,9 +746,10 @@ def calc_label(self) -> int: 
) def do_draw(self, data: ConjectureData) -> Ex: + feature_flags = data.draw(self.enabled_branches_strategy) strategy = data.draw( SampledFromStrategy(self.element_strategies).filter( - lambda s: s.available(data) + lambda s: s.available(data) and feature_flags.is_enabled(s) ) ) return data.draw(strategy) diff --git a/hypothesis-python/tests/cover/test_feature_flags.py b/hypothesis-python/tests/cover/test_feature_flags.py index d87b7cff2a..5fede59cfa 100644 --- a/hypothesis-python/tests/cover/test_feature_flags.py +++ b/hypothesis-python/tests/cover/test_feature_flags.py @@ -13,26 +13,22 @@ from tests.common.debug import find_any, minimal -STRAT = FeatureStrategy() - def test_can_all_be_enabled(): - find_any(STRAT, lambda x: all(x.is_enabled(i) for i in range(100))) + find_any(FeatureStrategy(), lambda x: all(x.is_enabled(i) for i in range(100))) def test_minimizes_open(): features = range(10) - - flags = minimal(STRAT, lambda x: [x.is_enabled(i) for i in features]) - + flags = minimal(FeatureStrategy(), lambda x: [x.is_enabled(i) for i in features]) assert all(flags.is_enabled(i) for i in features) def test_minimizes_individual_features_to_open(): features = list(range(10)) - flags = minimal( - STRAT, lambda x: sum(x.is_enabled(i) for i in features) < len(features) + FeatureStrategy(), + lambda x: sum(x.is_enabled(i) for i in features) < len(features), ) assert all(flags.is_enabled(i) for i in features[:-1]) @@ -40,15 +36,11 @@ def test_minimizes_individual_features_to_open(): def test_marks_unknown_features_as_enabled(): - x = find_any(STRAT, lambda v: True) - - assert x.is_enabled("fish") + assert find_any(FeatureStrategy(), lambda v: True).is_enabled("fish") def test_by_default_all_enabled(): - f = FeatureFlags() - - assert f.is_enabled("foo") + assert FeatureFlags().is_enabled("foo") def test_eval_featureflags_repr(): @@ -62,20 +54,17 @@ def test_eval_featureflags_repr(): @given(st.data()) def test_repr_can_be_evalled(data): - flags = data.draw(STRAT) - + 
flags = data.draw(FeatureStrategy()) features = data.draw(st.lists(st.text(), unique=True)) for f in features: flags.is_enabled(f) flags2 = eval(repr(flags)) - for f in features: assert flags2.is_enabled(f) == flags.is_enabled(f) more_features = data.draw(st.lists(st.text().filter(lambda s: s not in features))) - for f in more_features: assert flags2.is_enabled(f) diff --git a/hypothesis-python/tests/nocover/test_precise_shrinking.py b/hypothesis-python/tests/nocover/test_precise_shrinking.py index aef364e845..8543a12fc2 100644 --- a/hypothesis-python/tests/nocover/test_precise_shrinking.py +++ b/hypothesis-python/tests/nocover/test_precise_shrinking.py @@ -286,10 +286,9 @@ def test_function(data): @pytest.mark.parametrize("a", list(itertools.product(*([common_strategies[1:]] * 2)))) -@pytest.mark.parametrize("block_falsey", [False, True]) @pytest.mark.parametrize("allow_sloppy", [False, True]) @pytest.mark.parametrize("seed", [0, 2452, 99085240570]) -def test_always_shrinks_to_none(a, seed, block_falsey, allow_sloppy): +def test_always_shrinks_to_none(a, seed, allow_sloppy): combined_strategy = st.one_of(st.none(), *a) result, value = find_random(combined_strategy, lambda x: x is not None) diff --git a/hypothesis-python/tests/quality/test_discovery_ability.py b/hypothesis-python/tests/quality/test_discovery_ability.py index eb72c7a5cf..ac65e67706 100644 --- a/hypothesis-python/tests/quality/test_discovery_ability.py +++ b/hypothesis-python/tests/quality/test_discovery_ability.py @@ -376,3 +376,33 @@ def double(x): test_can_produce_nasty_strings = define_test( text(), lambda s: s in {"NaN", "Inf", "undefined"}, p=0.01 ) + +oneof_strategy = lists(integers() | floats() | text() | tuples(integers())) + +test_oneof_produces_all_types = define_test( + oneof_strategy, + lambda v: len({type(x) for x in v}) == 4, + condition=lambda v: len(v) > 5, + p=0.8, +) + +test_oneof_produces_three_types = define_test( + oneof_strategy, + lambda v: len({type(x) for x in v}) == 3, + 
condition=lambda v: len(v) > 5, + p=0.8, +) + +test_oneof_produces_two_types = define_test( + oneof_strategy, + lambda v: len({type(x) for x in v}) == 2, + condition=lambda v: len(v) > 5, + p=0.8, +) + +test_oneof_produces_one_type = define_test( + oneof_strategy, + lambda v: len({type(x) for x in v}) == 1, + condition=lambda v: len(v) > 5, + p=0.8, +)